diff --git a/v2/go.mod b/v2/go.mod index ba657020..580e6596 100644 --- a/v2/go.mod +++ b/v2/go.mod @@ -5,6 +5,7 @@ go 1.15 require ( github.com/Knetic/govaluate v3.0.0+incompatible github.com/andygrunwald/go-jira v1.13.0 + github.com/antchfx/htmlquery v1.2.3 github.com/apex/log v1.9.0 github.com/blang/semver v3.5.1+incompatible github.com/c4milo/unpackit v0.1.0 // indirect @@ -54,7 +55,7 @@ require ( go.uber.org/atomic v1.7.0 go.uber.org/multierr v1.6.0 go.uber.org/ratelimit v0.2.0 - golang.org/x/net v0.0.0-20210521195947-fe42d452be8f + golang.org/x/net v0.0.0-20210614182718-04defd469f4e golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99 golang.org/x/time v0.0.0-20201208040808-7e3f01d25324 // indirect google.golang.org/appengine v1.6.7 // indirect diff --git a/v2/go.sum b/v2/go.sum index eb6e6eb2..6ec5e06f 100644 --- a/v2/go.sum +++ b/v2/go.sum @@ -46,6 +46,10 @@ github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129 h1:MzBOUgng9or github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129/go.mod h1:rFgpPQZYZ8vdbc+48xibu8ALc3yeyd64IhHS+PU6Yyg= github.com/andygrunwald/go-jira v1.13.0 h1:vvIImGgX32bHfoiyUwkNo+/YrPnRczNarvhLOncP6dE= github.com/andygrunwald/go-jira v1.13.0/go.mod h1:jYi4kFDbRPZTJdJOVJO4mpMMIwdB+rcZwSO58DzPd2I= +github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz6M= +github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0= +github.com/antchfx/xpath v1.1.6 h1:6sVh6hB5T6phw1pFpHRQ+C4bd8sNI+O58flqtg7h0R0= +github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= github.com/apex/log v1.9.0 h1:FHtw/xuaM8AgmvDDTI9fiwoAL25Sq2cxojnZICUU8l0= github.com/apex/log v1.9.0/go.mod h1:m82fZlWIuiWzWP04XCTXmnX0xRkYYbCdYn8jbJeLBEA= github.com/apex/logs v1.0.0/go.mod h1:XzxuLZ5myVHDy9SAmYpamKKRNApGj54PfYLcFrXqDwo= @@ -106,6 +110,7 @@ github.com/go-rod/rod v0.91.1/go.mod h1:/W4lcZiCALPD603MnJGIvhtywP3R6yRB9EDfFfsH github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= @@ -433,6 +438,7 @@ golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= @@ -442,8 +448,9 @@ golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81R golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210521195947-fe42d452be8f h1:Si4U+UcgJzya9kpiEUJKQvjr512OLli+gL4poHrz93U= golang.org/x/net v0.0.0-20210521195947-fe42d452be8f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q= +golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181106182150-f42d05182288/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= diff --git a/v2/pkg/operators/extractors/extract.go b/v2/pkg/operators/extractors/extract.go index 58845a83..913fd828 100644 --- a/v2/pkg/operators/extractors/extract.go +++ b/v2/pkg/operators/extractors/extract.go @@ -1,8 +1,12 @@ package extractors import ( + "strings" + "encoding/json" + "github.com/antchfx/htmlquery" + "github.com/projectdiscovery/nuclei/v2/pkg/types" ) @@ -45,6 +49,35 @@ func (e *Extractor) ExtractKval(data map[string]interface{}) map[string]struct{} return results } +// ExtractHTML extracts items from text using XPath selectors +func (e *Extractor) ExtractHTML(corpus string) map[string]struct{} { + results := make(map[string]struct{}) + + doc, err := htmlquery.Parse(strings.NewReader(corpus)) + if err != nil { + return results + } + for _, k := range e.XPath { + nodes, err := htmlquery.QueryAll(doc, k) + if err != nil { + continue + } + for _, node := range nodes { + var value string + + if e.Attribute != "" { + value = htmlquery.SelectAttr(node, e.Attribute) + } else { + value = htmlquery.InnerText(node) + } + if _, ok := results[value]; !ok { + results[value] = struct{}{} + } + } + } + return results +} + // ExtractJSON extracts text from a corpus using JQ queries and returns it func (e *Extractor) ExtractJSON(corpus string) map[string]struct{} { results := make(map[string]struct{}) diff --git a/v2/pkg/operators/extractors/extractors.go b/v2/pkg/operators/extractors/extractors.go index 2542bb0d..e3a2f051 100644 --- a/v2/pkg/operators/extractors/extractors.go +++ b/v2/pkg/operators/extractors/extractors.go @@ -25,6 +25,10 @@ type Extractor struct { // KVal are the kval to be present in the response headers/cookies KVal []string `yaml:"kval,omitempty"` + // XPath are the Xpath selectors for the extractor + XPath []string `yaml:"xpath"` + // Attribute is an optional attribute to extract from response XPath + Attribute string `yaml:"attribute"` // JSON are the json pattern required to be present in the response JSON []string `yaml:"json"` // jsonCompiled is the compiled variant @@ -46,6 +50,8 @@ const ( RegexExtractor ExtractorType = iota + 1 // KValExtractor extracts responses with key:value KValExtractor + // XPathExtractor extracts responses with Xpath selectors + XPathExtractor // JSONExtractor extracts responses with json JSONExtractor ) @@ -54,6 +60,7 @@ const ( var ExtractorTypes = map[string]ExtractorType{ "regex": RegexExtractor, "kval": KValExtractor, + "xpath": XPathExtractor, "json": JSONExtractor, } diff --git a/v2/pkg/protocols/http/operators.go b/v2/pkg/protocols/http/operators.go index ba6f9a6c..60283b30 100644 --- a/v2/pkg/protocols/http/operators.go +++ b/v2/pkg/protocols/http/operators.go @@ -54,6 +54,8 @@ func (r *Request) Extract(data map[string]interface{}, extractor *extractors.Ext return extractor.ExtractRegex(item) case extractors.KValExtractor: return extractor.ExtractKval(data) + case extractors.XPathExtractor: + return extractor.ExtractHTML(item) case extractors.JSONExtractor: return extractor.ExtractJSON(item) }