Merge pull request #900 from projectdiscovery/xpath-extractors

Added xpath extractors for http requests
dev
Sandeep Singh 2021-08-10 02:16:56 +05:30 committed by GitHub
commit af6a720e21
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 52 additions and 2 deletions

View File

@ -5,6 +5,7 @@ go 1.15
require (
github.com/Knetic/govaluate v3.0.0+incompatible
github.com/andygrunwald/go-jira v1.13.0
github.com/antchfx/htmlquery v1.2.3
github.com/apex/log v1.9.0
github.com/blang/semver v3.5.1+incompatible
github.com/c4milo/unpackit v0.1.0 // indirect
@ -54,7 +55,7 @@ require (
go.uber.org/atomic v1.7.0
go.uber.org/multierr v1.6.0
go.uber.org/ratelimit v0.2.0
golang.org/x/net v0.0.0-20210521195947-fe42d452be8f
golang.org/x/net v0.0.0-20210614182718-04defd469f4e
golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99
golang.org/x/time v0.0.0-20201208040808-7e3f01d25324 // indirect
google.golang.org/appengine v1.6.7 // indirect

View File

@ -46,6 +46,10 @@ github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129 h1:MzBOUgng9or
github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129/go.mod h1:rFgpPQZYZ8vdbc+48xibu8ALc3yeyd64IhHS+PU6Yyg=
github.com/andygrunwald/go-jira v1.13.0 h1:vvIImGgX32bHfoiyUwkNo+/YrPnRczNarvhLOncP6dE=
github.com/andygrunwald/go-jira v1.13.0/go.mod h1:jYi4kFDbRPZTJdJOVJO4mpMMIwdB+rcZwSO58DzPd2I=
github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz6M=
github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0=
github.com/antchfx/xpath v1.1.6 h1:6sVh6hB5T6phw1pFpHRQ+C4bd8sNI+O58flqtg7h0R0=
github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/apex/log v1.9.0 h1:FHtw/xuaM8AgmvDDTI9fiwoAL25Sq2cxojnZICUU8l0=
github.com/apex/log v1.9.0/go.mod h1:m82fZlWIuiWzWP04XCTXmnX0xRkYYbCdYn8jbJeLBEA=
github.com/apex/logs v1.0.0/go.mod h1:XzxuLZ5myVHDy9SAmYpamKKRNApGj54PfYLcFrXqDwo=
@ -106,6 +110,7 @@ github.com/go-rod/rod v0.91.1/go.mod h1:/W4lcZiCALPD603MnJGIvhtywP3R6yRB9EDfFfsH
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
@ -433,6 +438,7 @@ golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
@ -442,8 +448,9 @@ golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81R
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210521195947-fe42d452be8f h1:Si4U+UcgJzya9kpiEUJKQvjr512OLli+gL4poHrz93U=
golang.org/x/net v0.0.0-20210521195947-fe42d452be8f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20181106182150-f42d05182288/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=

View File

@ -1,8 +1,12 @@
package extractors
import (
"strings"
"encoding/json"
"github.com/antchfx/htmlquery"
"github.com/projectdiscovery/nuclei/v2/pkg/types"
)
@ -45,6 +49,35 @@ func (e *Extractor) ExtractKval(data map[string]interface{}) map[string]struct{}
return results
}
// ExtractHTML extracts items from text using XPath selectors
func (e *Extractor) ExtractHTML(corpus string) map[string]struct{} {
results := make(map[string]struct{})
doc, err := htmlquery.Parse(strings.NewReader(corpus))
if err != nil {
return results
}
for _, k := range e.XPath {
nodes, err := htmlquery.QueryAll(doc, k)
if err != nil {
continue
}
for _, node := range nodes {
var value string
if e.Attribute != "" {
value = htmlquery.SelectAttr(node, e.Attribute)
} else {
value = htmlquery.InnerText(node)
}
if _, ok := results[value]; !ok {
results[value] = struct{}{}
}
}
}
return results
}
// ExtractJSON extracts text from a corpus using JQ queries and returns it
func (e *Extractor) ExtractJSON(corpus string) map[string]struct{} {
results := make(map[string]struct{})

View File

@ -25,6 +25,10 @@ type Extractor struct {
// KVal are the kval to be present in the response headers/cookies
KVal []string `yaml:"kval,omitempty"`
// XPath are the Xpath selectors for the extractor
XPath []string `yaml:"xpath"`
// Attribute is an optional attribute to extract from response XPath
Attribute string `yaml:"attribute"`
// JSON are the json pattern required to be present in the response
JSON []string `yaml:"json"`
// jsonCompiled is the compiled variant
@ -46,6 +50,8 @@ const (
RegexExtractor ExtractorType = iota + 1
// KValExtractor extracts responses with key:value
KValExtractor
// XPathExtractor extracts responses with Xpath selectors
XPathExtractor
// JSONExtractor extracts responses with json
JSONExtractor
)
@ -54,6 +60,7 @@ const (
var ExtractorTypes = map[string]ExtractorType{
"regex": RegexExtractor,
"kval": KValExtractor,
"xpath": XPathExtractor,
"json": JSONExtractor,
}

View File

@ -54,6 +54,8 @@ func (r *Request) Extract(data map[string]interface{}, extractor *extractors.Ext
return extractor.ExtractRegex(item)
case extractors.KValExtractor:
return extractor.ExtractKval(data)
case extractors.XPathExtractor:
return extractor.ExtractHTML(item)
case extractors.JSONExtractor:
return extractor.ExtractJSON(item)
}