Modified "xpath" extractor to support XML XPath in addition to HTML XPath (#2471)

* Modified "xpath" extractor to support XML XPath in addition to HTML XPath

* Updated function docs
dev
Dani Goland 2022-08-22 02:57:32 -07:00 committed by GitHub
parent 419924188b
commit 8670c8b20d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 46 additions and 5 deletions

View File

@ -65,6 +65,7 @@ require github.com/projectdiscovery/folderutil v0.0.0-20220215113126-add60a1e8e0
require (
github.com/DataDog/gostackparse v0.5.0
github.com/antchfx/xmlquery v1.3.12
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d
github.com/docker/go-units v0.4.0
github.com/h2non/filetype v1.1.3

View File

@ -98,6 +98,8 @@ github.com/andygrunwald/go-jira v1.16.0/go.mod h1:UQH4IBVxIYWbgagc0LF/k9FRs9xjIi
github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0=
github.com/antchfx/htmlquery v1.2.5 h1:1lXnx46/1wtv1E/kzmH8vrfMuUKYgkdDBA9pIdMJnk4=
github.com/antchfx/htmlquery v1.2.5/go.mod h1:2MCVBzYVafPBmKbrmwB9F5xdd+IEgRY61ci2oOsOQVw=
github.com/antchfx/xmlquery v1.3.12 h1:6TMGpdjpO/P8VhjnaYPXuqT3qyJ/VsqoyNTmJzNBTQ4=
github.com/antchfx/xmlquery v1.3.12/go.mod h1:3w2RvQvTz+DaT5fSgsELkSJcdNgkmg6vuXDEuhdwsPQ=
github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/antchfx/xpath v1.2.1 h1:qhp4EW6aCOVr5XIkT+l6LJ9ck/JsUH/yyauNgTQkBF8=
github.com/antchfx/xpath v1.2.1/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=

View File

@ -3,9 +3,9 @@ package extractors
import (
"encoding/json"
"fmt"
"strings"
"github.com/antchfx/htmlquery"
"github.com/antchfx/xmlquery"
"strings"
"github.com/projectdiscovery/nuclei/v2/pkg/types"
)
@ -59,7 +59,15 @@ func (e *Extractor) ExtractKval(data map[string]interface{}) map[string]struct{}
return results
}
// ExtractHTML extracts items from text using XPath selectors
// ExtractXPath extracts items from text using XPath selectors.
//
// The corpus is handed to ExtractXML when it opens with an XML declaration
// ("<?xml") and to ExtractHTML otherwise. A leading UTF-8 byte order mark and
// leading whitespace are skipped before looking for the declaration, so a
// payload such as "\n<?xml ...?>" is still recognised as XML instead of
// silently falling through to the HTML parser. The XML parser receives the
// corpus with that prefix removed; the HTML parser receives it untouched.
func (e *Extractor) ExtractXPath(corpus string) map[string]struct{} {
	// "\ufeff" is how a UTF-8 BOM appears inside a Go string.
	trimmed := strings.TrimLeft(corpus, "\ufeff \t\r\n")
	if strings.HasPrefix(trimmed, "<?xml") {
		return e.ExtractXML(trimmed)
	}
	return e.ExtractHTML(corpus)
}
// ExtractHTML extracts items from HTML using XPath selectors
func (e *Extractor) ExtractHTML(corpus string) map[string]struct{} {
results := make(map[string]struct{})
@ -88,6 +96,36 @@ func (e *Extractor) ExtractHTML(corpus string) map[string]struct{} {
return results
}
// ExtractXML extracts items from XML using XPath selectors.
//
// Every expression in e.XPath is evaluated against the parsed corpus. For each
// matching node the value of the attribute named by e.Attribute is collected
// when an attribute name is configured, otherwise the node's inner text is
// collected. A corpus that fails to parse yields an empty set, and expressions
// that fail to evaluate are skipped.
func (e *Extractor) ExtractXML(corpus string) map[string]struct{} {
	extracted := make(map[string]struct{})

	root, parseErr := xmlquery.Parse(strings.NewReader(corpus))
	if parseErr != nil {
		return extracted
	}

	for _, expr := range e.XPath {
		matches, queryErr := xmlquery.QueryAll(root, expr)
		if queryErr != nil {
			continue
		}
		for _, match := range matches {
			// The map is used as a set, so inserting an existing key is a no-op.
			if e.Attribute == "" {
				extracted[match.InnerText()] = struct{}{}
				continue
			}
			extracted[match.SelectAttr(e.Attribute)] = struct{}{}
		}
	}
	return extracted
}
// ExtractJSON extracts text from a corpus using JQ queries and returns it
func (e *Extractor) ExtractJSON(corpus string) map[string]struct{} {
results := make(map[string]struct{})

View File

@ -67,7 +67,7 @@ func (request *Request) Extract(data map[string]interface{}, extractor *extracto
case extractors.KValExtractor:
return extractor.ExtractKval(data)
case extractors.XPathExtractor:
return extractor.ExtractHTML(item)
return extractor.ExtractXPath(item)
case extractors.JSONExtractor:
return extractor.ExtractJSON(item)
case extractors.DSLExtractor:

View File

@ -163,7 +163,7 @@ func MakeDefaultExtractFunc(data map[string]interface{}, extractor *extractors.E
case extractors.JSONExtractor:
return extractor.ExtractJSON(itemStr)
case extractors.XPathExtractor:
return extractor.ExtractHTML(itemStr)
return extractor.ExtractXPath(itemStr)
case extractors.DSLExtractor:
return extractor.ExtractDSL(data)
}