mirror of https://github.com/daffainfo/nuclei.git
Modified "xpath" extractor to support XML XPath in addition to HTML XPath (#2471)
* Modified "xpath" extractor to support XML XPath in addition to HTML XPath
* Updated function docs
dev
parent
419924188b
commit
8670c8b20d
|
@ -65,6 +65,7 @@ require github.com/projectdiscovery/folderutil v0.0.0-20220215113126-add60a1e8e0
|
|||
|
||||
require (
|
||||
github.com/DataDog/gostackparse v0.5.0
|
||||
github.com/antchfx/xmlquery v1.3.12
|
||||
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d
|
||||
github.com/docker/go-units v0.4.0
|
||||
github.com/h2non/filetype v1.1.3
|
||||
|
|
|
@ -98,6 +98,8 @@ github.com/andygrunwald/go-jira v1.16.0/go.mod h1:UQH4IBVxIYWbgagc0LF/k9FRs9xjIi
|
|||
github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0=
|
||||
github.com/antchfx/htmlquery v1.2.5 h1:1lXnx46/1wtv1E/kzmH8vrfMuUKYgkdDBA9pIdMJnk4=
|
||||
github.com/antchfx/htmlquery v1.2.5/go.mod h1:2MCVBzYVafPBmKbrmwB9F5xdd+IEgRY61ci2oOsOQVw=
|
||||
github.com/antchfx/xmlquery v1.3.12 h1:6TMGpdjpO/P8VhjnaYPXuqT3qyJ/VsqoyNTmJzNBTQ4=
|
||||
github.com/antchfx/xmlquery v1.3.12/go.mod h1:3w2RvQvTz+DaT5fSgsELkSJcdNgkmg6vuXDEuhdwsPQ=
|
||||
github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
|
||||
github.com/antchfx/xpath v1.2.1 h1:qhp4EW6aCOVr5XIkT+l6LJ9ck/JsUH/yyauNgTQkBF8=
|
||||
github.com/antchfx/xpath v1.2.1/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
|
||||
|
|
|
@ -3,9 +3,9 @@ package extractors
|
|||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/antchfx/htmlquery"
|
||||
"github.com/antchfx/xmlquery"
|
||||
"strings"
|
||||
|
||||
"github.com/projectdiscovery/nuclei/v2/pkg/types"
|
||||
)
|
||||
|
@ -59,7 +59,15 @@ func (e *Extractor) ExtractKval(data map[string]interface{}) map[string]struct{}
|
|||
return results
|
||||
}
|
||||
|
||||
// ExtractHTML extracts items from text using XPath selectors
|
||||
// ExtractXPath extracts items from text using XPath selectors
|
||||
func (e *Extractor) ExtractXPath(corpus string) map[string]struct{} {
|
||||
if strings.HasPrefix(corpus, "<?xml") {
|
||||
return e.ExtractXML(corpus)
|
||||
}
|
||||
return e.ExtractHTML(corpus)
|
||||
}
|
||||
|
||||
// ExtractHTML extracts items from HTML using XPath selectors
|
||||
func (e *Extractor) ExtractHTML(corpus string) map[string]struct{} {
|
||||
results := make(map[string]struct{})
|
||||
|
||||
|
@ -88,6 +96,36 @@ func (e *Extractor) ExtractHTML(corpus string) map[string]struct{} {
|
|||
return results
|
||||
}
|
||||
|
||||
// ExtractXML extracts items from XML using XPath selectors
|
||||
func (e *Extractor) ExtractXML(corpus string) map[string]struct{} {
|
||||
results := make(map[string]struct{})
|
||||
|
||||
doc, err := xmlquery.Parse(strings.NewReader(corpus))
|
||||
if err != nil {
|
||||
return results
|
||||
}
|
||||
|
||||
for _, k := range e.XPath {
|
||||
nodes, err := xmlquery.QueryAll(doc, k)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
for _, node := range nodes {
|
||||
var value string
|
||||
|
||||
if e.Attribute != "" {
|
||||
value = node.SelectAttr(e.Attribute)
|
||||
} else {
|
||||
value = node.InnerText()
|
||||
}
|
||||
if _, ok := results[value]; !ok {
|
||||
results[value] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
// ExtractJSON extracts text from a corpus using JQ queries and returns it
|
||||
func (e *Extractor) ExtractJSON(corpus string) map[string]struct{} {
|
||||
results := make(map[string]struct{})
|
||||
|
|
|
@ -67,7 +67,7 @@ func (request *Request) Extract(data map[string]interface{}, extractor *extracto
|
|||
case extractors.KValExtractor:
|
||||
return extractor.ExtractKval(data)
|
||||
case extractors.XPathExtractor:
|
||||
return extractor.ExtractHTML(item)
|
||||
return extractor.ExtractXPath(item)
|
||||
case extractors.JSONExtractor:
|
||||
return extractor.ExtractJSON(item)
|
||||
case extractors.DSLExtractor:
|
||||
|
|
|
@ -163,7 +163,7 @@ func MakeDefaultExtractFunc(data map[string]interface{}, extractor *extractors.E
|
|||
case extractors.JSONExtractor:
|
||||
return extractor.ExtractJSON(itemStr)
|
||||
case extractors.XPathExtractor:
|
||||
return extractor.ExtractHTML(itemStr)
|
||||
return extractor.ExtractXPath(itemStr)
|
||||
case extractors.DSLExtractor:
|
||||
return extractor.ExtractDSL(data)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue