From 8670c8b20d4de7887dcd3d2cac24908a2a4be71b Mon Sep 17 00:00:00 2001 From: Dani Goland Date: Mon, 22 Aug 2022 02:57:32 -0700 Subject: [PATCH] Modified "xpath" extractor to support XML XPath in addition to HTML XPath (#2471) * Modified "xpath" extractor to support XML XPath in addition to HTML XPath * Updated function docs --- v2/go.mod | 1 + v2/go.sum | 2 ++ v2/pkg/operators/extractors/extract.go | 44 ++++++++++++++++++++++++-- v2/pkg/protocols/http/operators.go | 2 +- v2/pkg/protocols/protocols.go | 2 +- 5 files changed, 46 insertions(+), 5 deletions(-) diff --git a/v2/go.mod b/v2/go.mod index 23c7013c..b7de3cb2 100644 --- a/v2/go.mod +++ b/v2/go.mod @@ -65,6 +65,7 @@ require github.com/projectdiscovery/folderutil v0.0.0-20220215113126-add60a1e8e0 require ( github.com/DataDog/gostackparse v0.5.0 + github.com/antchfx/xmlquery v1.3.12 github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d github.com/docker/go-units v0.4.0 github.com/h2non/filetype v1.1.3 diff --git a/v2/go.sum b/v2/go.sum index ad05b099..f9062f0f 100644 --- a/v2/go.sum +++ b/v2/go.sum @@ -98,6 +98,8 @@ github.com/andygrunwald/go-jira v1.16.0/go.mod h1:UQH4IBVxIYWbgagc0LF/k9FRs9xjIi github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0= github.com/antchfx/htmlquery v1.2.5 h1:1lXnx46/1wtv1E/kzmH8vrfMuUKYgkdDBA9pIdMJnk4= github.com/antchfx/htmlquery v1.2.5/go.mod h1:2MCVBzYVafPBmKbrmwB9F5xdd+IEgRY61ci2oOsOQVw= +github.com/antchfx/xmlquery v1.3.12 h1:6TMGpdjpO/P8VhjnaYPXuqT3qyJ/VsqoyNTmJzNBTQ4= +github.com/antchfx/xmlquery v1.3.12/go.mod h1:3w2RvQvTz+DaT5fSgsELkSJcdNgkmg6vuXDEuhdwsPQ= github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= github.com/antchfx/xpath v1.2.1 h1:qhp4EW6aCOVr5XIkT+l6LJ9ck/JsUH/yyauNgTQkBF8= github.com/antchfx/xpath v1.2.1/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= diff --git a/v2/pkg/operators/extractors/extract.go b/v2/pkg/operators/extractors/extract.go index e8fafc3b..d5c948ef 100644 --- a/v2/pkg/operators/extractors/extract.go +++ b/v2/pkg/operators/extractors/extract.go @@ -3,9 +3,9 @@ package extractors import ( "encoding/json" "fmt" - "strings" - "github.com/antchfx/htmlquery" + "github.com/antchfx/xmlquery" + "strings" "github.com/projectdiscovery/nuclei/v2/pkg/types" ) @@ -59,7 +59,15 @@ func (e *Extractor) ExtractKval(data map[string]interface{}) map[string]struct{} return results } -// ExtractHTML extracts items from text using XPath selectors +// ExtractXPath extracts items from text using XPath selectors +func (e *Extractor) ExtractXPath(corpus string) map[string]struct{} { + if strings.HasPrefix(corpus, "