nuclei/v2/pkg/operators/extractors/extract.go

125 lines
2.7 KiB
Go
Raw Normal View History

2020-04-05 18:35:01 +00:00
package extractors
2021-01-12 05:51:32 +00:00
import (
2021-07-31 20:49:23 +00:00
"encoding/json"
2021-11-25 15:09:20 +00:00
"strings"
2021-08-01 12:42:04 +00:00
2021-08-02 16:13:50 +00:00
"github.com/antchfx/htmlquery"
2021-01-12 05:51:32 +00:00
"github.com/projectdiscovery/nuclei/v2/pkg/types"
)
2020-07-16 08:32:00 +00:00
2020-12-24 15:17:41 +00:00
// ExtractRegex extracts text from a corpus and returns it
func (e *Extractor) ExtractRegex(corpus string) map[string]struct{} {
2020-04-27 18:04:08 +00:00
results := make(map[string]struct{})
groupPlusOne := e.RegexGroup + 1
for _, regex := range e.regexCompiled {
matches := regex.FindAllStringSubmatch(corpus, -1)
2020-12-24 06:43:18 +00:00
2020-04-27 18:04:08 +00:00
for _, match := range matches {
2020-12-24 06:43:18 +00:00
if len(match) < groupPlusOne {
continue
}
2020-12-24 06:43:18 +00:00
matchString := match[e.RegexGroup]
2020-12-24 06:43:18 +00:00
if _, ok := results[matchString]; !ok {
results[matchString] = struct{}{}
}
2020-07-16 08:32:00 +00:00
}
}
return results
}
2020-12-24 15:17:41 +00:00
// ExtractKval extracts key value pairs from a data map
func (e *Extractor) ExtractKval(data map[string]interface{}) map[string]struct{} {
if e.CaseInsensitive {
inputData := data
data = make(map[string]interface{}, len(inputData))
for k, v := range inputData {
if s, ok := v.(string); ok {
v = strings.ToLower(s)
}
data[strings.ToLower(k)] = v
}
}
results := make(map[string]struct{})
2020-07-16 10:58:56 +00:00
for _, k := range e.KVal {
2020-12-24 06:43:18 +00:00
item, ok := data[k]
if !ok {
continue
}
itemString := types.ToString(item)
if _, ok := results[itemString]; !ok {
results[itemString] = struct{}{}
2020-07-16 08:32:00 +00:00
}
}
return results
}
2021-08-02 16:13:50 +00:00
// ExtractHTML parses corpus with htmlquery and evaluates each XPath
// expression in e.XPath against the resulting document, returning the set of
// distinct values taken from the matched nodes.
//
// For every matched node the value is htmlquery.SelectAttr(node, e.Attribute)
// when e.Attribute is non-empty, and htmlquery.InnerText(node) otherwise.
// If parsing fails an empty set is returned; an expression whose query
// returns an error is skipped and the remaining expressions still run.
func (e *Extractor) ExtractHTML(corpus string) map[string]struct{} {
	extracted := make(map[string]struct{})

	root, parseErr := htmlquery.Parse(strings.NewReader(corpus))
	if parseErr != nil {
		return extracted
	}

	wantAttr := e.Attribute != ""
	for _, expr := range e.XPath {
		matched, queryErr := htmlquery.QueryAll(root, expr)
		if queryErr != nil {
			continue
		}
		for _, n := range matched {
			// The map is used as a set, so inserting unconditionally is
			// equivalent to checking for membership first.
			if wantAttr {
				extracted[htmlquery.SelectAttr(n, e.Attribute)] = struct{}{}
				continue
			}
			extracted[htmlquery.InnerText(n)] = struct{}{}
		}
	}
	return extracted
}
// ExtractJSON extracts text from a corpus using JQ queries and returns it
2021-08-01 12:42:04 +00:00
func (e *Extractor) ExtractJSON(corpus string) map[string]struct{} {
2021-07-31 20:49:23 +00:00
results := make(map[string]struct{})
var jsonObj interface{}
if err := json.Unmarshal([]byte(corpus), &jsonObj); err != nil {
2021-07-31 20:49:23 +00:00
return results
}
for _, k := range e.jsonCompiled {
iter := k.Run(jsonObj)
for {
v, ok := iter.Next()
if !ok {
break
}
if _, ok := v.(error); ok {
break
}
var result string
if res, err := types.JSONScalarToString(v); err == nil {
result = res
} else if res, err := json.Marshal(v); err == nil {
result = string(res)
} else {
result = types.ToString(v)
}
if _, ok := results[result]; !ok {
results[result] = struct{}{}
2021-07-31 20:49:23 +00:00
}
}
}
return results
}