2020-04-05 18:35:01 +00:00
|
|
|
package extractors
|
|
|
|
|
2021-01-12 05:51:32 +00:00
|
|
|
import (
|
2021-07-31 20:49:23 +00:00
|
|
|
"encoding/json"
|
2021-11-25 15:09:20 +00:00
|
|
|
"strings"
|
2021-08-01 12:42:04 +00:00
|
|
|
|
2021-08-02 16:13:50 +00:00
|
|
|
"github.com/antchfx/htmlquery"
|
2021-08-09 11:04:57 +00:00
|
|
|
|
2021-01-12 05:51:32 +00:00
|
|
|
"github.com/projectdiscovery/nuclei/v2/pkg/types"
|
|
|
|
)
|
2020-07-16 08:32:00 +00:00
|
|
|
|
2020-12-24 15:17:41 +00:00
|
|
|
// ExtractRegex extracts text from a corpus and returns it
|
|
|
|
func (e *Extractor) ExtractRegex(corpus string) map[string]struct{} {
|
2020-04-27 18:04:08 +00:00
|
|
|
results := make(map[string]struct{})
|
2020-08-25 21:24:31 +00:00
|
|
|
|
2020-09-17 06:31:51 +00:00
|
|
|
groupPlusOne := e.RegexGroup + 1
|
2020-04-05 19:14:45 +00:00
|
|
|
for _, regex := range e.regexCompiled {
|
2020-09-17 06:31:51 +00:00
|
|
|
matches := regex.FindAllStringSubmatch(corpus, -1)
|
2020-12-24 06:43:18 +00:00
|
|
|
|
2020-04-27 18:04:08 +00:00
|
|
|
for _, match := range matches {
|
2020-12-24 06:43:18 +00:00
|
|
|
if len(match) < groupPlusOne {
|
|
|
|
continue
|
2020-09-17 06:31:51 +00:00
|
|
|
}
|
2020-12-24 06:43:18 +00:00
|
|
|
matchString := match[e.RegexGroup]
|
2020-08-25 21:24:31 +00:00
|
|
|
|
2020-12-24 06:43:18 +00:00
|
|
|
if _, ok := results[matchString]; !ok {
|
|
|
|
results[matchString] = struct{}{}
|
|
|
|
}
|
2020-07-16 08:32:00 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return results
|
|
|
|
}
|
|
|
|
|
2020-12-24 15:17:41 +00:00
|
|
|
// ExtractKval extracts key value pairs from a data map
|
|
|
|
func (e *Extractor) ExtractKval(data map[string]interface{}) map[string]struct{} {
|
2021-10-29 16:08:18 +00:00
|
|
|
if e.CaseInsensitive {
|
|
|
|
inputData := data
|
|
|
|
data = make(map[string]interface{}, len(inputData))
|
|
|
|
for k, v := range inputData {
|
|
|
|
if s, ok := v.(string); ok {
|
|
|
|
v = strings.ToLower(s)
|
|
|
|
}
|
|
|
|
data[strings.ToLower(k)] = v
|
|
|
|
}
|
|
|
|
}
|
2020-08-25 21:24:31 +00:00
|
|
|
|
2021-10-29 16:08:18 +00:00
|
|
|
results := make(map[string]struct{})
|
2020-07-16 10:58:56 +00:00
|
|
|
for _, k := range e.KVal {
|
2020-12-24 06:43:18 +00:00
|
|
|
item, ok := data[k]
|
|
|
|
if !ok {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
itemString := types.ToString(item)
|
|
|
|
if _, ok := results[itemString]; !ok {
|
|
|
|
results[itemString] = struct{}{}
|
2020-07-16 08:32:00 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return results
|
|
|
|
}
|
2021-08-02 16:13:50 +00:00
|
|
|
|
|
|
|
// ExtractHTML extracts items from text using XPath selectors
|
|
|
|
func (e *Extractor) ExtractHTML(corpus string) map[string]struct{} {
|
|
|
|
results := make(map[string]struct{})
|
|
|
|
|
|
|
|
doc, err := htmlquery.Parse(strings.NewReader(corpus))
|
|
|
|
if err != nil {
|
|
|
|
return results
|
|
|
|
}
|
|
|
|
for _, k := range e.XPath {
|
|
|
|
nodes, err := htmlquery.QueryAll(doc, k)
|
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
for _, node := range nodes {
|
|
|
|
var value string
|
|
|
|
|
|
|
|
if e.Attribute != "" {
|
|
|
|
value = htmlquery.SelectAttr(node, e.Attribute)
|
|
|
|
} else {
|
|
|
|
value = htmlquery.InnerText(node)
|
|
|
|
}
|
|
|
|
if _, ok := results[value]; !ok {
|
|
|
|
results[value] = struct{}{}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return results
|
|
|
|
}
|
2021-08-09 11:04:57 +00:00
|
|
|
|
2021-08-02 11:19:39 +00:00
|
|
|
// ExtractJSON extracts text from a corpus using JQ queries and returns it
|
2021-08-01 12:42:04 +00:00
|
|
|
func (e *Extractor) ExtractJSON(corpus string) map[string]struct{} {
|
2021-07-31 20:49:23 +00:00
|
|
|
results := make(map[string]struct{})
|
|
|
|
|
|
|
|
var jsonObj interface{}
|
|
|
|
|
2021-08-31 09:55:52 +00:00
|
|
|
if err := json.Unmarshal([]byte(corpus), &jsonObj); err != nil {
|
2021-07-31 20:49:23 +00:00
|
|
|
return results
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, k := range e.jsonCompiled {
|
|
|
|
iter := k.Run(jsonObj)
|
|
|
|
for {
|
|
|
|
v, ok := iter.Next()
|
|
|
|
if !ok {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
if _, ok := v.(error); ok {
|
|
|
|
break
|
|
|
|
}
|
2021-08-02 11:19:39 +00:00
|
|
|
var result string
|
|
|
|
if res, err := types.JSONScalarToString(v); err == nil {
|
|
|
|
result = res
|
|
|
|
} else if res, err := json.Marshal(v); err == nil {
|
|
|
|
result = string(res)
|
|
|
|
} else {
|
|
|
|
result = types.ToString(v)
|
|
|
|
}
|
|
|
|
if _, ok := results[result]; !ok {
|
|
|
|
results[result] = struct{}{}
|
2021-07-31 20:49:23 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return results
|
|
|
|
}
|