Added subdomain scraping sources
parent
0d6b09f41d
commit
a045078bb0
|
@ -0,0 +1,55 @@
|
||||||
|
package subscraping
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/tls"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NewSession creates a new session object for a domain
|
||||||
|
func NewSession(domain string, keys Keys, timeout int) (*Session, error) {
|
||||||
|
client := &http.Client{
|
||||||
|
Transport: &http.Transport{
|
||||||
|
MaxIdleConns: 100,
|
||||||
|
MaxIdleConnsPerHost: 100,
|
||||||
|
TLSClientConfig: &tls.Config{
|
||||||
|
InsecureSkipVerify: true,
|
||||||
|
},
|
||||||
|
TLSHandshakeTimeout: 10 * time.Second,
|
||||||
|
ResponseHeaderTimeout: 10 * time.Second,
|
||||||
|
ExpectContinueTimeout: 1 * time.Second,
|
||||||
|
},
|
||||||
|
Timeout: time.Duration(timeout) * time.Second,
|
||||||
|
}
|
||||||
|
|
||||||
|
session := &Session{
|
||||||
|
Client: client,
|
||||||
|
Keys: keys,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a new extractor object for the current domain
|
||||||
|
extractor, err := NewSubdomainExtractor(domain)
|
||||||
|
session.Extractor = extractor
|
||||||
|
|
||||||
|
return session, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// NormalGet makes a normal GET request to a URL
|
||||||
|
func (s *Session) NormalGet(url string) (*http.Response, error) {
|
||||||
|
req, err := http.NewRequest("GET", url, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Don't randomize user agents, as they cause issues sometimes
|
||||||
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36")
|
||||||
|
req.Header.Set("Accept", "*/*")
|
||||||
|
req.Header.Set("Accept-Language", "en")
|
||||||
|
|
||||||
|
resp, err := s.Client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp, nil
|
||||||
|
}
|
|
@ -0,0 +1,79 @@
|
||||||
|
// Package archiveis is an archive.is scraping engine in Golang
|
||||||
|
package archiveis
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"io/ioutil"
|
||||||
|
"regexp"
|
||||||
|
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ArchiveIs is a struct for archiveurlsagent
|
||||||
|
type ArchiveIs struct {
|
||||||
|
Results chan subscraping.Result
|
||||||
|
Session *subscraping.Session
|
||||||
|
}
|
||||||
|
|
||||||
|
// reNext captures the href of the "next page" (→) anchor; enumerate follows it to page through results.
var reNext = regexp.MustCompile("<a id=\"next\" style=\".*\" href=\"(.*)\">→</a>")
|
||||||
|
|
||||||
|
func (a *ArchiveIs) enumerate(ctx context.Context, baseURL string) {
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
close(a.Results)
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
resp, err := a.Session.NormalGet(baseURL)
|
||||||
|
if err != nil {
|
||||||
|
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Error, Error: err}
|
||||||
|
close(a.Results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the response body
|
||||||
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Error, Error: err}
|
||||||
|
close(a.Results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
src := string(body)
|
||||||
|
|
||||||
|
for _, subdomain := range a.Session.Extractor.FindAllString(src, -1) {
|
||||||
|
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Subdomain, Value: subdomain}
|
||||||
|
}
|
||||||
|
|
||||||
|
match1 := reNext.FindStringSubmatch(src)
|
||||||
|
if len(match1) > 0 {
|
||||||
|
a.enumerate(ctx, match1[1])
|
||||||
|
}
|
||||||
|
close(a.Results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Source is the passive scraping agent
|
||||||
|
// Source has no fields; everything a run needs is passed to Run as arguments.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
aInstance := ArchiveIs{
|
||||||
|
Session: session,
|
||||||
|
Results: results,
|
||||||
|
}
|
||||||
|
|
||||||
|
go aInstance.enumerate(ctx, "http://archive.is/*."+domain)
|
||||||
|
|
||||||
|
return aInstance.Results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "archiveis"
|
||||||
|
}
|
|
@ -0,0 +1,106 @@
|
||||||
|
package binaryedge
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
jsoniter "github.com/json-iterator/go"
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// binaryedgeResponse is the part of the BinaryEdge subdomain-query reply that
// this source decodes.
type binaryedgeResponse struct {
	// Subdomains is decoded from the "events" array.
	Subdomains []string `json:"events"`
	// Total is decoded from "total"; Run derives the remaining page count from it.
	Total int `json:"total"`
}
|
||||||
|
|
||||||
|
// Source is the passive scraping agent
|
||||||
|
// Source has no fields; everything a run needs is passed to Run as arguments.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
if session.Keys.Binaryedge == "" {
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := session.Client.Get(fmt.Sprintf("https://api.binaryedge.io/v2/query/domains/subdomain/%s", domain), "", map[string]string{"X-Key": session.Keys.Binaryedge})
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
response := new(binaryedgeResponse)
|
||||||
|
err = jsoniter.NewDecoder(resp.Body).Decode(&response)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
for _, subdomain := range response.Subdomains {
|
||||||
|
subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.")
|
||||||
|
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
|
||||||
|
}
|
||||||
|
|
||||||
|
remaining := response.Total - 100
|
||||||
|
currentPage := 2
|
||||||
|
|
||||||
|
for {
|
||||||
|
further := s.getSubdomains(ctx, domain, &remaining, ¤tPage, session, results)
|
||||||
|
if !further {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "binaryedge"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Source) getSubdomains(ctx context.Context, domain string, remaining, currentPage *int, session *subscraping.Session, results chan subscraping.Result) bool {
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return false
|
||||||
|
default:
|
||||||
|
resp, err := session.Client.Get(fmt.Sprintf("https://api.binaryedge.io/v2/query/domains/subdomain/%s?page=%d", domain, *currentPage), "", map[string]string{"X-Key": session.Keys.Binaryedge})
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
response := binaryedgeResponse{}
|
||||||
|
err = jsoniter.NewDecoder(resp.Body).Decode(&response)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
for _, subdomain := range response.Subdomains {
|
||||||
|
subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.")
|
||||||
|
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
|
||||||
|
}
|
||||||
|
|
||||||
|
*remaining = *remaining - 100
|
||||||
|
if *remaining <= 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
*currentPage++
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,57 @@
|
||||||
|
// Package bufferover is a bufferover Scraping Engine in Golang
|
||||||
|
package bufferover
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Source is the passive scraping agent
|
||||||
|
// Source has no fields; everything a run needs is passed to Run as arguments.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
// Run enumeration on subdomain dataset for historical SONAR datasets
|
||||||
|
s.getData(fmt.Sprintf("https://dns.bufferover.run/dns?q=.%s", domain), session, results)
|
||||||
|
s.getData(fmt.Sprintf("https://tls.bufferover.run/dns?q=.%s", domain), session, results)
|
||||||
|
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Source) getData(URL string, session *subscraping.Session, results chan subscraping.Result) {
|
||||||
|
resp, err := session.NormalGet(URL)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
src := string(body)
|
||||||
|
|
||||||
|
for _, subdomain := range session.Extractor.FindAllString(src, -1) {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "bufferover"
|
||||||
|
}
|
|
@ -0,0 +1,97 @@
|
||||||
|
package certspotter
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
jsoniter "github.com/json-iterator/go"
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// certspotterObject is one element of the JSON array decoded from a
// certspotter issuances response.
type certspotterObject struct {
	// ID is decoded from "id"; Run passes the last one back as the "after" cursor.
	ID string `json:"id"`
	// DNSNames is decoded from "dns_names".
	DNSNames []string `json:"dns_names"`
}
|
||||||
|
|
||||||
|
// Source is the passive scraping agent
|
||||||
|
// Source has no fields; everything a run needs is passed to Run as arguments.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
if session.Keys.Certspotter == "" {
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := session.Client.Get(fmt.Sprintf("https://api.certspotter.com/v1/issuances?domain=%s&include_subdomains=true&expand=dns_names", domain), "", map[string]string{"Authorization": "Bearer " + session.Keys.Certspotter})
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
response := []certspotterObject{}
|
||||||
|
err = jsoniter.NewDecoder(resp.Body).Decode(&response)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
for _, cert := range response {
|
||||||
|
for _, subdomain := range cert.DNSNames {
|
||||||
|
subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.")
|
||||||
|
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
id := response[len(response)-1].ID
|
||||||
|
for {
|
||||||
|
reqURL := fmt.Sprintf("https://api.certspotter.com/v1/issuances?domain=%s&include_subdomains=true&expand=dns_names&after=%s", domain, id)
|
||||||
|
|
||||||
|
resp, err := session.Client.Get(reqURL, "", map[string]string{"Authorization": "Bearer " + session.Keys.Certspotter})
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
response := []certspotterObject{}
|
||||||
|
err = jsoniter.NewDecoder(resp.Body).Decode(&response)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
if len(response) == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cert := range response {
|
||||||
|
for _, subdomain := range cert.DNSNames {
|
||||||
|
subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.")
|
||||||
|
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
id = response[len(response)-1].ID
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "certspotter"
|
||||||
|
}
|
|
@ -0,0 +1,51 @@
|
||||||
|
package certspotterold
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Source is the passive scraping agent
|
||||||
|
// Source has no fields; everything a run needs is passed to Run as arguments.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
resp, err := session.NormalGet(fmt.Sprintf("https://certspotter.com/api/v0/certs?domain=%s", domain))
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
src := string(body)
|
||||||
|
|
||||||
|
for _, subdomain := range session.Extractor.FindAllString(src, -1) {
|
||||||
|
subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.")
|
||||||
|
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "certspotterold"
|
||||||
|
}
|
|
@ -0,0 +1,105 @@
|
||||||
|
package commoncrawl
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
jsoniter "github.com/json-iterator/go"
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// indexURL is fetched by Run and decoded as a JSON array of indexResponse entries.
const indexURL = "https://index.commoncrawl.org/collinfo.json"
|
||||||
|
|
||||||
|
// indexResponse is one entry of the index list decoded from indexURL.
type indexResponse struct {
	// ID is decoded from "id"; Run matches it against the configured years.
	ID string `json:"id"`
	// APIURL is decoded from "cdx-api"; it is the URL queried for subdomains.
	APIURL string `json:"cdx-api"`
}
|
||||||
|
|
||||||
|
// Source is the passive scraping agent
|
||||||
|
// Source has no fields; everything a run needs is passed to Run as arguments.
type Source struct{}
|
||||||
|
|
||||||
|
// years lists the substrings matched against index IDs; Run keeps the first matching index for each entry.
var years = [...]string{"2019", "2018", "2017", "2016"}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
resp, err := session.NormalGet(indexURL)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
indexes := []indexResponse{}
|
||||||
|
err = jsoniter.NewDecoder(resp.Body).Decode(&indexes)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
searchIndexes := make(map[string]string)
|
||||||
|
for _, year := range years {
|
||||||
|
for _, index := range indexes {
|
||||||
|
if strings.Contains(index.ID, year) {
|
||||||
|
if _, ok := searchIndexes[year]; !ok {
|
||||||
|
searchIndexes[year] = index.APIURL
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, url := range searchIndexes {
|
||||||
|
further := s.getSubdomains(ctx, url, domain, session, results)
|
||||||
|
if !further {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "commoncrawl"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Source) getSubdomains(ctx context.Context, url string, domain string, session *subscraping.Session, results chan subscraping.Result) bool {
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return false
|
||||||
|
default:
|
||||||
|
resp, err := session.NormalGet(fmt.Sprintf("%s?url=*.%s&output=json", url, domain))
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
src := string(body)
|
||||||
|
|
||||||
|
for _, subdomain := range session.Extractor.FindAllString(src, -1) {
|
||||||
|
subdomain = strings.TrimPrefix(subdomain, "25")
|
||||||
|
subdomain = strings.TrimPrefix(subdomain, "2F")
|
||||||
|
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,51 @@
|
||||||
|
package crtsh
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Source is the passive scraping agent
|
||||||
|
// Source has no fields; everything a run needs is passed to Run as arguments.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
resp, err := session.NormalGet(fmt.Sprintf("https://crt.sh/?q=%%25.%s&output=json", domain))
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
src := string(body)
|
||||||
|
|
||||||
|
for _, subdomain := range session.Extractor.FindAllString(src, -1) {
|
||||||
|
subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.")
|
||||||
|
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "crtsh"
|
||||||
|
}
|
|
@ -0,0 +1,63 @@
|
||||||
|
package digicert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
jsoniter "github.com/json-iterator/go"
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Source is the passive scraping agent
|
||||||
|
// Source has no fields; everything a run needs is passed to Run as arguments.
type Source struct{}
|
||||||
|
|
||||||
|
// responseData is the part of the DigiCert CT-search reply that this source
// decodes: the list found under data.certificateDetail.
type responseData struct {
	Data struct {
		CertificateDetail []struct {
			// NOTE(review): the tag below reads "string"; it looks like a
			// typo for the certificate's common-name key — confirm against
			// a real API response before changing it.
			CommonName string `json:"string"`
			// SubjectAltNames is decoded from "subjectAlternativeNames".
			SubjectAltNames []string `json:"subjectAlternativeNames"`
		} `json:"certificateDetail"`
	} `json:"data"`
}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
resp, err := session.Client.Get(fmt.Sprintf("https://ssltools.digicert.com/chainTester/webservice/ctsearch/search?keyword=%s", domain), "", nil)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
response := responseData{}
|
||||||
|
err = jsoniter.NewDecoder(resp.Body).Decode(&response)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cert := range response.Data.CertificateDetail {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: strings.TrimPrefix(strings.ToLower(cert.CommonName), "*.")}
|
||||||
|
|
||||||
|
for _, subdomain := range cert.SubjectAltNames {
|
||||||
|
subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.")
|
||||||
|
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "digicert"
|
||||||
|
}
|
|
@ -0,0 +1,27 @@
|
||||||
|
package digicert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestDigicert(t *testing.T) {
|
||||||
|
agent, _ := subscraping.New("", 30)
|
||||||
|
session, err := agent.NewSession("freelancer.com", subscraping.Keys{})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Invalid subdomain found: %s\n", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
source := Source{}
|
||||||
|
for resp := range source.Run(context.Background(), "freelancer.com", session) {
|
||||||
|
if resp.Type == subscraping.Error {
|
||||||
|
t.Fatalf("Source %s errored out: %s\n", source.Name(), resp.Error)
|
||||||
|
}
|
||||||
|
if !strings.HasSuffix(resp.Value, "freelancer.com") {
|
||||||
|
t.Fatalf("Invalid, expected for %s got %s\n", "freelancer.com", resp.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,112 @@
|
||||||
|
package dnsdumpster
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"io/ioutil"
|
||||||
|
"net"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// re captures the value attribute of the hidden csrfmiddlewaretoken input.
// The capture stops at the closing quote of the attribute, so markup that
// follows the input on the same line is not swallowed into the token.
var re = regexp.MustCompile(`<input type="hidden" name="csrfmiddlewaretoken" value="([^"]*)">`)

// getCSRFToken gets the CSRF Token from the page.
// It returns the whitespace-trimmed token, or "" when the page does not
// contain the hidden input.
func getCSRFToken(page string) string {
	if subs := re.FindStringSubmatch(page); len(subs) == 2 {
		return strings.TrimSpace(subs[1])
	}
	return ""
}
|
||||||
|
|
||||||
|
// postForm posts a form for a domain and returns the response.
//
// token is sent three ways: as the csrfmiddlewaretoken form field, as the
// csrftoken cookie and as the X-CSRF-Token header. The whole response page is
// returned as a string. An error is returned when the request cannot be
// built or sent, or when reading the response body fails.
//
// NOTE(review): the client built here has no overall request timeout, only a
// TLS handshake timeout — confirm that is acceptable.
func postForm(token, domain string) (string, error) {
	dial := net.Dialer{}
	client := &http.Client{
		Transport: &http.Transport{
			DialContext:         dial.DialContext,
			TLSHandshakeTimeout: 10 * time.Second,
		},
	}
	params := url.Values{
		"csrfmiddlewaretoken": {token},
		"targetip":            {domain},
	}

	req, err := http.NewRequest("POST", "https://dnsdumpster.com/", strings.NewReader(params.Encode()))
	if err != nil {
		return "", err
	}

	// The CSRF token needs to be sent as a cookie
	cookie := &http.Cookie{
		Name:   "csrftoken",
		Domain: "dnsdumpster.com",
		Value:  token,
	}
	req.AddCookie(cookie)

	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36")
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("Referer", "https://dnsdumpster.com")
	req.Header.Set("X-CSRF-Token", token)

	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// Now, grab the entire page; a failed read is reported rather than
	// returned as a truncated page.
	in, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	return string(in), nil
}
|
||||||
|
|
||||||
|
// Source is the passive scraping agent
|
||||||
|
// Source has no fields; everything a run needs is passed to Run as arguments.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
resp, err := session.NormalGet("https://dnsdumpster.com/")
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
csrfToken := getCSRFToken(string(body))
|
||||||
|
|
||||||
|
data, err := postForm(csrfToken, domain)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, subdomain := range session.Extractor.FindAllString(data, -1) {
|
||||||
|
subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.")
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "dnsdumpster"
|
||||||
|
}
|
|
@ -0,0 +1,52 @@
|
||||||
|
package entrust
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Source is the passive scraping agent
|
||||||
|
// Source has no fields; everything a run needs is passed to Run as arguments.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
resp, err := session.NormalGet(fmt.Sprintf("https://ctsearch.entrust.com/api/v1/certificates?fields=issuerCN,subjectO,issuerDN,issuerO,subjectDN,signAlg,san,publicKeyType,publicKeySize,validFrom,validTo,sn,ev,logEntries.logName,subjectCNReversed,cert&domain=%s&includeExpired=true&exactMatch=false&limit=5000", domain))
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
src := string(body)
|
||||||
|
|
||||||
|
for _, subdomain := range session.Extractor.FindAllString(src, -1) {
|
||||||
|
subdomain = strings.TrimPrefix(subdomain, "u003d")
|
||||||
|
subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.")
|
||||||
|
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "entrust"
|
||||||
|
}
|
|
@ -0,0 +1,100 @@
|
||||||
|
package facebook
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
jsoniter "github.com/json-iterator/go"
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// authResponse is the part of the Facebook OAuth token reply that this
// source decodes.
type authResponse struct {
	// AccessToken is decoded from "access_token".
	AccessToken string `json:"access_token"`
}
|
||||||
|
|
||||||
|
// response is the part of a Facebook certificates reply that this source
// decodes: the domains of each entry and the link to the next page.
type response struct {
	// Data is decoded from "data"; each entry carries its "domains" list.
	Data []struct {
		Domains []string `json:"domains"`
	} `json:"data"`

	// Paging is decoded from "paging"; Next holds its "next" value.
	Paging struct {
		Next string `json:"next"`
	} `json:"paging"`
}
|
||||||
|
|
||||||
|
// Source is the passive scraping agent
|
||||||
|
// Source has no fields; everything a run needs is passed to Run as arguments.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
if session.Keys.FacebookAppID == "" || session.Keys.FacebookAppSecret == "" {
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := session.NormalGet(fmt.Sprintf("https://graph.facebook.com/oauth/access_token?client_id=%s&client_secret=%s&grant_type=client_credentials", session.Keys.FacebookAppID, session.Keys.FacebookAppSecret))
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
auth := authResponse{}
|
||||||
|
err = jsoniter.NewDecoder(resp.Body).Decode(&auth)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if auth.AccessToken == "" {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: errors.New("no access token in Facebook API response")}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fetchURL := fmt.Sprintf("https://graph.facebook.com/certificates?fields=domains&access_token=%s&query=*.%s", auth.AccessToken, domain)
|
||||||
|
|
||||||
|
wrapper := new(response)
|
||||||
|
for {
|
||||||
|
resp, err := session.NormalGet(fetchURL)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
err = jsoniter.NewDecoder(resp.Body).Decode(&wrapper)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, data := range wrapper.Data {
|
||||||
|
for _, d := range data.Domains {
|
||||||
|
d := strings.TrimPrefix(strings.ToLower(d), "*.")
|
||||||
|
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: d}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fetchURL = wrapper.Paging.Next
|
||||||
|
if fetchURL == "" {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "facebook"
|
||||||
|
}
|
|
@ -0,0 +1,119 @@
|
||||||
|
package googleter
|
||||||
|
|
||||||
|
import (
	"context"
	"errors"
	"io/ioutil"
	"net/http"
	"net/url"
	"regexp"
	"strconv"

	"github.com/subfinder/subfinder/pkg/subscraping"
)
|
||||||
|
|
||||||
|
// metaRegex matches the paging metadata array that begins with null. Run and
// getSubdomains read the paging token from its first capture group, and Run
// reads the page count from the third.
var metaRegex = regexp.MustCompile(`\[null,"(.*)",null,(.*),(.*)]`)

// metaRegex2 matches the variant of the metadata array that begins with a
// quoted string; getSubdomains reads the next token from its first group.
var metaRegex2 = regexp.MustCompile(`\["(.*)",".*",null,(.*),(.*)]`)
|
||||||
|
|
||||||
|
type agent struct {
|
||||||
|
subdomains chan subscraping.Result
|
||||||
|
session *subscraping.Session
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *agent) makeRequest(token string, domain string) (string, error) {
|
||||||
|
requestURI := ""
|
||||||
|
|
||||||
|
if token == "" {
|
||||||
|
requestURI = "https://www.google.com/transparencyreport/api/v3/httpsreport/ct/certsearch?domain=" + url.QueryEscape(domain) + "&include_expired=true&include_subdomains=true"
|
||||||
|
} else {
|
||||||
|
requestURI = "https://www.google.com/transparencyreport/api/v3/httpsreport/ct/certsearch/page?domain=" + url.QueryEscape(domain) + "&include_expired=true&include_subdomains=true&p=" + url.QueryEscape(token)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := a.session.Client.Get(requestURI, "", map[string]string{
|
||||||
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
|
||||||
|
"Referer": "https://transparencyreport.google.com/https/certificates",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
return string(body), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Source is the googleter passive scraping agent. It carries no state.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
a := agent{
|
||||||
|
session: session,
|
||||||
|
subdomains: results,
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
respBody, err := a.makeRequest("", domain)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var Token string
|
||||||
|
|
||||||
|
matches := metaRegex.FindStringSubmatch(string(respBody))
|
||||||
|
if len(matches) <= 1 {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, sub := range session.Extractor.FindAllString(respBody, -1) {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: sub}
|
||||||
|
}
|
||||||
|
|
||||||
|
Token = matches[1]
|
||||||
|
MaxPages, _ := strconv.Atoi(matches[3])
|
||||||
|
for i := 1; i <= MaxPages; i++ {
|
||||||
|
further := a.getSubdomains(ctx, &Token, domain, session, s, results)
|
||||||
|
if !further {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "googleter"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *agent) getSubdomains(ctx context.Context, Token *string, domain string, session *subscraping.Session, s *Source, results chan subscraping.Result) bool {
|
||||||
|
respBody, err := a.makeRequest(*Token, domain)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for _, sub := range session.Extractor.FindAllString(respBody, -1) {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: sub}
|
||||||
|
}
|
||||||
|
|
||||||
|
matches := metaRegex2.FindStringSubmatch(respBody)
|
||||||
|
matches2 := metaRegex.FindStringSubmatch(respBody)
|
||||||
|
if len(matches2) > 1 {
|
||||||
|
*Token = matches2[1]
|
||||||
|
}
|
||||||
|
if len(matches) > 1 {
|
||||||
|
*Token = matches[1]
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
package hackertarget
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Source is the hackertarget passive scraping agent. It carries no state.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
resp, err := session.NormalGet(fmt.Sprintf("http://api.hackertarget.com/hostsearch/?q=%s", domain))
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the response body
|
||||||
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
src := string(body)
|
||||||
|
|
||||||
|
for _, match := range session.Extractor.FindAllString(src, -1) {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: match}
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "hackertarget"
|
||||||
|
}
|
|
@ -0,0 +1,167 @@
|
||||||
|
package ipv4info
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"io/ioutil"
|
||||||
|
"regexp"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Source is the ipv4info passive scraping agent. It carries no state.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
resp, err := Session.NormalGet("http://ipv4info.com/search/" + domain)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
src := string(body)
|
||||||
|
|
||||||
|
regxTokens := regexp.MustCompile("/ip-address/(.*)/" + domain)
|
||||||
|
matchTokens := regxTokens.FindAllString(src, -1)
|
||||||
|
|
||||||
|
if len(matchTokens) <= 0 {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: errors.New("Could not get ip tokens")}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
token := matchTokens[0]
|
||||||
|
|
||||||
|
resp, err = Session.NormalGet("http://ipv4info.com" + token)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err = ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
src = string(body)
|
||||||
|
|
||||||
|
regxTokens = regexp.MustCompile("/dns/(.*?)/" + domain)
|
||||||
|
matchTokens = regxTokens.FindAllString(src, -1)
|
||||||
|
if len(matchTokens) <= 0 {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: errors.New("Could not get ip tokens")}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
token = matchTokens[0]
|
||||||
|
|
||||||
|
resp, err = Session.NormalGet("http://ipv4info.com" + token)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err = ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
src = string(body)
|
||||||
|
|
||||||
|
regxTokens = regexp.MustCompile("/subdomains/(.*?)/" + domain)
|
||||||
|
matchTokens = regxTokens.FindAllString(src, -1)
|
||||||
|
if len(matchTokens) <= 0 {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: errors.New("Could not get ip tokens")}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
token = matchTokens[0]
|
||||||
|
|
||||||
|
resp, err = Session.NormalGet("http://ipv4info.com" + token)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err = ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
src = string(body)
|
||||||
|
|
||||||
|
for _, match := range session.Extractor.FindAllString(src, -1) {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: match}
|
||||||
|
}
|
||||||
|
nextPage := 1
|
||||||
|
|
||||||
|
for {
|
||||||
|
further := s.getSubdomains(ctx, domain, &nextPage, src, session, results)
|
||||||
|
if !further {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "ipv4info"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Source) getSubdomains(ctx context.Context, domain string, nextPage *int, src string, session *subscraping.Session, results chan subscraping.Result) bool {
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return false
|
||||||
|
default:
|
||||||
|
regxTokens := regexp.MustCompile("/subdomains/.*/page" + strconv.Itoa(*nextPage) + "/" + domain + ".html")
|
||||||
|
matchTokens := regxTokens.FindAllString(src, -1)
|
||||||
|
if len(matchTokens) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
token := matchTokens[0]
|
||||||
|
|
||||||
|
resp, err := Session.NormalGet("http://ipv4info.com" + token)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
src = string(body)
|
||||||
|
for _, match := range session.Extractor.FindAllString(src, -1) {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: match}
|
||||||
|
}
|
||||||
|
*nextPage++
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,71 @@
|
||||||
|
package passivetotal
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
jsoniter "github.com/json-iterator/go"
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// response is the JSON payload decoded from the PassiveTotal subdomain
// enrichment endpoint.
type response struct {
	// Subdomains is read from the "subdomains" key; Run appends the queried
	// domain to each entry.
	Subdomains []string `json:"subdomains"`
}
|
||||||
|
|
||||||
|
// Source is the passivetotal passive scraping agent. It carries no state.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
if session.Keys.PassiveTotalUsername == "" || session.Keys.PassiveTotalPassword == "" {
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create JSON Get body
|
||||||
|
var request = []byte(`{"query":"` + domain + `"}`)
|
||||||
|
|
||||||
|
req, err := http.NewRequest("GET", "https://api.passivetotal.org/v2/enrichment/subdomains", bytes.NewBuffer(request))
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
req.SetBasicAuth(session.Keys.PassiveTotalUsername, session.Keys.PassiveTotalPassword)
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
|
||||||
|
resp, err := session.Client.Client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
data := response{}
|
||||||
|
err = jsoniter.NewDecoder(resp.Body).Decode(&data)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
for _, subdomain := range data.Subdomains {
|
||||||
|
finalSubdomain := subdomain + "." + domain
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: finalSubdomain}
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "passivetotal"
|
||||||
|
}
|
|
@ -0,0 +1,64 @@
|
||||||
|
package securitytrails
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
jsoniter "github.com/json-iterator/go"
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// response is the JSON payload decoded from the SecurityTrails subdomains
// endpoint.
type response struct {
	// Subdomains is read from the "subdomains" key; Run appends the queried
	// domain to each entry.
	Subdomains []string `json:"subdomains"`
}
|
||||||
|
|
||||||
|
// Source is the securitytrails passive scraping agent. It carries no state.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
if session.Keys.Securitytrails == "" {
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := session.Client.Get(fmt.Sprintf("https://api.securitytrails.com/v1/domain/%s/subdomains", domain), "", map[string]string{"APIKEY": session.Keys.Securitytrails})
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
response := response{}
|
||||||
|
err = jsoniter.NewDecoder(resp.Body).Decode(&response)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
for _, subdomain := range response.Subdomains {
|
||||||
|
if strings.HasSuffix(subdomain, ".") {
|
||||||
|
subdomain = subdomain + domain
|
||||||
|
} else {
|
||||||
|
subdomain = subdomain + "." + domain
|
||||||
|
}
|
||||||
|
subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.")
|
||||||
|
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "securitytrails"
|
||||||
|
}
|
|
@ -0,0 +1,82 @@
|
||||||
|
package sitedossier
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"math/rand"
|
||||||
|
"regexp"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// reNext captures the href of an anchor whose content starts with a bold
// tag; enumerate follows the captured path as the link to the next page.
var reNext = regexp.MustCompile("<a href=\"([A-Za-z0-9\\/.]+)\"><b>")
|
||||||
|
|
||||||
|
type agent struct {
|
||||||
|
results chan subscraping.Result
|
||||||
|
session *subscraping.Session
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *agent) enumerate(ctx context.Context, baseURL string) error {
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return nil
|
||||||
|
default:
|
||||||
|
resp, err := a.Session.NormalGet(baseURL)
|
||||||
|
if err != nil {
|
||||||
|
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
|
||||||
|
close(a.results)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
|
||||||
|
close(a.results)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
src := string(body)
|
||||||
|
|
||||||
|
for _, match := range a.session.Extractor.FindAllString(src, -1) {
|
||||||
|
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Subdomain, Value: match}
|
||||||
|
}
|
||||||
|
|
||||||
|
match1 := reNext.FindStringSubmatch(src)
|
||||||
|
time.Sleep(time.Duration((3 + rand.Intn(5))) * time.Second)
|
||||||
|
|
||||||
|
if len(match1) > 0 {
|
||||||
|
a.enumerate(ctx, "http://www.sitedossier.com"+match1[1])
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Source is the sitedossier passive scraping agent. It carries no state.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
a := agent{
|
||||||
|
session: session,
|
||||||
|
results: results,
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
err := a.enumerate(ctx, fmt.Sprintf("http://www.sitedossier.com/parentdomain/%s", domain))
|
||||||
|
if err == nil {
|
||||||
|
close(a.results)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "sitedossier"
|
||||||
|
}
|
|
@ -0,0 +1,49 @@
|
||||||
|
package threatcrowd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Source is the threatcrowd passive scraping agent. It carries no state.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
resp, err := Session.NormalGet(fmt.Sprintf("https://www.threatcrowd.org/searchApi/v2/domain/report/?domain=%s", domain))
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the response body
|
||||||
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
src := string(body)
|
||||||
|
|
||||||
|
for _, match := range session.Extractor.FindAllString(src, -1) {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: match}
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "threatcrowd"
|
||||||
|
}
|
|
@ -0,0 +1,49 @@
|
||||||
|
package threatminer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Source is the threatminer passive scraping agent. It carries no state.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
resp, err := session.NormalGet(fmt.Sprintf("https://api.threatminer.org/v2/domain.php?q=%s&rt=5", domain))
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the response body
|
||||||
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
src := string(body)
|
||||||
|
|
||||||
|
for _, match := range session.Extractor.FindAllString(src, -1) {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: match}
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "threatminer"
|
||||||
|
}
|
|
@ -0,0 +1,60 @@
|
||||||
|
package urlscan
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
jsoniter "github.com/json-iterator/go"
|
||||||
|
"github.com/m-mizutani/urlscan-go/urlscan"
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Source is the urlscan passive scraping agent. It carries no state.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
if session.Keys.URLScan == "" {
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
client := urlscan.NewClient(session.Keys.URLScan)
|
||||||
|
task, err := client.Submit(urlscan.SubmitArguments{URL: fmt.Sprintf("https://%s", domain)})
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
err = task.Wait()
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := jsoniter.Marshal(task.Result.Data)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
match := session.Extractor.FindAllString(string(data), -1)
|
||||||
|
for _, m := range match {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: m}
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "urlscan"
|
||||||
|
}
|
|
@ -0,0 +1,58 @@
|
||||||
|
package virustotal
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
jsoniter "github.com/json-iterator/go"
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// response is the JSON payload decoded from the VirusTotal domain report.
type response struct {
	// Subdomains is read from the "subdomains" key; Run emits each entry
	// after lower-casing it and trimming a leading "*.".
	Subdomains []string `json:"subdomains"`
}
|
||||||
|
|
||||||
|
// Source is the virustotal passive scraping agent. It carries no state.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
if session.Keys.Virustotal == "" {
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := Session.NormalGet(fmt.Sprintf("https://www.virustotal.com/vtapi/v2/domain/report?apikey=%s&domain=%s", session.Keys.Virustotal, domain))
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
data := response{}
|
||||||
|
err = jsoniter.NewDecoder(resp.Body).Decode(&data)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
for _, subdomain := range data.Subdomains {
|
||||||
|
subdomain = strings.TrimPrefix(strings.ToLower(subdomain), "*.")
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "virustotal"
|
||||||
|
}
|
|
@ -0,0 +1,51 @@
|
||||||
|
package waybackarchive
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/subfinder/subfinder/pkg/subscraping"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Source is the waybackarchive passive scraping agent. It carries no state.
type Source struct{}
|
||||||
|
|
||||||
|
// Run function returns all subdomains found with the service
|
||||||
|
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
|
||||||
|
results := make(chan subscraping.Result)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
pagesResp, err := session.NormalGet(fmt.Sprintf("http://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original&collapse=urlkey", domain))
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := ioutil.ReadAll(pagesResp.Body)
|
||||||
|
if err != nil {
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
|
||||||
|
close(results)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
pagesResp.Body.Close()
|
||||||
|
|
||||||
|
match := session.Extractor.FindAllString(string(body), -1)
|
||||||
|
for _, subdomain := range match {
|
||||||
|
subdomain = strings.TrimPrefix(subdomain, "25")
|
||||||
|
subdomain = strings.TrimPrefix(subdomain, "2F")
|
||||||
|
|
||||||
|
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the name of the source
|
||||||
|
func (s *Source) Name() string {
|
||||||
|
return "waybackarchive"
|
||||||
|
}
|
|
@ -0,0 +1,58 @@
|
||||||
|
package subscraping
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"regexp"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Source is the interface implemented by each passive source.
type Source interface {
	// Run takes a context, a domain as argument and a session object
	// which contains the extractor for subdomains, the http client and
	// the API keys. It returns a receive-only channel of results.
	Run(context.Context, string, *Session) <-chan Result
	// Name returns the name of the source.
	Name() string
}
|
||||||
|
|
||||||
|
// Session is the option passed to the source, an option is created
// uniquely for each source.
type Session struct {
	// Extractor is the regex for subdomains created for each domain.
	Extractor *regexp.Regexp
	// Keys is the API keys for the application.
	Keys Keys
	// Client is the current http client.
	Client *http.Client
}
|
||||||
|
|
||||||
|
// Keys contains the current API keys we have in store. Every field is a
// plain string and carries a json tag naming its key in the serialized form.
type Keys struct {
	Binaryedge           string `json:"binaryedge"`
	Certspotter          string `json:"certspotter"`
	FacebookAppID        string `json:"facebook_appid"`
	FacebookAppSecret    string `json:"facebook_appsecret"`
	PassiveTotalUsername string `json:"passivetotal_username"`
	PassiveTotalPassword string `json:"passivetotal_password"`
	Securitytrails       string `json:"securitytrails"`
	URLScan              string `json:"urlscan"`
	Virustotal           string `json:"virustotal"`
}
|
||||||
|
|
||||||
|
// Result is a result structure returned by a source.
type Result struct {
	// Type is the kind of result (see ResultType).
	Type ResultType
	// Source is a string identifying the source of the result.
	// NOTE(review): presumably the value of the source's Name() - confirm.
	Source string
	// Value is the string payload of the result.
	// NOTE(review): presumably the subdomain when Type is Subdomain - confirm.
	Value string
	// Error is the error carried by the result.
	// NOTE(review): presumably only set when Type is Error - confirm.
	Error error
}
|
||||||
|
|
||||||
|
// ResultType is the type of result returned by the source.
type ResultType int

// Types of results returned by the source. The values are assigned with
// iota, so Subdomain is 0 and Error is 1.
const (
	Subdomain ResultType = iota
	Error
)
|
|
@ -0,0 +1,30 @@
|
||||||
|
package subscraping
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
// subdomainExtractorMutex serializes calls to NewSubdomainExtractor.
var subdomainExtractorMutex = &sync.Mutex{}

// NewSubdomainExtractor creates a new regular expression to extract
// subdomains from text based on the given domain.
//
// The domain is matched literally: it is escaped with regexp.QuoteMeta before
// being embedded in the pattern, so the dots in "example.com" only match real
// dots and a domain containing regex metacharacters cannot break compilation.
// The returned expression matches one or more characters from
// [a-zA-Z0-9*_.-] followed by a dot and the domain.
func NewSubdomainExtractor(domain string) (*regexp.Regexp, error) {
	subdomainExtractorMutex.Lock()
	defer subdomainExtractorMutex.Unlock()

	extractor, err := regexp.Compile(`[a-zA-Z0-9\*_.-]+\.` + regexp.QuoteMeta(domain))
	if err != nil {
		return nil, err
	}
	return extractor, nil
}
|
||||||
|
|
||||||
|
// Exists reports whether key is equal to at least one element of values.
func Exists(values []string, key string) bool {
	found := false
	// Stop scanning as soon as the first equal element has been seen.
	for idx := 0; idx < len(values) && !found; idx++ {
		found = values[idx] == key
	}
	return found
}
|
Loading…
Reference in New Issue