From a045078bb03967553411c1be0a63eb5b026195ca Mon Sep 17 00:00:00 2001 From: Ice3man543 Date: Tue, 3 Dec 2019 18:07:11 +0530 Subject: [PATCH] Added subdomain scraping sources --- pkg/subscraping/agent.go | 55 ++++++ .../sources/archiveis/archiveis.go | 79 +++++++++ .../sources/binaryedge/binaryedge.go | 106 +++++++++++ .../sources/bufferover/bufferover.go | 57 ++++++ .../sources/certspotter/certspotter.go | 97 ++++++++++ .../sources/certspotterold/certspotterold.go | 51 ++++++ .../sources/commoncrawl/commoncrawl.go | 105 +++++++++++ pkg/subscraping/sources/crtsh/crtsh.go | 51 ++++++ pkg/subscraping/sources/digicert/digicert.go | 63 +++++++ .../sources/digicert/digicert_test.go | 27 +++ .../sources/dnsdumpster/dnsdumpster.go | 112 ++++++++++++ pkg/subscraping/sources/entrust/entrust.go | 52 ++++++ pkg/subscraping/sources/facebook/facebook.go | 100 +++++++++++ .../sources/googleter/googleter.go | 119 +++++++++++++ .../sources/hackertarget/hackertarget.go | 48 +++++ pkg/subscraping/sources/ipv4info/ipv4info.go | 167 ++++++++++++++++++ .../sources/passivetotal/passivetotal.go | 71 ++++++++ .../sources/securitytrails/securitytrails.go | 64 +++++++ .../sources/sitedossier/sitedossier.go | 82 +++++++++ .../sources/threatcrowd/threatcrowd.go | 49 +++++ .../sources/threatminer/threatminer.go | 49 +++++ pkg/subscraping/sources/urlscan/urlscan.go | 60 +++++++ .../sources/virustotal/virustotal.go | 58 ++++++ .../sources/waybackarchive/waybackarchive.go | 51 ++++++ pkg/subscraping/types.go | 58 ++++++ pkg/subscraping/utils.go | 30 ++++ 26 files changed, 1861 insertions(+) create mode 100755 pkg/subscraping/agent.go create mode 100755 pkg/subscraping/sources/archiveis/archiveis.go create mode 100755 pkg/subscraping/sources/binaryedge/binaryedge.go create mode 100755 pkg/subscraping/sources/bufferover/bufferover.go create mode 100755 pkg/subscraping/sources/certspotter/certspotter.go create mode 100644 pkg/subscraping/sources/certspotterold/certspotterold.go create mode 100755 pkg/subscraping/sources/commoncrawl/commoncrawl.go create mode 100755 pkg/subscraping/sources/crtsh/crtsh.go create mode 100644 pkg/subscraping/sources/digicert/digicert.go create mode 100644 pkg/subscraping/sources/digicert/digicert_test.go create mode 100755 pkg/subscraping/sources/dnsdumpster/dnsdumpster.go create mode 100755 pkg/subscraping/sources/entrust/entrust.go create mode 100755 pkg/subscraping/sources/facebook/facebook.go create mode 100755 pkg/subscraping/sources/googleter/googleter.go create mode 100755 pkg/subscraping/sources/hackertarget/hackertarget.go create mode 100755 pkg/subscraping/sources/ipv4info/ipv4info.go create mode 100755 pkg/subscraping/sources/passivetotal/passivetotal.go create mode 100755 pkg/subscraping/sources/securitytrails/securitytrails.go create mode 100755 pkg/subscraping/sources/sitedossier/sitedossier.go create mode 100755 pkg/subscraping/sources/threatcrowd/threatcrowd.go create mode 100755 pkg/subscraping/sources/threatminer/threatminer.go create mode 100755 pkg/subscraping/sources/urlscan/urlscan.go create mode 100755 pkg/subscraping/sources/virustotal/virustotal.go create mode 100755 pkg/subscraping/sources/waybackarchive/waybackarchive.go create mode 100755 pkg/subscraping/types.go create mode 100755 pkg/subscraping/utils.go diff --git a/pkg/subscraping/agent.go b/pkg/subscraping/agent.go new file mode 100755 index 0000000..aba207c --- /dev/null +++ b/pkg/subscraping/agent.go @@ -0,0 +1,55 @@ +package subscraping + +import ( + "crypto/tls" + "net/http" + "time" +) + +// NewSession 
creates a new session object for a domain +func NewSession(domain string, keys Keys, timeout int) (*Session, error) { + client := &http.Client{ + Transport: &http.Transport{ + MaxIdleConns: 100, + MaxIdleConnsPerHost: 100, + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: true, + }, + TLSHandshakeTimeout: 10 * time.Second, + ResponseHeaderTimeout: 10 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + }, + Timeout: time.Duration(timeout) * time.Second, + } + + session := &Session{ + Client: client, + Keys: keys, + } + + // Create a new extractor object for the current domain + extractor, err := NewSubdomainExtractor(domain) + session.Extractor = extractor + + return session, err +} + +// NormalGet makes a normal GET request to a URL +func (s *Session) NormalGet(url string) (*http.Response, error) { + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, err + } + + // Don't randomize user agents, as they cause issues sometimes + req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36") + req.Header.Set("Accept", "*/*") + req.Header.Set("Accept-Language", "en") + + resp, err := s.Client.Do(req) + if err != nil { + return nil, err + } + + return resp, nil +} diff --git a/pkg/subscraping/sources/archiveis/archiveis.go b/pkg/subscraping/sources/archiveis/archiveis.go new file mode 100755 index 0000000..ebb8cb4 --- /dev/null +++ b/pkg/subscraping/sources/archiveis/archiveis.go @@ -0,0 +1,79 @@ +// Package archiveis is an archive.is scraping engine in Golang +package archiveis + +import ( + "context" + "io/ioutil" + "regexp" + + "github.com/subfinder/subfinder/pkg/subscraping" +) + +// ArchiveIs is a struct for archiveurlsagent +type ArchiveIs struct { + Results chan subscraping.Result + Session *subscraping.Session +} + +var reNext = regexp.MustCompile("") + +func (a *ArchiveIs) enumerate(ctx context.Context, baseURL string) { + for { + select { + case <-ctx.Done(): + close(a.Results) + return + default: + resp, err := a.Session.NormalGet(baseURL) + if err != nil { + a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Error, Error: err} + close(a.Results) + return + } + + // Get the response body + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Error, Error: err} + close(a.Results) + return + } + resp.Body.Close() + + src := string(body) + + for _, subdomain := range a.Session.Extractor.FindAllString(src, -1) { + a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Subdomain, Value: subdomain} + } + + // Recurse into the next page if one exists; the recursive call closes + // the channel when it finishes, so only close here on the last page. + match1 := reNext.FindStringSubmatch(src) + if len(match1) > 0 { + a.enumerate(ctx, match1[1]) + return + } + close(a.Results) + return + } + } +} + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + aInstance := ArchiveIs{ + Session: session, + Results: results, + } + + go aInstance.enumerate(ctx, "http://archive.is/*."+domain) + + return aInstance.Results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "archiveis" +} diff --git a/pkg/subscraping/sources/binaryedge/binaryedge.go b/pkg/subscraping/sources/binaryedge/binaryedge.go new file mode 100755 index 0000000..c7c35da --- /dev/null +++
b/pkg/subscraping/sources/binaryedge/binaryedge.go @@ -0,0 +1,106 @@ +package binaryedge + +import ( + "context" + "fmt" + "strings" + + jsoniter "github.com/json-iterator/go" + "github.com/subfinder/subfinder/pkg/subscraping" +) + +type binaryedgeResponse struct { + Subdomains []string `json:"events"` + Total int `json:"total"` +} + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + if session.Keys.Binaryedge == "" { + close(results) + return + } + + resp, err := session.Client.Get(fmt.Sprintf("https://api.binaryedge.io/v2/query/domains/subdomain/%s", domain), "", map[string]string{"X-Key": session.Keys.Binaryedge}) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + response := new(binaryedgeResponse) + err = jsoniter.NewDecoder(resp.Body).Decode(&response) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + resp.Body.Close() + + for _, subdomain := range response.Subdomains { + subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.") + + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + } + + remaining := response.Total - 100 + currentPage := 2 + + for { + further := s.getSubdomains(ctx, domain, &remaining, ¤tPage, session, results) + if !further { + break + } + } + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "binaryedge" +} + +func (s *Source) getSubdomains(ctx context.Context, domain string, remaining, currentPage *int, session *subscraping.Session, results chan subscraping.Result) bool { + for { + select { + case <-ctx.Done(): + return false + default: + resp, err := session.Client.Get(fmt.Sprintf("https://api.binaryedge.io/v2/query/domains/subdomain/%s?page=%d", domain, *currentPage), "", map[string]string{"X-Key": session.Keys.Binaryedge}) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + return false + } + + response := binaryedgeResponse{} + err = jsoniter.NewDecoder(resp.Body).Decode(&response) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + return false + } + resp.Body.Close() + + for _, subdomain := range response.Subdomains { + subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.") + + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + } + + *remaining = *remaining - 100 + if *remaining <= 0 { + return false + } + *currentPage++ + return true + } + } +} diff --git a/pkg/subscraping/sources/bufferover/bufferover.go b/pkg/subscraping/sources/bufferover/bufferover.go new file mode 100755 index 0000000..70bbe18 --- /dev/null +++ b/pkg/subscraping/sources/bufferover/bufferover.go @@ -0,0 +1,57 @@ +// Package bufferover is a bufferover Scraping Engine in Golang +package bufferover + +import ( + "context" + "fmt" + "io/ioutil" + + "github.com/subfinder/subfinder/pkg/subscraping" +) + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain 
string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + // Query both the DNS and TLS bufferover.run datasets (built on the SONAR datasets) + s.getData(fmt.Sprintf("https://dns.bufferover.run/dns?q=.%s", domain), session, results) + s.getData(fmt.Sprintf("https://tls.bufferover.run/dns?q=.%s", domain), session, results) + + close(results) + }() + + return results +} + +func (s *Source) getData(URL string, session *subscraping.Session, results chan subscraping.Result) { + // Only Run closes the results channel, so errors here are reported without closing it + resp, err := session.NormalGet(URL) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + return + } + + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + return + } + resp.Body.Close() + + src := string(body) + + for _, subdomain := range session.Extractor.FindAllString(src, -1) { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + } +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "bufferover" +} diff --git a/pkg/subscraping/sources/certspotter/certspotter.go b/pkg/subscraping/sources/certspotter/certspotter.go new file mode 100755 index 0000000..82150d6 --- /dev/null +++ b/pkg/subscraping/sources/certspotter/certspotter.go @@ -0,0 +1,97 @@ +package certspotter + +import ( + "context" + "fmt" + "strings" + + jsoniter "github.com/json-iterator/go" + "github.com/subfinder/subfinder/pkg/subscraping" +) + +type certspotterObject struct { + ID string `json:"id"` + DNSNames []string `json:"dns_names"` +} + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + if session.Keys.Certspotter == "" { + close(results) + return + } + + resp, err := session.Client.Get(fmt.Sprintf("https://api.certspotter.com/v1/issuances?domain=%s&include_subdomains=true&expand=dns_names", domain), "", map[string]string{"Authorization": "Bearer " + session.Keys.Certspotter}) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + response := []certspotterObject{} + err = jsoniter.NewDecoder(resp.Body).Decode(&response) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + resp.Body.Close() + + for _, cert := range response { + for _, subdomain := range cert.DNSNames { + subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.") + + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + } + } + + // Guard against an empty first page before paginating on the last ID + if len(response) == 0 { + close(results) + return + } + id := response[len(response)-1].ID + for { + reqURL := fmt.Sprintf("https://api.certspotter.com/v1/issuances?domain=%s&include_subdomains=true&expand=dns_names&after=%s", domain, id) + + resp, err := session.Client.Get(reqURL, "", map[string]string{"Authorization": "Bearer " + session.Keys.Certspotter}) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + response := []certspotterObject{} + err = jsoniter.NewDecoder(resp.Body).Decode(&response) + if err != nil { + results <-
subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + resp.Body.Close() + + if len(response) == 0 { + break + } + + for _, cert := range response { + for _, subdomain := range cert.DNSNames { + subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.") + + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + } + } + + id = response[len(response)-1].ID + } + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "certspotter" +} diff --git a/pkg/subscraping/sources/certspotterold/certspotterold.go b/pkg/subscraping/sources/certspotterold/certspotterold.go new file mode 100644 index 0000000..b6ee5be --- /dev/null +++ b/pkg/subscraping/sources/certspotterold/certspotterold.go @@ -0,0 +1,51 @@ +package certspotterold + +import ( + "context" + "fmt" + "io/ioutil" + "strings" + + "github.com/subfinder/subfinder/pkg/subscraping" +) + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + resp, err := session.NormalGet(fmt.Sprintf("https://certspotter.com/api/v0/certs?domain=%s", domain)) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + resp.Body.Close() + + src := string(body) + + for _, subdomain := range session.Extractor.FindAllString(src, -1) { + subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.") + + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + } + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "certspotterold" +} diff --git a/pkg/subscraping/sources/commoncrawl/commoncrawl.go b/pkg/subscraping/sources/commoncrawl/commoncrawl.go new file mode 100755 index 0000000..3d87cc8 --- /dev/null +++ b/pkg/subscraping/sources/commoncrawl/commoncrawl.go @@ -0,0 +1,105 @@ +package commoncrawl + +import ( + "context" + "fmt" + "io/ioutil" + "strings" + + jsoniter "github.com/json-iterator/go" + "github.com/subfinder/subfinder/pkg/subscraping" +) + +const indexURL = "https://index.commoncrawl.org/collinfo.json" + +type indexResponse struct { + ID string `json:"id"` + APIURL string `json:"cdx-api"` +} + +// Source is the passive scraping agent +type Source struct{} + +var years = [...]string{"2019", "2018", "2017", "2016"} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + resp, err := session.NormalGet(indexURL) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + indexes := []indexResponse{} + err = jsoniter.NewDecoder(resp.Body).Decode(&indexes) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + resp.Body.Close() + + 
searchIndexes := make(map[string]string) + for _, year := range years { + for _, index := range indexes { + if strings.Contains(index.ID, year) { + if _, ok := searchIndexes[year]; !ok { + searchIndexes[year] = index.APIURL + break + } + } + } + } + + for _, url := range searchIndexes { + further := s.getSubdomains(ctx, url, domain, session, results) + if !further { + break + } + } + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "commoncrawl" +} + +func (s *Source) getSubdomains(ctx context.Context, url string, domain string, session *subscraping.Session, results chan subscraping.Result) bool { + for { + select { + case <-ctx.Done(): + return false + default: + resp, err := session.NormalGet(fmt.Sprintf("%s?url=*.%s&output=json", url, domain)) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + return false + } + + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + return false + } + resp.Body.Close() + + src := string(body) + + for _, subdomain := range session.Extractor.FindAllString(src, -1) { + subdomain = strings.TrimPrefix(subdomain, "25") + subdomain = strings.TrimPrefix(subdomain, "2F") + + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + } + return true + } + } +} diff --git a/pkg/subscraping/sources/crtsh/crtsh.go b/pkg/subscraping/sources/crtsh/crtsh.go new file mode 100755 index 0000000..24c522c --- /dev/null +++ b/pkg/subscraping/sources/crtsh/crtsh.go @@ -0,0 +1,51 @@ +package crtsh + +import ( + "context" + "fmt" + "io/ioutil" + "strings" + + "github.com/subfinder/subfinder/pkg/subscraping" +) + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + resp, err := session.NormalGet(fmt.Sprintf("https://crt.sh/?q=%%25.%s&output=json", domain)) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + resp.Body.Close() + + src := string(body) + + for _, subdomain := range session.Extractor.FindAllString(src, -1) { + subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.") + + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + } + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "crtsh" +} diff --git a/pkg/subscraping/sources/digicert/digicert.go b/pkg/subscraping/sources/digicert/digicert.go new file mode 100644 index 0000000..ca8cab8 --- /dev/null +++ b/pkg/subscraping/sources/digicert/digicert.go @@ -0,0 +1,63 @@ +package digicert + +import ( + "context" + "fmt" + "strings" + + jsoniter "github.com/json-iterator/go" + "github.com/subfinder/subfinder/pkg/subscraping" +) + +// Source is the passive scraping agent +type Source struct{} + +type responseData struct { + Data struct { + CertificateDetail []struct { + CommonName string `json:"string"` + SubjectAltNames []string 
`json:"subjectAlternativeNames"` + } `json:"certificateDetail"` + } `json:"data"` +} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + resp, err := session.Client.Get(fmt.Sprintf("https://ssltools.digicert.com/chainTester/webservice/ctsearch/search?keyword=%s", domain), "", nil) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + response := responseData{} + err = jsoniter.NewDecoder(resp.Body).Decode(&response) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + for _, cert := range response.Data.CertificateDetail { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: strings.TrimPrefix(strings.ToLower(cert.CommonName), "*.")} + + for _, subdomain := range cert.SubjectAltNames { + subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.") + + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + } + } + + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "digicert" +} diff --git a/pkg/subscraping/sources/digicert/digicert_test.go b/pkg/subscraping/sources/digicert/digicert_test.go new file mode 100644 index 0000000..e2ba8ed --- /dev/null +++ b/pkg/subscraping/sources/digicert/digicert_test.go @@ -0,0 +1,27 @@ +package digicert + +import ( + "context" + "strings" + "testing" + + "github.com/subfinder/subfinder/pkg/subscraping" +) + +func TestDigicert(t *testing.T) { + agent, _ := subscraping.New("", 30) + session, err := agent.NewSession("freelancer.com", subscraping.Keys{}) + if err != nil { + t.Fatalf("Invalid subdomain found: %s\n", err) + } + + source := Source{} + for resp := range source.Run(context.Background(), "freelancer.com", session) { + if resp.Type == subscraping.Error { + t.Fatalf("Source %s errored out: %s\n", source.Name(), resp.Error) + } + if !strings.HasSuffix(resp.Value, "freelancer.com") { + t.Fatalf("Invalid, expected for %s got %s\n", "freelancer.com", resp.Value) + } + } +} diff --git a/pkg/subscraping/sources/dnsdumpster/dnsdumpster.go b/pkg/subscraping/sources/dnsdumpster/dnsdumpster.go new file mode 100755 index 0000000..b9a9298 --- /dev/null +++ b/pkg/subscraping/sources/dnsdumpster/dnsdumpster.go @@ -0,0 +1,112 @@ +package dnsdumpster + +import ( + "context" + "io/ioutil" + "net" + "net/http" + "net/url" + "regexp" + "strings" + "time" + + "github.com/subfinder/subfinder/pkg/subscraping" +) + +var re = regexp.MustCompile("") + +// getCSRFToken gets the CSRF Token from the page +func getCSRFToken(page string) string { + if subs := re.FindStringSubmatch(page); len(subs) == 2 { + return strings.TrimSpace(subs[1]) + } + return "" +} + +// postForm posts a form for a domain and returns the response +func postForm(token, domain string) (string, error) { + dial := net.Dialer{} + client := &http.Client{ + Transport: &http.Transport{ + DialContext: dial.DialContext, + TLSHandshakeTimeout: 10 * time.Second, + }, + } + params := url.Values{ + "csrfmiddlewaretoken": {token}, + "targetip": {domain}, + } + + req, err := http.NewRequest("POST", "https://dnsdumpster.com/", strings.NewReader(params.Encode())) + if err != nil { + return "", err + } + + // The 
CSRF token needs to be sent as a cookie + cookie := &http.Cookie{ + Name: "csrftoken", + Domain: "dnsdumpster.com", + Value: token, + } + req.AddCookie(cookie) + + req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36") + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + req.Header.Set("Referer", "https://dnsdumpster.com") + req.Header.Set("X-CSRF-Token", token) + + resp, err := client.Do(req) + if err != nil { + return "", err + } + // Now, grab the entire page + in, err := ioutil.ReadAll(resp.Body) + resp.Body.Close() + return string(in), nil +} + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + resp, err := session.NormalGet("https://dnsdumpster.com/") + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + resp.Body.Close() + csrfToken := getCSRFToken(string(body)) + + data, err := postForm(csrfToken, domain) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + for _, subdomain := range session.Extractor.FindAllString(data, -1) { + subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.") + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + } + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "dnsdumpster" +} diff --git a/pkg/subscraping/sources/entrust/entrust.go b/pkg/subscraping/sources/entrust/entrust.go new file mode 100755 index 0000000..a2a639f --- /dev/null +++ b/pkg/subscraping/sources/entrust/entrust.go @@ -0,0 +1,52 @@ +package entrust + +import ( + "context" + "fmt" + "io/ioutil" + "strings" + + "github.com/subfinder/subfinder/pkg/subscraping" +) + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + resp, err := session.NormalGet(fmt.Sprintf("https://ctsearch.entrust.com/api/v1/certificates?fields=issuerCN,subjectO,issuerDN,issuerO,subjectDN,signAlg,san,publicKeyType,publicKeySize,validFrom,validTo,sn,ev,logEntries.logName,subjectCNReversed,cert&domain=%s&includeExpired=true&exactMatch=false&limit=5000", domain)) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + resp.Body.Close() + + src := string(body) + + for _, subdomain := range session.Extractor.FindAllString(src, -1) { + subdomain = strings.TrimPrefix(subdomain, "u003d") + subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.") + + results <- subscraping.Result{Source: 
s.Name(), Type: subscraping.Subdomain, Value: subdomain} + } + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "entrust" +} diff --git a/pkg/subscraping/sources/facebook/facebook.go b/pkg/subscraping/sources/facebook/facebook.go new file mode 100755 index 0000000..8c88d4c --- /dev/null +++ b/pkg/subscraping/sources/facebook/facebook.go @@ -0,0 +1,100 @@ +package facebook + +import ( + "context" + "errors" + "fmt" + "strings" + + jsoniter "github.com/json-iterator/go" + "github.com/subfinder/subfinder/pkg/subscraping" +) + +type authResponse struct { + AccessToken string `json:"access_token"` +} + +type response struct { + Data []struct { + Domains []string `json:"domains"` + } `json:"data"` + + Paging struct { + Next string `json:"next"` + } `json:"paging"` +} + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + if session.Keys.FacebookAppID == "" || session.Keys.FacebookAppSecret == "" { + close(results) + return + } + + resp, err := session.NormalGet(fmt.Sprintf("https://graph.facebook.com/oauth/access_token?client_id=%s&client_secret=%s&grant_type=client_credentials", session.Keys.FacebookAppID, session.Keys.FacebookAppSecret)) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + auth := authResponse{} + err = jsoniter.NewDecoder(resp.Body).Decode(&auth) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + if auth.AccessToken == "" { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: errors.New("no access token in Facebook API response")} + close(results) + return + } + + fetchURL := fmt.Sprintf("https://graph.facebook.com/certificates?fields=domains&access_token=%s&query=*.%s", auth.AccessToken, domain) + + wrapper := new(response) + for { + resp, err := session.NormalGet(fetchURL) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + err = jsoniter.NewDecoder(resp.Body).Decode(&wrapper) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + for _, data := range wrapper.Data { + for _, d := range data.Domains { + d := strings.TrimPrefix(strings.ToLower(d), "*.") + + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: d} + } + } + + fetchURL = wrapper.Paging.Next + if fetchURL == "" { + break + } + } + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "facebook" +} diff --git a/pkg/subscraping/sources/googleter/googleter.go b/pkg/subscraping/sources/googleter/googleter.go new file mode 100755 index 0000000..e2ea724 --- /dev/null +++ b/pkg/subscraping/sources/googleter/googleter.go @@ -0,0 +1,119 @@ +package googleter + +import ( + "context" + "io/ioutil" + "net/url" + "regexp" + "strconv" + + "github.com/subfinder/subfinder/pkg/subscraping" +) + +var ( + metaRegex = regexp.MustCompile(`\[null,"(.*)",null,(.*),(.*)]`) + metaRegex2 = regexp.MustCompile(`\["(.*)",".*",null,(.*),(.*)]`) +) + +type agent struct { +
subdomains chan subscraping.Result + session *subscraping.Session +} + +func (a *agent) makeRequest(token string, domain string) (string, error) { + requestURI := "" + + if token == "" { + requestURI = "https://www.google.com/transparencyreport/api/v3/httpsreport/ct/certsearch?domain=" + url.QueryEscape(domain) + "&include_expired=true&include_subdomains=true" + } else { + requestURI = "https://www.google.com/transparencyreport/api/v3/httpsreport/ct/certsearch/page?domain=" + url.QueryEscape(domain) + "&include_expired=true&include_subdomains=true&p=" + url.QueryEscape(token) + } + + resp, err := a.session.Client.Get(requestURI, "", map[string]string{ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36", + "Referer": "https://transparencyreport.google.com/https/certificates", + }) + if err != nil { + return "", err + } + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return "", err + } + resp.Body.Close() + + return string(body), nil +} + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + a := agent{ + session: session, + subdomains: results, + } + + go func() { + respBody, err := a.makeRequest("", domain) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + var Token string + + matches := metaRegex.FindStringSubmatch(string(respBody)) + if len(matches) <= 1 { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + for _, sub := range session.Extractor.FindAllString(respBody, -1) { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: sub} + } + + Token = matches[1] + MaxPages, _ := strconv.Atoi(matches[3]) + for i := 1; i <= MaxPages; i++ { + further := a.getSubdomains(ctx, &Token, domain, session, s, results) + if !further { + break + } + } + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "googleter" +} + +func (a *agent) getSubdomains(ctx context.Context, Token *string, domain string, session *subscraping.Session, s *Source, results chan subscraping.Result) bool { + respBody, err := a.makeRequest(*Token, domain) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + return false + } + for _, sub := range session.Extractor.FindAllString(respBody, -1) { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: sub} + } + + matches := metaRegex2.FindStringSubmatch(respBody) + matches2 := metaRegex.FindStringSubmatch(respBody) + if len(matches2) > 1 { + *Token = matches2[1] + } + if len(matches) > 1 { + *Token = matches[1] + } + return true +} diff --git a/pkg/subscraping/sources/hackertarget/hackertarget.go b/pkg/subscraping/sources/hackertarget/hackertarget.go new file mode 100755 index 0000000..9baa92f --- /dev/null +++ b/pkg/subscraping/sources/hackertarget/hackertarget.go @@ -0,0 +1,48 @@ +package hackertarget + +import ( + "context" + "fmt" + "io/ioutil" + + "github.com/subfinder/subfinder/pkg/subscraping" +) + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found 
with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + resp, err := session.NormalGet(fmt.Sprintf("http://api.hackertarget.com/hostsearch/?q=%s", domain)) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + // Get the response body + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + resp.Body.Close() + src := string(body) + + for _, match := range session.Extractor.FindAllString(src, -1) { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: match} + } + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "hackertarget" +} diff --git a/pkg/subscraping/sources/ipv4info/ipv4info.go b/pkg/subscraping/sources/ipv4info/ipv4info.go new file mode 100755 index 0000000..6ac4910 --- /dev/null +++ b/pkg/subscraping/sources/ipv4info/ipv4info.go @@ -0,0 +1,167 @@ +package ipv4info + +import ( + "context" + "errors" + "io/ioutil" + "regexp" + "strconv" + + "github.com/subfinder/subfinder/pkg/subscraping" +) + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + resp, err := session.NormalGet("http://ipv4info.com/search/" + domain) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + resp.Body.Close() + src := string(body) + + regxTokens := regexp.MustCompile("/ip-address/(.*)/" + domain) + matchTokens := regxTokens.FindAllString(src, -1) + + if len(matchTokens) <= 0 { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: errors.New("Could not get ip tokens")} + close(results) + return + } + token := matchTokens[0] + + resp, err = session.NormalGet("http://ipv4info.com" + token) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + body, err = ioutil.ReadAll(resp.Body) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + resp.Body.Close() + src = string(body) + + regxTokens = regexp.MustCompile("/dns/(.*?)/" + domain) + matchTokens = regxTokens.FindAllString(src, -1) + if len(matchTokens) <= 0 { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: errors.New("Could not get ip tokens")} + close(results) + return + } + token = matchTokens[0] + + resp, err = session.NormalGet("http://ipv4info.com" + token) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + body, err = ioutil.ReadAll(resp.Body) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } +
resp.Body.Close() + src = string(body) + + regxTokens = regexp.MustCompile("/subdomains/(.*?)/" + domain) + matchTokens = regxTokens.FindAllString(src, -1) + if len(matchTokens) <= 0 { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: errors.New("Could not get ip tokens")} + close(results) + return + } + token = matchTokens[0] + + resp, err = session.NormalGet("http://ipv4info.com" + token) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + body, err = ioutil.ReadAll(resp.Body) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + resp.Body.Close() + src = string(body) + + for _, match := range session.Extractor.FindAllString(src, -1) { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: match} + } + nextPage := 1 + + for { + further := s.getSubdomains(ctx, domain, &nextPage, src, session, results) + if !further { + break + } + } + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "ipv4info" +} + +func (s *Source) getSubdomains(ctx context.Context, domain string, nextPage *int, src string, session *subscraping.Session, results chan subscraping.Result) bool { + for { + select { + case <-ctx.Done(): + return false + default: + regxTokens := regexp.MustCompile("/subdomains/.*/page" + strconv.Itoa(*nextPage) + "/" + domain + ".html") + matchTokens := regxTokens.FindAllString(src, -1) + if len(matchTokens) == 0 { + return false + } + token := matchTokens[0] + + resp, err := session.NormalGet("http://ipv4info.com" + token) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + return false + } + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + return false + } + resp.Body.Close() + src = string(body) + for _, match := range session.Extractor.FindAllString(src, -1) { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: match} + } + *nextPage++ + return true + } + } +} diff --git a/pkg/subscraping/sources/passivetotal/passivetotal.go b/pkg/subscraping/sources/passivetotal/passivetotal.go new file mode 100755 index 0000000..0a0d87d --- /dev/null +++ b/pkg/subscraping/sources/passivetotal/passivetotal.go @@ -0,0 +1,71 @@ +package passivetotal + +import ( + "bytes" + "context" + "net/http" + + jsoniter "github.com/json-iterator/go" + "github.com/subfinder/subfinder/pkg/subscraping" +) + +type response struct { + Subdomains []string `json:"subdomains"` +} + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + if session.Keys.PassiveTotalUsername == "" || session.Keys.PassiveTotalPassword == "" { + close(results) + return + } + + // Create JSON Get body + var request = []byte(`{"query":"` + domain + `"}`) + + req, err := http.NewRequest("GET", "https://api.passivetotal.org/v2/enrichment/subdomains", bytes.NewBuffer(request)) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + +
req.SetBasicAuth(session.Keys.PassiveTotalUsername, session.Keys.PassiveTotalPassword) + req.Header.Set("Content-Type", "application/json") + + resp, err := session.Client.Do(req) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + data := response{} + err = jsoniter.NewDecoder(resp.Body).Decode(&data) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + resp.Body.Close() + + for _, subdomain := range data.Subdomains { + finalSubdomain := subdomain + "." + domain + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: finalSubdomain} + } + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "passivetotal" +} diff --git a/pkg/subscraping/sources/securitytrails/securitytrails.go b/pkg/subscraping/sources/securitytrails/securitytrails.go new file mode 100755 index 0000000..19a3201 --- /dev/null +++ b/pkg/subscraping/sources/securitytrails/securitytrails.go @@ -0,0 +1,64 @@ +package securitytrails + +import ( + "context" + "fmt" + "strings" + + jsoniter "github.com/json-iterator/go" + "github.com/subfinder/subfinder/pkg/subscraping" +) + +type response struct { + Subdomains []string `json:"subdomains"` +} + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + if session.Keys.Securitytrails == "" { + close(results) + return + } + + resp, err := session.Client.Get(fmt.Sprintf("https://api.securitytrails.com/v1/domain/%s/subdomains", domain), "", map[string]string{"APIKEY": session.Keys.Securitytrails}) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + response := response{} + err = jsoniter.NewDecoder(resp.Body).Decode(&response) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + resp.Body.Close() + + for _, subdomain := range response.Subdomains { + if strings.HasSuffix(subdomain, ".") { + subdomain = subdomain + domain + } else { + subdomain = subdomain + "."
+ domain + } + subdomain := strings.TrimPrefix(strings.ToLower(subdomain), "*.") + + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + } + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "securitytrails" +} diff --git a/pkg/subscraping/sources/sitedossier/sitedossier.go b/pkg/subscraping/sources/sitedossier/sitedossier.go new file mode 100755 index 0000000..7b7e980 --- /dev/null +++ b/pkg/subscraping/sources/sitedossier/sitedossier.go @@ -0,0 +1,82 @@ +package sitedossier + +import ( + "context" + "fmt" + "io/ioutil" + "math/rand" + "regexp" + "time" + + "github.com/subfinder/subfinder/pkg/subscraping" +) + +var reNext = regexp.MustCompile("") + +type agent struct { + results chan subscraping.Result + session *subscraping.Session +} + +func (a *agent) enumerate(ctx context.Context, baseURL string) error { + for { + select { + case <-ctx.Done(): + return nil + default: + resp, err := a.session.NormalGet(baseURL) + if err != nil { + a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err} + close(a.results) + return err + } + + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err} + close(a.results) + return err + } + resp.Body.Close() + src := string(body) + + for _, match := range a.session.Extractor.FindAllString(src, -1) { + a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Subdomain, Value: match} + } + + match1 := reNext.FindStringSubmatch(src) + time.Sleep(time.Duration((3 + rand.Intn(5))) * time.Second) + + // Propagate the recursive result so Run does not close an already closed channel + if len(match1) > 0 { + return a.enumerate(ctx, "http://www.sitedossier.com"+match1[1]) + } + return nil + } + } +} + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + a := agent{ + session: session, + results: results, + } + + go func() { + err := a.enumerate(ctx, fmt.Sprintf("http://www.sitedossier.com/parentdomain/%s", domain)) + if err == nil { + close(a.results) + } + }() + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "sitedossier" +} diff --git a/pkg/subscraping/sources/threatcrowd/threatcrowd.go b/pkg/subscraping/sources/threatcrowd/threatcrowd.go new file mode 100755 index 0000000..7a62ad3 --- /dev/null +++ b/pkg/subscraping/sources/threatcrowd/threatcrowd.go @@ -0,0 +1,49 @@ +package threatcrowd + +import ( + "context" + "fmt" + "io/ioutil" + + "github.com/subfinder/subfinder/pkg/subscraping" +) + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + resp, err := session.NormalGet(fmt.Sprintf("https://www.threatcrowd.org/searchApi/v2/domain/report/?domain=%s", domain)) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + // Get the response body + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error,
Error: err} + close(results) + return + } + resp.Body.Close() + + src := string(body) + + for _, match := range session.Extractor.FindAllString(src, -1) { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: match} + } + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "threatcrowd" +} diff --git a/pkg/subscraping/sources/threatminer/threatminer.go b/pkg/subscraping/sources/threatminer/threatminer.go new file mode 100755 index 0000000..f7f0a0b --- /dev/null +++ b/pkg/subscraping/sources/threatminer/threatminer.go @@ -0,0 +1,49 @@ +package threatminer + +import ( + "context" + "fmt" + "io/ioutil" + + "github.com/subfinder/subfinder/pkg/subscraping" +) + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + resp, err := session.NormalGet(fmt.Sprintf("https://api.threatminer.org/v2/domain.php?q=%s&rt=5", domain)) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + // Get the response body + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + resp.Body.Close() + + src := string(body) + + for _, match := range session.Extractor.FindAllString(src, -1) { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: match} + } + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "threatminer" +} diff --git a/pkg/subscraping/sources/urlscan/urlscan.go b/pkg/subscraping/sources/urlscan/urlscan.go new file mode 100755 index 0000000..5b716b8 --- /dev/null +++ b/pkg/subscraping/sources/urlscan/urlscan.go @@ -0,0 +1,60 @@ +package urlscan + +import ( + "context" + "fmt" + + jsoniter "github.com/json-iterator/go" + "github.com/m-mizutani/urlscan-go/urlscan" + "github.com/subfinder/subfinder/pkg/subscraping" +) + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + if session.Keys.URLScan == "" { + close(results) + return + } + + client := urlscan.NewClient(session.Keys.URLScan) + task, err := client.Submit(urlscan.SubmitArguments{URL: fmt.Sprintf("https://%s", domain)}) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + err = task.Wait() + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + data, err := jsoniter.Marshal(task.Result.Data) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + match := session.Extractor.FindAllString(string(data), -1) + for _, m := range match { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: m} + } + close(results) + }() + + return results +} + +// Name returns the name of 
the source +func (s *Source) Name() string { + return "urlscan" +} diff --git a/pkg/subscraping/sources/virustotal/virustotal.go b/pkg/subscraping/sources/virustotal/virustotal.go new file mode 100755 index 0000000..189ad60 --- /dev/null +++ b/pkg/subscraping/sources/virustotal/virustotal.go @@ -0,0 +1,58 @@ +package virustotal + +import ( + "context" + "fmt" + "strings" + + jsoniter "github.com/json-iterator/go" + "github.com/subfinder/subfinder/pkg/subscraping" +) + +type response struct { + Subdomains []string `json:"subdomains"` +} + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + if session.Keys.Virustotal == "" { + close(results) + return + } + + resp, err := session.NormalGet(fmt.Sprintf("https://www.virustotal.com/vtapi/v2/domain/report?apikey=%s&domain=%s", session.Keys.Virustotal, domain)) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + data := response{} + err = jsoniter.NewDecoder(resp.Body).Decode(&data) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + resp.Body.Close() + + for _, subdomain := range data.Subdomains { + subdomain = strings.TrimPrefix(strings.ToLower(subdomain), "*.") + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + } + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "virustotal" +} diff --git a/pkg/subscraping/sources/waybackarchive/waybackarchive.go b/pkg/subscraping/sources/waybackarchive/waybackarchive.go new file mode 100755 index 0000000..84b668c --- /dev/null +++ b/pkg/subscraping/sources/waybackarchive/waybackarchive.go @@ -0,0 +1,51 @@ +package waybackarchive + +import ( + "context" + "fmt" + "io/ioutil" + "strings" + + "github.com/subfinder/subfinder/pkg/subscraping" +) + +// Source is the passive scraping agent +type Source struct{} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + + go func() { + pagesResp, err := session.NormalGet(fmt.Sprintf("http://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original&collapse=urlkey", domain)) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + + body, err := ioutil.ReadAll(pagesResp.Body) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + close(results) + return + } + pagesResp.Body.Close() + + match := session.Extractor.FindAllString(string(body), -1) + for _, subdomain := range match { + subdomain = strings.TrimPrefix(subdomain, "25") + subdomain = strings.TrimPrefix(subdomain, "2F") + + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + } + close(results) + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "waybackarchive" +} diff --git a/pkg/subscraping/types.go b/pkg/subscraping/types.go new file mode 100755 index
0000000..9608073 --- /dev/null +++ b/pkg/subscraping/types.go @@ -0,0 +1,58 @@ +package subscraping + +import ( + "context" + "net/http" + "regexp" +) + +// Source is an interface implemented by each passive source +type Source interface { + // Run takes a domain as argument and a session object + // which contains the extractor for subdomains, the http client + // and other configuration. + Run(context.Context, string, *Session) <-chan Result + // Name returns the name of the source + Name() string +} + +// Session is the option passed to the source, an option is created +// uniquely for each source. +type Session struct { + // Extractor is the regex for subdomains created for each domain + Extractor *regexp.Regexp + // Keys is the API keys for the application + Keys Keys + // Client is the current http client + Client *http.Client +} + +// Keys contains the current API Keys we have in store +type Keys struct { + Binaryedge string `json:"binaryedge"` + Certspotter string `json:"certspotter"` + FacebookAppID string `json:"facebook_appid"` + FacebookAppSecret string `json:"facebook_appsecret"` + PassiveTotalUsername string `json:"passivetotal_username"` + PassiveTotalPassword string `json:"passivetotal_password"` + Securitytrails string `json:"securitytrails"` + URLScan string `json:"urlscan"` + Virustotal string `json:"virustotal"` +} + +// Result is a result structure returned by a source +type Result struct { + Type ResultType + Source string + Value string + Error error +} + +// ResultType is the type of result returned by the source +type ResultType int + +// Types of results returned by the source +const ( + Subdomain ResultType = iota + Error +) diff --git a/pkg/subscraping/utils.go b/pkg/subscraping/utils.go new file mode 100755 index 0000000..a65d0ea --- /dev/null +++ b/pkg/subscraping/utils.go @@ -0,0 +1,30 @@ +package subscraping + +import ( + "regexp" + "sync" +) + +var subdomainExtractorMutex = &sync.Mutex{} + +// NewSubdomainExtractor creates a new regular expression to extract +// subdomains from text based on the given domain. +func NewSubdomainExtractor(domain string) (*regexp.Regexp, error) { + subdomainExtractorMutex.Lock() + defer subdomainExtractorMutex.Unlock() + extractor, err := regexp.Compile(`[a-zA-Z0-9\*_.-]+\.` + domain) + if err != nil { + return nil, err + } + return extractor, nil +} + +// Exists checks whether a key exists in a slice +func Exists(values []string, key string) bool { + for _, v := range values { + if v == key { + return true + } + } + return false +}
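
Usage sketch (not part of the diff above): a minimal example of how a caller might drive the new Source interface and Session from types.go and agent.go. The chosen sources, the sequential draining loop, and the hard-coded domain and timeout are illustrative assumptions, not code from this change.

package main

import (
	"context"
	"fmt"

	"github.com/subfinder/subfinder/pkg/subscraping"
	"github.com/subfinder/subfinder/pkg/subscraping/sources/crtsh"
	"github.com/subfinder/subfinder/pkg/subscraping/sources/hackertarget"
)

func main() {
	domain := "example.com"

	// NewSession builds the shared HTTP client and the per-domain extractor regex.
	session, err := subscraping.NewSession(domain, subscraping.Keys{}, 30)
	if err != nil {
		panic(err)
	}

	// Each source closes its result channel when it finishes, so ranging over
	// Run drains every Result (subdomains and errors) the source produces.
	sources := []subscraping.Source{&crtsh.Source{}, &hackertarget.Source{}}
	for _, source := range sources {
		for result := range source.Run(context.Background(), domain, session) {
			switch result.Type {
			case subscraping.Error:
				fmt.Printf("%s: error: %v\n", result.Source, result.Error)
			case subscraping.Subdomain:
				fmt.Printf("%s: %s\n", result.Source, result.Value)
			}
		}
	}
}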