From b2faabd5028f808815f62c72056fa31e9bbabee1 Mon Sep 17 00:00:00 2001 From: Ice3man543 Date: Sun, 8 Jul 2018 10:22:17 +0530 Subject: [PATCH] Added commoncrawl engine --- README.md | 4 +- libsubfinder/engines/passive/passive.go | 14 ++++ .../sources/commoncrawl/commoncrawl.go | 70 +++++++++++++++++++ 3 files changed, 86 insertions(+), 2 deletions(-) create mode 100644 libsubfinder/sources/commoncrawl/commoncrawl.go diff --git a/README.md b/README.md index d82cd7e..aed7d68 100644 --- a/README.md +++ b/README.md @@ -25,12 +25,12 @@ We have designed SubFinder to comply with all passive sources licenses, and usag - Simple and modular code base making it easy to contribute. - Fast And Powerful Bruteforcing Module - Powerful Permutation generation engine. (In Development) - - Many Passive Data Sources (30 At Present) + - Many Passive Data Sources (31 At Present) - Multiple Output formats - Embeddable Project - Raspberry Pi Support -> Ask, Archive.is, Baidu, Bing, Censys, CertDB, CertSpotter, CrtSH, DnsDB, DNSDumpster, Dogpile, Entrust CT-Search, Exalead, FindSubdomains, GoogleTER, Hackertarget, IPv4Info, Netcraft, PassiveTotal, PTRArchive, Riddler, SecurityTrails, SiteDossier, Shodan, ThreatCrowd, ThreatMiner, Virustotal, WaybackArchive, Yahoo +> Ask, Archive.is, Baidu, Bing, Censys, CertDB, CertSpotter, Commoncrawl, CrtSH, DnsDB, DNSDumpster, Dogpile, Entrust CT-Search, Exalead, FindSubdomains, GoogleTER, Hackertarget, IPv4Info, Netcraft, PassiveTotal, PTRArchive, Riddler, SecurityTrails, SiteDossier, Shodan, ThreatCrowd, ThreatMiner, Virustotal, WaybackArchive, Yahoo ***We ensure that we abide by the terms and conditions of all sources that we query. 
For this reason we don't perform scraping on any site that doesn't allow it.*** diff --git a/libsubfinder/engines/passive/passive.go b/libsubfinder/engines/passive/passive.go index cd8ee3c..0ae59ba 100644 --- a/libsubfinder/engines/passive/passive.go +++ b/libsubfinder/engines/passive/passive.go @@ -31,6 +31,7 @@ import ( "github.com/subfinder/subfinder/libsubfinder/sources/certdb" "github.com/subfinder/subfinder/libsubfinder/sources/certificatetransparency" "github.com/subfinder/subfinder/libsubfinder/sources/certspotter" + "github.com/subfinder/subfinder/libsubfinder/sources/commoncrawl" "github.com/subfinder/subfinder/libsubfinder/sources/crtsh" "github.com/subfinder/subfinder/libsubfinder/sources/dnsdb" "github.com/subfinder/subfinder/libsubfinder/sources/dnsdumpster" @@ -66,6 +67,7 @@ type Source struct { Bing bool Censys bool Certdb bool + Commoncrawl bool Crtsh bool Certspotter bool Dnsdb bool @@ -99,6 +101,7 @@ func (s *Source) enableAll() { s.Censys = true s.Certdb = true s.Certspotter = true + s.Commoncrawl = true s.Crtsh = true s.Dnsdb = true s.Dnsdumpster = true @@ -140,6 +143,8 @@ func (s *Source) enable(dataSources []string) { s.Certdb = true case "certspotter": s.Certspotter = true + case "commoncrawl": + s.Commoncrawl = true case "crtsh": s.Crtsh = true case "dnsdb": @@ -205,6 +210,8 @@ func (s *Source) disable(dataSources []string) { s.Certdb = false case "certspotter": s.Certspotter = false + case "commoncrawl": + s.Commoncrawl = false case "crtsh": s.Crtsh = false case "dnsdb": @@ -257,6 +264,7 @@ func (s *Source) disable(dataSources []string) { s.Censys = false s.Certdb = false s.Certspotter = false + s.Commoncrawl = false s.Crtsh = false s.Dnsdb = false s.Dnsdumpster = false @@ -309,6 +317,9 @@ func (s *Source) printSummary() { if s.Certspotter { fmt.Printf("\nRunning Source: %sCertspotter%s", helper.Info, helper.Reset) } + if s.Commoncrawl { + fmt.Printf("\nRunning Source: %sCommoncrawl%s", helper.Info, helper.Reset) + } if s.Crtsh { 
fmt.Printf("\nRunning Source: %sCrt.sh%s", helper.Info, helper.Reset) } @@ -534,6 +545,9 @@ func discover(state *helper.State, domain string, sourceConfig *Source) (subdoma if sourceConfig.Googleter { domainDiscoverPool.Add(googleter.Query, domain, state) } + if sourceConfig.Commoncrawl { + domainDiscoverPool.Add(commoncrawl.Query, domain, state) + } domainDiscoverPool.Wait() diff --git a/libsubfinder/sources/commoncrawl/commoncrawl.go b/libsubfinder/sources/commoncrawl/commoncrawl.go new file mode 100644 index 0000000..576c678 --- /dev/null +++ b/libsubfinder/sources/commoncrawl/commoncrawl.go @@ -0,0 +1,70 @@ +// +// Written By : @ice3man (Nizamul Rana) +// +// Distributed Under MIT License +// Copyrights (C) 2018 Ice3man +// + +// Package commoncrawl is a Golang based client for Parsing Subdomains from Commoncrawl +package commoncrawl + +import ( + "fmt" + "io/ioutil" + + "github.com/subfinder/subfinder/libsubfinder/helper" +) + +// all subdomains found +var subdomains []string + +type commoncrawlObject struct { + NameValue string `json:"url"` +} + +// array of all results returned +var commoncrawlData []string + +// Query function returns all subdomains found using the service. 
+func Query(args ...interface{}) interface{} { + + domain := args[0].(string) + state := args[1].(*helper.State) + + // Make an HTTP request to Commoncrawl + resp, err := helper.GetHTTPResponse("http://index.commoncrawl.org/CC-MAIN-2018-17-index?url=*."+domain+"&output=json", state.Timeout) + if err != nil { + if !state.Silent { + fmt.Printf("\ncommoncrawl: %v\n", err) + } + return subdomains + } + + // Get the response body + respBody, err := ioutil.ReadAll(resp.Body) + if err != nil { + if !state.Silent { + fmt.Printf("\ncommoncrawl: %v\n", err) + } + return subdomains + } + + commoncrawlData := helper.ExtractSubdomains(string(respBody), domain) + + for _, subdomain := range commoncrawlData { + if helper.SubdomainExists(subdomain, subdomains) == false { + if state.Verbose == true { + if state.Color == true { + fmt.Printf("\n[%sCommoncrawl%s] %s", helper.Red, helper.Reset, subdomain) + } else { + fmt.Printf("\n[Commoncrawl] %s", subdomain) + } + } + + subdomains = append(subdomains, subdomain) + } + } + + return subdomains + +}