Added commoncrawl engine

parent e8b91d2d9a
commit b2faabd502
@@ -25,12 +25,12 @@ We have designed SubFinder to comply with all passive sources licenses, and usag
 - Simple and modular code base making it easy to contribute.
 - Fast And Powerful Bruteforcing Module
 - Powerful Permutation generation engine. (In Development)
-- Many Passive Data Sources (30 At Present)
+- Many Passive Data Sources (31 At Present)
 - Multiple Output formats
 - Embeddable Project
 - Raspberry Pi Support
 
-> Ask, Archive.is, Baidu, Bing, Censys, CertDB, CertSpotter, CrtSH, DnsDB, DNSDumpster, Dogpile, Entrust CT-Search, Exalead, FindSubdomains, GoogleTER, Hackertarget, IPv4Info, Netcraft, PassiveTotal, PTRArchive, Riddler, SecurityTrails, SiteDossier, Shodan, ThreatCrowd, ThreatMiner, Virustotal, WaybackArchive, Yahoo
+> Ask, Archive.is, Baidu, Bing, Censys, CertDB, CertSpotter, Commoncrawl, CrtSH, DnsDB, DNSDumpster, Dogpile, Entrust CT-Search, Exalead, FindSubdomains, GoogleTER, Hackertarget, IPv4Info, Netcraft, PassiveTotal, PTRArchive, Riddler, SecurityTrails, SiteDossier, Shodan, ThreatCrowd, ThreatMiner, Virustotal, WaybackArchive, Yahoo
 
 ***We ensure that we abide by the terms and conditions of all sources that we query. For this reason we don't perform scraping on any site that doesn't allow it.***
@@ -31,6 +31,7 @@ import (
 	"github.com/subfinder/subfinder/libsubfinder/sources/certdb"
 	"github.com/subfinder/subfinder/libsubfinder/sources/certificatetransparency"
 	"github.com/subfinder/subfinder/libsubfinder/sources/certspotter"
+	"github.com/subfinder/subfinder/libsubfinder/sources/commoncrawl"
 	"github.com/subfinder/subfinder/libsubfinder/sources/crtsh"
 	"github.com/subfinder/subfinder/libsubfinder/sources/dnsdb"
 	"github.com/subfinder/subfinder/libsubfinder/sources/dnsdumpster"
@@ -66,6 +67,7 @@ type Source struct {
 	Bing bool
 	Censys bool
 	Certdb bool
+	Commoncrawl bool
 	Crtsh bool
 	Certspotter bool
 	Dnsdb bool
@@ -99,6 +101,7 @@ func (s *Source) enableAll() {
 	s.Censys = true
 	s.Certdb = true
 	s.Certspotter = true
+	s.Commoncrawl = true
 	s.Crtsh = true
 	s.Dnsdb = true
 	s.Dnsdumpster = true
@@ -140,6 +143,8 @@ func (s *Source) enable(dataSources []string) {
 		s.Certdb = true
 	case "certspotter":
 		s.Certspotter = true
+	case "commoncrawl":
+		s.Commoncrawl = true
 	case "crtsh":
 		s.Crtsh = true
 	case "dnsdb":
@@ -205,6 +210,8 @@ func (s *Source) disable(dataSources []string) {
 		s.Certdb = false
 	case "certspotter":
 		s.Certspotter = false
+	case "commoncrawl":
+		s.Commoncrawl = false
 	case "crtsh":
 		s.Crtsh = false
 	case "dnsdb":
@@ -257,6 +264,7 @@ func (s *Source) disable(dataSources []string) {
 	s.Censys = false
 	s.Certdb = false
 	s.Certspotter = false
+	s.Commoncrawl = false
 	s.Crtsh = false
 	s.Dnsdb = false
 	s.Dnsdumpster = false
@@ -309,6 +317,9 @@ func (s *Source) printSummary() {
 	if s.Certspotter {
 		fmt.Printf("\nRunning Source: %sCertspotter%s", helper.Info, helper.Reset)
 	}
+	if s.Commoncrawl {
+		fmt.Printf("\nRunning Source: %sCommoncrawl%s", helper.Info, helper.Reset)
+	}
 	if s.Crtsh {
 		fmt.Printf("\nRunning Source: %sCrt.sh%s", helper.Info, helper.Reset)
 	}
@@ -534,6 +545,9 @@ func discover(state *helper.State, domain string, sourceConfig *Source) (subdoma
 	if sourceConfig.Googleter {
 		domainDiscoverPool.Add(googleter.Query, domain, state)
 	}
+	if sourceConfig.Commoncrawl {
+		domainDiscoverPool.Add(commoncrawl.Query, domain, state)
+	}
 
 	domainDiscoverPool.Wait()
 
@@ -0,0 +1,70 @@
+//
+// Written By : @ice3man (Nizamul Rana)
+//
+// Distributed Under MIT License
+// Copyrights (C) 2018 Ice3man
+//
+
+// Package commoncrawl is a Golang based client for Parsing Subdomains from Commoncrawl
+package commoncrawl
+
+import (
+	"fmt"
+	"io/ioutil"
+
+	"github.com/subfinder/subfinder/libsubfinder/helper"
+)
+
+// all subdomains found
+var subdomains []string
+
+type commoncrawlObject struct {
+	NameValue string `json:"url"`
+}
+
+// array of all results returned
+var commoncrawlData []string
+
+// Query function returns all subdomains found using the service.
+func Query(args ...interface{}) interface{} {
+
+	domain := args[0].(string)
+	state := args[1].(*helper.State)
+
+	// Make an HTTP request to the CommonCrawl index
+	resp, err := helper.GetHTTPResponse("http://index.commoncrawl.org/CC-MAIN-2018-17-index?url=*."+domain+"&output=json", state.Timeout)
+	if err != nil {
+		if !state.Silent {
+			fmt.Printf("\ncommoncrawl: %v\n", err)
+		}
+		return subdomains
+	}
+
+	// Get the response body
+	respBody, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		if !state.Silent {
+			fmt.Printf("\ncommoncrawl: %v\n", err)
+		}
+		return subdomains
+	}
+
+	commoncrawlData := helper.ExtractSubdomains(string(respBody), domain)
+
+	for _, subdomain := range commoncrawlData {
+		if helper.SubdomainExists(subdomain, subdomains) == false {
+			if state.Verbose == true {
+				if state.Color == true {
+					fmt.Printf("\n[%sCommoncrawl%s] %s", helper.Red, helper.Reset, subdomain)
+				} else {
+					fmt.Printf("\n[Commoncrawl] %s", subdomain)
+				}
+			}
+
+			subdomains = append(subdomains, subdomain)
+		}
+	}
+
+	return subdomains
+
+}
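For reference, below is a minimal standalone sketch of driving the new source directly, rather than through the passive engine's worker pool shown in discover() above. It is not part of the commit: it assumes helper.State can be constructed as a struct literal and that its Timeout and Verbose fields (the ones Query reads above) accept the illustrative values used here.

// Hypothetical usage sketch, not part of this commit.
// Assumes helper.State exposes the Timeout/Verbose/Color/Silent fields
// referenced by commoncrawl.Query; the values below are illustrative only.
package main

import (
	"fmt"

	"github.com/subfinder/subfinder/libsubfinder/helper"
	"github.com/subfinder/subfinder/libsubfinder/sources/commoncrawl"
)

func main() {
	state := &helper.State{Timeout: 10, Verbose: true}

	// Mirrors the worker-pool call in discover():
	// domainDiscoverPool.Add(commoncrawl.Query, domain, state)
	results := commoncrawl.Query("example.com", state)

	// Query returns interface{}; the underlying value is the []string of subdomains.
	for _, subdomain := range results.([]string) {
		fmt.Println(subdomain)
	}
}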