Added commoncrawl engine
parent
e8b91d2d9a
commit
b2faabd502
|
@ -25,12 +25,12 @@ We have designed SubFinder to comply with all passive sources licenses, and usag
|
|||
- Simple and modular code base making it easy to contribute.
|
||||
- Fast And Powerful Bruteforcing Module
|
||||
- Powerful Permutation generation engine. (In Development)
|
||||
- Many Passive Data Sources (30 At Present)
|
||||
- Many Passive Data Sources (31 At Present)
|
||||
- Multiple Output formats
|
||||
- Embeddable Project
|
||||
- Raspberry Pi Support
|
||||
|
||||
> Ask, Archive.is, Baidu, Bing, Censys, CertDB, CertSpotter, CrtSH, DnsDB, DNSDumpster, Dogpile, Entrust CT-Search, Exalead, FindSubdomains, GoogleTER, Hackertarget, IPv4Info, Netcraft, PassiveTotal, PTRArchive, Riddler, SecurityTrails, SiteDossier, Shodan, ThreatCrowd, ThreatMiner, Virustotal, WaybackArchive, Yahoo
|
||||
> Ask, Archive.is, Baidu, Bing, Censys, CertDB, CertSpotter, Commoncrawl, CrtSH, DnsDB, DNSDumpster, Dogpile, Entrust CT-Search, Exalead, FindSubdomains, GoogleTER, Hackertarget, IPv4Info, Netcraft, PassiveTotal, PTRArchive, Riddler, SecurityTrails, SiteDossier, Shodan, ThreatCrowd, ThreatMiner, Virustotal, WaybackArchive, Yahoo
|
||||
|
||||
***We ensure that we abide by the terms and conditions of all sources that we query. For this reason we don't perform scraping on any site that doesn't allow it.***
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@ import (
|
|||
"github.com/subfinder/subfinder/libsubfinder/sources/certdb"
|
||||
"github.com/subfinder/subfinder/libsubfinder/sources/certificatetransparency"
|
||||
"github.com/subfinder/subfinder/libsubfinder/sources/certspotter"
|
||||
"github.com/subfinder/subfinder/libsubfinder/sources/commoncrawl"
|
||||
"github.com/subfinder/subfinder/libsubfinder/sources/crtsh"
|
||||
"github.com/subfinder/subfinder/libsubfinder/sources/dnsdb"
|
||||
"github.com/subfinder/subfinder/libsubfinder/sources/dnsdumpster"
|
||||
|
@ -66,6 +67,7 @@ type Source struct {
|
|||
Bing bool
|
||||
Censys bool
|
||||
Certdb bool
|
||||
Commoncrawl bool
|
||||
Crtsh bool
|
||||
Certspotter bool
|
||||
Dnsdb bool
|
||||
|
@ -99,6 +101,7 @@ func (s *Source) enableAll() {
|
|||
s.Censys = true
|
||||
s.Certdb = true
|
||||
s.Certspotter = true
|
||||
s.Commoncrawl = true
|
||||
s.Crtsh = true
|
||||
s.Dnsdb = true
|
||||
s.Dnsdumpster = true
|
||||
|
@ -140,6 +143,8 @@ func (s *Source) enable(dataSources []string) {
|
|||
s.Certdb = true
|
||||
case "certspotter":
|
||||
s.Certspotter = true
|
||||
case "commoncrawl":
|
||||
s.Commoncrawl = true
|
||||
case "crtsh":
|
||||
s.Crtsh = true
|
||||
case "dnsdb":
|
||||
|
@ -205,6 +210,8 @@ func (s *Source) disable(dataSources []string) {
|
|||
s.Certdb = false
|
||||
case "certspotter":
|
||||
s.Certspotter = false
|
||||
case "commoncrawl":
|
||||
s.Commoncrawl = false
|
||||
case "crtsh":
|
||||
s.Crtsh = false
|
||||
case "dnsdb":
|
||||
|
@ -257,6 +264,7 @@ func (s *Source) disable(dataSources []string) {
|
|||
s.Censys = false
|
||||
s.Certdb = false
|
||||
s.Certspotter = false
|
||||
s.Commoncrawl = false
|
||||
s.Crtsh = false
|
||||
s.Dnsdb = false
|
||||
s.Dnsdumpster = false
|
||||
|
@ -309,6 +317,9 @@ func (s *Source) printSummary() {
|
|||
if s.Certspotter {
|
||||
fmt.Printf("\nRunning Source: %sCertspotter%s", helper.Info, helper.Reset)
|
||||
}
|
||||
if s.Commoncrawl {
|
||||
fmt.Printf("\nRunning Source: %sCommoncrawl%s", helper.Info, helper.Reset)
|
||||
}
|
||||
if s.Crtsh {
|
||||
fmt.Printf("\nRunning Source: %sCrt.sh%s", helper.Info, helper.Reset)
|
||||
}
|
||||
|
@ -534,6 +545,9 @@ func discover(state *helper.State, domain string, sourceConfig *Source) (subdoma
|
|||
if sourceConfig.Googleter {
|
||||
domainDiscoverPool.Add(googleter.Query, domain, state)
|
||||
}
|
||||
if sourceConfig.Commoncrawl {
|
||||
domainDiscoverPool.Add(commoncrawl.Query, domain, state)
|
||||
}
|
||||
|
||||
domainDiscoverPool.Wait()
|
||||
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
//
|
||||
// Written By : @ice3man (Nizamul Rana)
|
||||
//
|
||||
// Distributed Under MIT License
|
||||
// Copyrights (C) 2018 Ice3man
|
||||
//
|
||||
|
||||
// Package commoncrawl is a Golang based client for Parsing Subdomains from Commoncrawl
|
||||
package commoncrawl
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
|
||||
"github.com/subfinder/subfinder/libsubfinder/helper"
|
||||
)
|
||||
|
||||
// subdomains holds every unique subdomain collected by Query. It is
// package-level state: Query appends to it and returns it, so entries added
// by one call are still present on the next call.
|
||||
var subdomains []string
|
||||
|
||||
// commoncrawlObject maps the "url" key of a JSON object onto NameValue.
// NOTE(review): nothing in this file references this type (Query does not
// decode JSON into it) - confirm whether it is still needed.
type commoncrawlObject struct {
|
||||
NameValue string `json:"url"`
|
||||
}
|
||||
|
||||
// commoncrawlData is declared to hold all results returned by the service.
// NOTE(review): Query never reads or assigns this package-level variable; it
// works with a function-local value instead - confirm whether it can be removed.
|
||||
var commoncrawlData []string
|
||||
|
||||
// Query function returns all subdomains found using the service.
|
||||
func Query(args ...interface{}) interface{} {
|
||||
|
||||
domain := args[0].(string)
|
||||
state := args[1].(*helper.State)
|
||||
|
||||
// Make a http request to Threatcrowd
|
||||
resp, err := helper.GetHTTPResponse("http://index.commoncrawl.org/CC-MAIN-2018-17-index?url=*."+domain+"&output=json", state.Timeout)
|
||||
if err != nil {
|
||||
if !state.Silent {
|
||||
fmt.Printf("\ncommoncrawl: %v\n", err)
|
||||
}
|
||||
return subdomains
|
||||
}
|
||||
|
||||
// Get the response body
|
||||
respBody, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
if !state.Silent {
|
||||
fmt.Printf("\ncommoncrawl: %v\n", err)
|
||||
}
|
||||
return subdomains
|
||||
}
|
||||
|
||||
commoncrawlData := helper.ExtractSubdomains(string(respBody), domain)
|
||||
|
||||
for _, subdomain := range commoncrawlData {
|
||||
if helper.SubdomainExists(subdomain, subdomains) == false {
|
||||
if state.Verbose == true {
|
||||
if state.Color == true {
|
||||
fmt.Printf("\n[%sCommoncrawl%s] %s", helper.Red, helper.Reset, subdomain)
|
||||
} else {
|
||||
fmt.Printf("\n[Commoncrawl] %s", subdomain)
|
||||
}
|
||||
}
|
||||
|
||||
subdomains = append(subdomains, subdomain)
|
||||
}
|
||||
}
|
||||
|
||||
return subdomains
|
||||
|
||||
}
|
Loading…
Reference in New Issue