Added commoncrawl engine

master
Ice3man543 2018-07-08 10:22:17 +05:30
parent e8b91d2d9a
commit b2faabd502
No known key found for this signature in database
GPG Key ID: 0D3F37291AF33149
3 changed files with 86 additions and 2 deletions

View File

@ -25,12 +25,12 @@ We have designed SubFinder to comply with all passive sources licenses, and usag
- Simple and modular code base making it easy to contribute.
- Fast And Powerful Bruteforcing Module
- Powerful Permutation generation engine. (In Development)
- Many Passive Data Sources (30 At Present)
- Many Passive Data Sources (31 At Present)
- Multiple Output formats
- Embeddable Project
- Raspberry Pi Support
> Ask, Archive.is, Baidu, Bing, Censys, CertDB, CertSpotter, CrtSH, DnsDB, DNSDumpster, Dogpile, Entrust CT-Search, Exalead, FindSubdomains, GoogleTER, Hackertarget, IPv4Info, Netcraft, PassiveTotal, PTRArchive, Riddler, SecurityTrails, SiteDossier, Shodan, ThreatCrowd, ThreatMiner, Virustotal, WaybackArchive, Yahoo
> Ask, Archive.is, Baidu, Bing, Censys, CertDB, CertSpotter, Commoncrawl, CrtSH, DnsDB, DNSDumpster, Dogpile, Entrust CT-Search, Exalead, FindSubdomains, GoogleTER, Hackertarget, IPv4Info, Netcraft, PassiveTotal, PTRArchive, Riddler, SecurityTrails, SiteDossier, Shodan, ThreatCrowd, ThreatMiner, Virustotal, WaybackArchive, Yahoo
***We ensure that we abide by the terms and conditions of all sources that we query. For this reason we don't perform scraping on any site that doesn't allow it.***

View File

@ -31,6 +31,7 @@ import (
"github.com/subfinder/subfinder/libsubfinder/sources/certdb"
"github.com/subfinder/subfinder/libsubfinder/sources/certificatetransparency"
"github.com/subfinder/subfinder/libsubfinder/sources/certspotter"
"github.com/subfinder/subfinder/libsubfinder/sources/commoncrawl"
"github.com/subfinder/subfinder/libsubfinder/sources/crtsh"
"github.com/subfinder/subfinder/libsubfinder/sources/dnsdb"
"github.com/subfinder/subfinder/libsubfinder/sources/dnsdumpster"
@ -66,6 +67,7 @@ type Source struct {
Bing bool
Censys bool
Certdb bool
Commoncrawl bool
Crtsh bool
Certspotter bool
Dnsdb bool
@ -99,6 +101,7 @@ func (s *Source) enableAll() {
s.Censys = true
s.Certdb = true
s.Certspotter = true
s.Commoncrawl = true
s.Crtsh = true
s.Dnsdb = true
s.Dnsdumpster = true
@ -140,6 +143,8 @@ func (s *Source) enable(dataSources []string) {
s.Certdb = true
case "certspotter":
s.Certspotter = true
case "commoncrawl":
s.Commoncrawl = true
case "crtsh":
s.Crtsh = true
case "dnsdb":
@ -205,6 +210,8 @@ func (s *Source) disable(dataSources []string) {
s.Certdb = false
case "certspotter":
s.Certspotter = false
case "commoncrawl":
s.Commoncrawl = false
case "crtsh":
s.Crtsh = false
case "dnsdb":
@ -257,6 +264,7 @@ func (s *Source) disable(dataSources []string) {
s.Censys = false
s.Certdb = false
s.Certspotter = false
s.Commoncrawl = false
s.Crtsh = false
s.Dnsdb = false
s.Dnsdumpster = false
@ -309,6 +317,9 @@ func (s *Source) printSummary() {
if s.Certspotter {
fmt.Printf("\nRunning Source: %sCertspotter%s", helper.Info, helper.Reset)
}
if s.Commoncrawl {
fmt.Printf("\nRunning Source: %sCommoncrawl%s", helper.Info, helper.Reset)
}
if s.Crtsh {
fmt.Printf("\nRunning Source: %sCrt.sh%s", helper.Info, helper.Reset)
}
@ -534,6 +545,9 @@ func discover(state *helper.State, domain string, sourceConfig *Source) (subdoma
if sourceConfig.Googleter {
domainDiscoverPool.Add(googleter.Query, domain, state)
}
if sourceConfig.Commoncrawl {
domainDiscoverPool.Add(commoncrawl.Query, domain, state)
}
domainDiscoverPool.Wait()

View File

@ -0,0 +1,70 @@
//
// Written By : @ice3man (Nizamul Rana)
//
// Distributed Under MIT License
// Copyrights (C) 2018 Ice3man
//
// Package commoncrawl is a Go client for parsing subdomains from Commoncrawl.
package commoncrawl
import (
"fmt"
"io/ioutil"
"github.com/subfinder/subfinder/libsubfinder/helper"
)
// commoncrawlObject describes one JSON record of the index output; the
// record's "url" key is mapped onto NameValue.
type commoncrawlObject struct {
	NameValue string `json:"url"`
}

var (
	// subdomains is a package-level list of subdomain names.
	subdomains []string

	// commoncrawlData is a package-level list for the results returned by
	// the service.
	// NOTE(review): nothing visible in this file assigns to it - confirm
	// whether it is still needed.
	commoncrawlData []string
)
// Query returns the subdomains found for a domain using the Commoncrawl
// index service.
//
// args[0] must be the target domain (string) and args[1] the run state
// (*helper.State); any other shape panics on the type assertions below.
// The result is a []string wrapped in an interface{}. If the request or the
// body read fails, the error is printed (unless state.Silent is set) and an
// empty result is returned.
func Query(args ...interface{}) interface{} {
	domain := args[0].(string)
	state := args[1].(*helper.State)

	// Collect into a local slice rather than package-level state, so results
	// from an earlier call never leak into this one and concurrent calls do
	// not share a slice.
	var subdomains []string

	// Make an HTTP request to the Commoncrawl index.
	// NOTE(review): the crawl snapshot (CC-MAIN-2018-17) is hard-coded, so
	// results are limited to that index - confirm this is intended.
	resp, err := helper.GetHTTPResponse("http://index.commoncrawl.org/CC-MAIN-2018-17-index?url=*."+domain+"&output=json", state.Timeout)
	if err != nil {
		if !state.Silent {
			fmt.Printf("\ncommoncrawl: %v\n", err)
		}
		return subdomains
	}
	// Always release the response body, otherwise the underlying connection
	// is leaked.
	defer resp.Body.Close()

	// Get the response body
	respBody, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		if !state.Silent {
			fmt.Printf("\ncommoncrawl: %v\n", err)
		}
		return subdomains
	}

	// Hand the raw body to the shared extractor and keep each name once.
	for _, subdomain := range helper.ExtractSubdomains(string(respBody), domain) {
		if helper.SubdomainExists(subdomain, subdomains) {
			continue
		}

		if state.Verbose {
			if state.Color {
				fmt.Printf("\n[%sCommoncrawl%s] %s", helper.Red, helper.Reset, subdomain)
			} else {
				fmt.Printf("\n[Commoncrawl] %s", subdomain)
			}
		}

		subdomains = append(subdomains, subdomain)
	}

	return subdomains
}