archiveis - fix panic from closing a closed channel

When one of the recursive calls encountered an error, it closed the channel without setting the closed flag, so the guarded close in an outer call then closed the same channel a second time and panicked. This change moves the channel close outside enumerate() so there's only one place that needs to close it.

Also removed the loop in enumerate(), which wasn't necessary.
master
Donncha Fahy 2020-02-05 11:57:19 +11:00
parent ad3c7e8df8
commit d7b50e0b03
1 changed file with 28 additions and 39 deletions

View File

@ -13,54 +13,40 @@ import (
// ArchiveIs carries the state that enumerate works with for the "archiveis"
// source: where results are sent and which session performs the requests.
type ArchiveIs struct {
// Results is the channel enumerate sends subdomain and error results on.
Results chan subscraping.Result
// Session provides the context-aware HTTP GET and the subdomain extractor
// that enumerate calls.
Session *subscraping.Session
// closed is set to true by enumerate's guarded close of Results, so that
// the guarded path does not close the channel a second time.
closed bool
}
// reNext matches the anchor with id "next" whose link text is the &rarr;
// entity and captures its href value in the first submatch.
var reNext = regexp.MustCompile(`<a id="next" style=".*" href="(.*)">&rarr;</a>`)
// enumerate fetches baseURL through the session, sends every string the
// session's extractor finds in the response body to a.Results as a Subdomain
// result, and then calls itself on the "next" page link captured by reNext,
// if the page contains one. Request and body-read failures are sent on
// a.Results as Error results before returning.
//
// NOTE(review): this listing appears to interleave the pre-change and
// post-change lines of the commit without +/- markers, so several statements
// occur twice and the braces do not balance as shown. Read it as a diff, not
// as a single compilable body.
func (a *ArchiveIs) enumerate(ctx context.Context, baseURL string) {
// NOTE(review): presumably the loop the commit message calls unnecessary;
// the default branch below ends in a return.
for {
select {
case <-ctx.Done():
// Cancellation path: closes the result channel directly, without
// touching a.closed.
close(a.Results)
return
default:
resp, err := a.Session.NormalGetWithContext(ctx, baseURL)
if err != nil {
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Error, Error: err}
// Error path: also closes the channel without setting a.closed.
close(a.Results)
return
}
// Non-blocking cancellation check: return if ctx is already done,
// otherwise fall through immediately.
select {
case <-ctx.Done():
return
default:
}
// Get the response body
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Error, Error: err}
resp.Body.Close()
// Error path: closes the channel without setting a.closed.
close(a.Results)
return
}
resp.Body.Close()
resp, err := a.Session.NormalGetWithContext(ctx, baseURL)
if err != nil {
// Report the failure and return; the channel is not closed here.
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Error, Error: err}
return
}
src := string(body)
// Get the response body
body, err := ioutil.ReadAll(resp.Body)
// The body is closed before err is inspected, so it is released on both
// the success and the failure path.
resp.Body.Close()
if err != nil {
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Error, Error: err}
return
}
// Emit one Subdomain result per extractor match in the page source.
for _, subdomain := range a.Session.Extractor.FindAllString(src, -1) {
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Subdomain, Value: subdomain}
}
src := string(body)
// Follow the pagination link: match1[1] is the href captured by reNext.
match1 := reNext.FindStringSubmatch(src)
if len(match1) > 0 {
a.enumerate(ctx, match1[1])
}
for _, subdomain := range a.Session.Extractor.FindAllString(src, -1) {
a.Results <- subscraping.Result{Source: "archiveis", Type: subscraping.Subdomain, Value: subdomain}
}
// Guard channel closing during recursion
// Only this path consults and sets a.closed; the direct close calls above
// bypass the flag.
if !a.closed {
close(a.Results)
a.closed = true
}
return
}
// Follow the pagination link: match1[1] is the href captured by reNext.
match1 := reNext.FindStringSubmatch(src)
if len(match1) > 0 {
a.enumerate(ctx, match1[1])
}
}
@ -76,7 +62,10 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se
Results: results,
}
go aInstance.enumerate(ctx, "http://archive.is/*."+domain)
go func() {
aInstance.enumerate(ctx, "http://archive.is/*."+domain)
close(aInstance.Results)
}()
return aInstance.Results
}