From 8d904709c0ec5fd3979286dd023d25bd593303a2 Mon Sep 17 00:00:00 2001 From: xxcdd <42600601+xxcdd@users.noreply.github.com> Date: Thu, 1 Feb 2024 15:19:20 +0800 Subject: [PATCH] HybridTechDetection with wappalyzer and tech templates. (#4656) * HybridTechDetection with wappalyzer and tech templates. * automatic scan: refactor + misc updates * fix progress multi init panic * fix no result found statement * fix target in print statement * include detect,favicon tags as part of detection --------- Co-authored-by: Tarun Koyalwar --- internal/runner/runner.go | 4 +- .../common/automaticscan/automaticscan.go | 280 +++++++++++------- pkg/protocols/common/automaticscan/util.go | 63 ++++ 3 files changed, 245 insertions(+), 102 deletions(-) create mode 100644 pkg/protocols/common/automaticscan/util.go diff --git a/internal/runner/runner.go b/internal/runner/runner.go index 5924a8b0..f7954c2c 100644 --- a/internal/runner/runner.go +++ b/internal/runner/runner.go @@ -549,7 +549,9 @@ func (r *Runner) executeSmartWorkflowInput(executorOpts protocols.ExecutorOption if err != nil { return nil, errors.Wrap(err, "could not create automatic scan service") } - service.Execute() + if err := service.Execute(); err != nil { + return nil, errors.Wrap(err, "could not execute automatic scan") + } result := &atomic.Bool{} result.Store(service.Close()) return result, nil diff --git a/pkg/protocols/common/automaticscan/automaticscan.go b/pkg/protocols/common/automaticscan/automaticscan.go index 95051005..abca61cd 100644 --- a/pkg/protocols/common/automaticscan/automaticscan.go +++ b/pkg/protocols/common/automaticscan/automaticscan.go @@ -6,39 +6,39 @@ import ( "os" "path/filepath" "strings" + "sync" + "sync/atomic" + "github.com/logrusorgru/aurora" "github.com/pkg/errors" "github.com/projectdiscovery/gologger" "github.com/projectdiscovery/nuclei/v3/pkg/catalog/config" "github.com/projectdiscovery/nuclei/v3/pkg/catalog/loader" "github.com/projectdiscovery/nuclei/v3/pkg/core" + "github.com/projectdiscovery/nuclei/v3/pkg/core/inputs" + "github.com/projectdiscovery/nuclei/v3/pkg/output" "github.com/projectdiscovery/nuclei/v3/pkg/protocols" "github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/contextargs" + "github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/helpers/writer" "github.com/projectdiscovery/nuclei/v3/pkg/protocols/http/httpclientpool" httputil "github.com/projectdiscovery/nuclei/v3/pkg/protocols/utils/http" + "github.com/projectdiscovery/nuclei/v3/pkg/scan" "github.com/projectdiscovery/nuclei/v3/pkg/templates" - "github.com/projectdiscovery/nuclei/v3/pkg/templates/types" + "github.com/projectdiscovery/nuclei/v3/pkg/testutils" "github.com/projectdiscovery/retryablehttp-go" "github.com/projectdiscovery/useragent" + mapsutil "github.com/projectdiscovery/utils/maps" sliceutil "github.com/projectdiscovery/utils/slice" + stringsutil "github.com/projectdiscovery/utils/strings" wappalyzer "github.com/projectdiscovery/wappalyzergo" + "github.com/remeh/sizedwaitgroup" "gopkg.in/yaml.v2" ) -// Service is a service for automatic scan execution -type Service struct { - opts protocols.ExecutorOptions - store *loader.Store - engine *core.Engine - target core.InputProvider - wappalyzer *wappalyzer.Wappalyze - childExecuter *core.ChildExecuter - httpclient *retryablehttp.Client - - results bool - allTemplates []string - technologyMappings map[string]string -} +const ( + mappingFilename = "wappalyzer-mapping.yml" + maxDefaultBody = 4 * 1024 * 1024 // 4MB +) // Options contains configuration options for automatic scan service type Options struct { @@ -48,7 +48,21 @@ type Options struct { Target core.InputProvider } -const mappingFilename = "wappalyzer-mapping.yml" +// Service is a service for automatic scan execution +type Service struct { + opts protocols.ExecutorOptions + store *loader.Store + engine *core.Engine + target core.InputProvider + wappalyzer *wappalyzer.Wappalyze + childExecuter *core.ChildExecuter + httpclient *retryablehttp.Client + templateDirs []string // root Template Directories + technologyMappings map[string]string + techTemplates []*templates.Template + ServiceOpts Options + hasResults *atomic.Bool +} // New takes options and returns a new automatic scan service func New(opts Options) (*Service, error) { @@ -57,35 +71,30 @@ func New(opts Options) (*Service, error) { return nil, err } + // load extra mapping from nuclei-templates for normalization var mappingData map[string]string - config := config.DefaultConfig - - mappingFile := filepath.Join(config.TemplatesDirectory, mappingFilename) + mappingFile := filepath.Join(config.DefaultConfig.GetTemplateDir(), mappingFilename) if file, err := os.Open(mappingFile); err == nil { _ = yaml.NewDecoder(file).Decode(&mappingData) file.Close() } - if opts.ExecuterOpts.Options.Verbose { gologger.Verbose().Msgf("Normalized mapping (%d): %v\n", len(mappingData), mappingData) } - defaultTemplatesDirectories := []string{config.TemplatesDirectory} - // adding custom template path if available - if len(opts.ExecuterOpts.Options.Templates) > 0 { - defaultTemplatesDirectories = append(defaultTemplatesDirectories, opts.ExecuterOpts.Options.Templates...) + // get template directories + templateDirs, err := getTemplateDirs(opts) + if err != nil { + return nil, err } - // Collect path for default directories we want to look for templates in - var allTemplates []string - for _, directory := range defaultTemplatesDirectories { - templates, err := opts.ExecuterOpts.Catalog.GetTemplatePath(directory) - if err != nil { - return nil, errors.Wrap(err, "could not get templates in directory") - } - allTemplates = append(allTemplates, templates...) + + // load tech detect templates + techDetectTemplates, err := LoadTemplatesWithTags(opts, templateDirs, []string{"tech", "detect", "favicon"}, true) + if err != nil { + return nil, err } + childExecuter := opts.Engine.ChildExecuter() - httpclient, err := httpclientpool.Get(opts.ExecuterOpts.Options, &httpclientpool.Configuration{ Connection: &httpclientpool.ConnectionConfiguration{ DisableKeepAlive: httputil.ShouldDisableKeepAlive(opts.ExecuterOpts.Options), @@ -94,95 +103,119 @@ func New(opts Options) (*Service, error) { if err != nil { return nil, errors.Wrap(err, "could not get http client") } - return &Service{ opts: opts.ExecuterOpts, store: opts.Store, engine: opts.Engine, target: opts.Target, wappalyzer: wappalyzer, - allTemplates: allTemplates, + templateDirs: templateDirs, // fix this childExecuter: childExecuter, httpclient: httpclient, technologyMappings: mappingData, + techTemplates: techDetectTemplates, + ServiceOpts: opts, + hasResults: &atomic.Bool{}, }, nil } // Close closes the service func (s *Service) Close() bool { - results := s.childExecuter.Close() - if results.Load() { - s.results = true - } - return s.results + return s.hasResults.Load() } -// Execute performs the execution of automatic scan on provided input -func (s *Service) Execute() { - if err := s.executeWappalyzerTechDetection(); err != nil { - gologger.Error().Msgf("Could not execute wappalyzer based detection: %s", err) - } -} - -const maxDefaultBody = 2 * 1024 * 1024 - -// executeWappalyzerTechDetection implements the logic to run the wappalyzer -// technologies detection on inputs which returns tech. -// -// The returned tags are then used for further execution. -func (s *Service) executeWappalyzerTechDetection() error { - gologger.Info().Msgf("Executing wappalyzer based tech detection on input urls") - - // Iterate through each target making http request and identifying fingerprints - inputPool := s.engine.WorkPool().InputPool(types.HTTPProtocol) - +// Execute automatic scan on each target with -bs host concurrency +func (s *Service) Execute() error { + gologger.Info().Msgf("Executing Automatic scan on %d target[s]", s.target.Count()) + // setup host concurrency + sg := sizedwaitgroup.New(s.opts.Options.BulkSize) s.target.Scan(func(value *contextargs.MetaInput) bool { - inputPool.WaitGroup.Add() - + sg.Add() go func(input *contextargs.MetaInput) { - defer inputPool.WaitGroup.Done() - - s.processWappalyzerInputPair(input) + defer sg.Done() + s.executeAutomaticScanOnTarget(input) }(value) return true }) - inputPool.WaitGroup.Wait() + sg.Wait() return nil } -func (s *Service) processWappalyzerInputPair(input *contextargs.MetaInput) { +// executeAutomaticScanOnTarget executes automatic scan on given target +func (s *Service) executeAutomaticScanOnTarget(input *contextargs.MetaInput) { + // get tags using wappalyzer + tagsFromWappalyzer := s.getTagsUsingWappalyzer(input) + // get tags using detection templates + tagsFromDetectTemplates, matched := s.getTagsUsingDetectionTemplates(input) + if matched > 0 { + s.hasResults.Store(true) + } + + // create combined final tags + finalTags := []string{} + for _, tags := range append(tagsFromWappalyzer, tagsFromDetectTemplates...) { + if stringsutil.EqualFoldAny(tags, "tech", "waf", "favicon") { + continue + } + finalTags = append(finalTags, tags) + } + finalTags = sliceutil.Dedupe(finalTags) + + gologger.Info().Msgf("Found %d tags and %d matches on detection templates on %v [wappalyzer: %d, detection: %d]\n", len(finalTags), matched, input.Input, len(tagsFromWappalyzer), len(tagsFromDetectTemplates)) + + // also include any extra tags passed by user + finalTags = append(finalTags, s.opts.Options.Tags...) + finalTags = sliceutil.Dedupe(finalTags) + + if len(finalTags) == 0 { + gologger.Warning().Msgf("Skipping automatic scan since no tags were found on %v\n", input.Input) + return + } + gologger.Verbose().Msgf("Final tags identified for %v: %+v\n", input.Input, finalTags) + + finalTemplates, err := LoadTemplatesWithTags(s.ServiceOpts, s.templateDirs, finalTags, false) + if err != nil { + gologger.Error().Msgf("%v Error loading templates: %s\n", input.Input, err) + return + } + gologger.Info().Msgf("Executing %d templates on %v", len(finalTemplates), input.Input) + eng := core.New(s.opts.Options) + execOptions := s.opts.Copy() + execOptions.Progress = &testutils.MockProgressClient{} // stats are not supported yet due to centralized logic and cannot be reinitialized + eng.SetExecuterOptions(execOptions) + tmp := eng.ExecuteScanWithOpts(finalTemplates, &inputs.SimpleInputProvider{Inputs: []*contextargs.MetaInput{input}}, true) + s.hasResults.Store(tmp.Load()) +} + +// getTagsUsingWappalyzer returns tags using wappalyzer by fingerprinting target +// and utilizing the mapping data +func (s *Service) getTagsUsingWappalyzer(input *contextargs.MetaInput) []string { req, err := retryablehttp.NewRequest(http.MethodGet, input.Input, nil) if err != nil { - return + return nil } userAgent := useragent.PickRandom() req.Header.Set("User-Agent", userAgent.Raw) resp, err := s.httpclient.Do(req) if err != nil { - if resp != nil { - resp.Body.Close() - } - return + return nil } - reader := io.LimitReader(resp.Body, maxDefaultBody) - data, err := io.ReadAll(reader) + defer resp.Body.Close() + data, err := io.ReadAll(io.LimitReader(resp.Body, maxDefaultBody)) if err != nil { - resp.Body.Close() - return + return nil } - resp.Body.Close() + // fingerprint headers and body fingerprints := s.wappalyzer.Fingerprint(resp.Header, data) normalized := make(map[string]struct{}) for k := range fingerprints { normalized[normalizeAppName(k)] = struct{}{} } + gologger.Verbose().Msgf("Found %d fingerprints for %s\n", len(normalized), input.Input) - if s.opts.Options.Verbose { - gologger.Verbose().Msgf("Wappalyzer fingerprints %v for %s\n", normalized, input) - } - + // normalize fingerprints using mapping data for k := range normalized { // Replace values with mapping data if value, ok := s.technologyMappings[k]; ok { @@ -190,7 +223,7 @@ func (s *Service) processWappalyzerInputPair(input *contextargs.MetaInput) { normalized[value] = struct{}{} } } - + // more post processing items := make([]string, 0, len(normalized)) for k := range normalized { if strings.Contains(k, " ") { @@ -200,28 +233,73 @@ func (s *Service) processWappalyzerInputPair(input *contextargs.MetaInput) { items = append(items, strings.ToLower(k)) } } - if len(items) == 0 { - return - } - // Add tags as addition to -as for comprehensive scans. Ref: nuclei/issues/3348 - items = append(items, s.opts.Options.Tags...) - uniqueTags := sliceutil.Dedupe(items) - - templatesList := s.store.LoadTemplatesWithTags(s.allTemplates, uniqueTags) - gologger.Info().Msgf("Executing tags (%v) for host %s (%d templates)", strings.Join(uniqueTags, ","), input, len(templatesList)) - for _, t := range templatesList { - s.opts.Progress.AddToTotal(int64(t.Executer.Requests())) - - if s.opts.Options.VerboseVerbose { - gologger.Print().Msgf("%s\n", templates.TemplateLogMessage(t.ID, - t.Info.Name, - t.Info.Authors.ToSlice(), - t.Info.SeverityHolder.Severity)) - } - s.childExecuter.Execute(t, input) - } + return sliceutil.Dedupe(items) } +// getTagsUsingDetectionTemplates returns tags using detection templates +func (s *Service) getTagsUsingDetectionTemplates(input *contextargs.MetaInput) ([]string, int) { + ctxArgs := contextargs.NewWithInput(input.Input) + + // execute tech detection templates on target + tags := map[string]struct{}{} + m := &sync.Mutex{} + sg := sizedwaitgroup.New(s.opts.Options.TemplateThreads) + counter := atomic.Uint32{} + + // run + for _, t := range s.techTemplates { + sg.Add() + go func(template *templates.Template) { + defer sg.Done() + ctx := scan.NewScanContext(ctxArgs) + ctx.OnResult = func(event *output.InternalWrappedEvent) { + if event == nil { + return + } + if event.HasOperatorResult() && event.OperatorsResult.Matched { + // match found + // find unique tags + m.Lock() + for _, tag := range template.Info.Tags.ToSlice() { + // we shouldn't add all tags since tags also contain protocol type tags + // and are not just limited to products or technologies + // ex: tags: js,mssql,detect,network + + // A good trick for this is check if tag is present in template-id + if !strings.Contains(template.ID, tag) { + // unlikely this is relevant + continue + } + if _, ok := tags[tag]; !ok { + tags[tag] = struct{}{} + } + // matcher names are also relevant in tech detection templates (ex: tech-detect) + for k := range event.OperatorsResult.Matches { + if _, ok := tags[k]; !ok { + tags[k] = struct{}{} + } + } + } + m.Unlock() + _ = counter.Add(1) + + // TBD: should we show or hide tech detection results? what about matcher-status flag? + _ = writer.WriteResult(event, s.opts.Output, s.opts.Progress, s.opts.IssuesClient) + } + } + + _, err := template.Executer.ExecuteWithResults(ctx) + if err != nil { + gologger.Verbose().Msgf("[%s] error executing template: %s\n", aurora.BrightYellow(template.ID), err) + return + } + }(t) + } + sg.Wait() + return mapsutil.GetKeys(tags), int(counter.Load()) +} + +// normalizeAppName normalizes app name func normalizeAppName(appName string) string { if strings.Contains(appName, ":") { if parts := strings.Split(appName, ":"); len(parts) == 2 { diff --git a/pkg/protocols/common/automaticscan/util.go b/pkg/protocols/common/automaticscan/util.go new file mode 100644 index 00000000..376f8053 --- /dev/null +++ b/pkg/protocols/common/automaticscan/util.go @@ -0,0 +1,63 @@ +package automaticscan + +import ( + "github.com/pkg/errors" + "github.com/projectdiscovery/gologger" + "github.com/projectdiscovery/nuclei/v3/pkg/catalog/config" + "github.com/projectdiscovery/nuclei/v3/pkg/templates" + sliceutil "github.com/projectdiscovery/utils/slice" +) + +// getTemplateDirs returns template directories for given input +// by default it returns default template directory +func getTemplateDirs(opts Options) ([]string, error) { + defaultTemplatesDirectories := []string{config.DefaultConfig.GetTemplateDir()} + // adding custom template path if available + if len(opts.ExecuterOpts.Options.Templates) > 0 { + defaultTemplatesDirectories = append(defaultTemplatesDirectories, opts.ExecuterOpts.Options.Templates...) + } + // Collect path for default directories we want to look for templates in + var allTemplates []string + for _, directory := range defaultTemplatesDirectories { + templates, err := opts.ExecuterOpts.Catalog.GetTemplatePath(directory) + if err != nil { + return nil, errors.Wrap(err, "could not get templates in directory") + } + allTemplates = append(allTemplates, templates...) + } + allTemplates = sliceutil.Dedupe(allTemplates) + if len(allTemplates) == 0 { + return nil, errors.New("no templates found for given input") + } + return allTemplates, nil +} + +// LoadTemplatesWithTags loads and returns templates with given tags +func LoadTemplatesWithTags(opts Options, templateDirs []string, tags []string, logInfo bool) ([]*templates.Template, error) { + tagTemplates := opts.Store.LoadTemplatesWithTags(templateDirs, tags) + if len(tagTemplates) == 0 { + return nil, errors.New("could not find any templates with tech tag") + } + // cluster and reduce requests + totalReqBeforeCluster := getRequestCount(tagTemplates) * int(opts.Target.Count()) + finalTemplates, clusterCount := templates.ClusterTemplates(tagTemplates, opts.ExecuterOpts) + totalReqAfterClustering := getRequestCount(finalTemplates) * int(opts.Target.Count()) + if totalReqAfterClustering < totalReqBeforeCluster && logInfo { + gologger.Info().Msgf("Automatic scan tech-detect: Templates clustered: %d (Reduced %d Requests)", clusterCount, totalReqBeforeCluster-totalReqAfterClustering) + } + return finalTemplates, nil +} + +// returns total requests count +func getRequestCount(templates []*templates.Template) int { + count := 0 + for _, template := range templates { + // ignore requests in workflows as total requests in workflow + // depends on what templates will be called in workflow + if len(template.Workflows) > 0 { + continue + } + count += template.TotalRequests + } + return count +}