use maxsize (#4814)

dev
Dogan Can Bakir 2024-02-29 00:51:17 +03:00 committed by GitHub
parent 30bbdd0163
commit 6abff96435
GPG Key ID: B5690EEEBB952194
6 changed files with 13 additions and 409 deletions


@@ -1,169 +0,0 @@
package httputils
import (
"bytes"
"fmt"
"net/http"
"sync"
protoUtil "github.com/projectdiscovery/nuclei/v3/pkg/protocols/utils"
)
// use a buffer pool for storing response bodies
// and reuse buffers across requests
var bufPool = sync.Pool{
New: func() any {
// The Pool's New function should generally only return pointer
// types, since a pointer can be put into the return interface
// value without an allocation:
return new(bytes.Buffer)
},
}
// getBuffer returns a buffer from the pool
func getBuffer() *bytes.Buffer {
return bufPool.Get().(*bytes.Buffer)
}
// putBuffer returns a buffer to the pool
func putBuffer(buf *bytes.Buffer) {
buf.Reset()
bufPool.Put(buf)
}
// Performance Notes:
// do not use http.Response once a ResponseChain has been created from it,
// as the chain reuses buffers, saves allocations, and also drains the response
// body automatically.
// In required cases the response can still be used, but never for anything
// related to the response body.
// The bytes.Buffer values returned by getters should not be retained and are
// only meant for convenience purposes like .String() or .Bytes() calls.
// Remember to call Close() on a ResponseChain once you are done with it.
// (a usage sketch follows this file)
// ResponseChain is a response chain for an http request;
// on every call to Previous it returns the previous response
// if the request was redirected.
type ResponseChain struct {
headers *bytes.Buffer
body *bytes.Buffer
fullResponse *bytes.Buffer
resp *http.Response
reloaded bool // if response was reloaded to its previous redirect
}
// NewResponseChain creates a new response chain for an http request
// with a maximum body size (-1 keeps the default of 4MB).
func NewResponseChain(resp *http.Response, maxBody int64) *ResponseChain {
if _, ok := resp.Body.(protoUtil.LimitResponseBody); !ok {
resp.Body = protoUtil.NewLimitResponseBodyWithSize(resp.Body, maxBody)
}
return &ResponseChain{
headers: getBuffer(),
body: getBuffer(),
fullResponse: getBuffer(),
resp: resp,
}
}
// Headers returns the current response headers in the chain
func (r *ResponseChain) Headers() *bytes.Buffer {
return r.headers
}
// Body returns the current response body in the chain
func (r *ResponseChain) Body() *bytes.Buffer {
return r.body
}
// FullResponse returns the full current response (headers and body) in the chain
func (r *ResponseChain) FullResponse() *bytes.Buffer {
return r.fullResponse
}
// Previous updates the response pointer to the previous response
// if the request was redirected, and returns true; otherwise it returns false
func (r *ResponseChain) Previous() bool {
if r.resp != nil && r.resp.Request != nil && r.resp.Request.Response != nil {
r.resp = r.resp.Request.Response
r.reloaded = true
return true
}
return false
}
// Fill fills the headers, body and fullResponse buffers from the current response
func (r *ResponseChain) Fill() error {
r.reset()
if r.resp == nil {
return fmt.Errorf("response is nil")
}
// load headers
err := DumpResponseIntoBuffer(r.resp, false, r.headers)
if err != nil {
return fmt.Errorf("error dumping response headers: %s", err)
}
if r.resp.StatusCode != http.StatusSwitchingProtocols && !r.reloaded {
// Note about reloaded:
// this is a known behaviour carried over from earlier versions:
// when a redirect is followed and operators are executed on the whole redirect chain,
// the body of those intermediate responses is not available since it has already been redirected.
// This is not an issue since redirects happen with an empty body according to the RFC,
// but the body may be required sometimes.
// Solution: manual redirect using dynamic matchers, or hijack redirected responses
// at the transport level, replace the body with a bytes buffer, and then use it
// load body
err = readNNormalizeRespBody(r, r.body)
if err != nil {
return fmt.Errorf("error reading response body: %s", err)
}
// response body should not be used anymore
// drain and close
DrainResponseBody(r.resp)
}
// join headers and body
r.fullResponse.Write(r.headers.Bytes())
r.fullResponse.Write(r.body.Bytes())
return nil
}
// Close closes the response chain and releases the buffers.
func (r *ResponseChain) Close() {
putBuffer(r.headers)
putBuffer(r.body)
putBuffer(r.fullResponse)
r.headers = nil
r.body = nil
r.fullResponse = nil
}
// Has returns true if the response chain has a response
func (r *ResponseChain) Has() bool {
return r.resp != nil
}
// Request returns the request of the current response
func (r *ResponseChain) Request() *http.Request {
if r.resp == nil {
return nil
}
return r.resp.Request
}
// Response returns the current response
func (r *ResponseChain) Response() *http.Response {
return r.resp
}
// reset without releasing the buffers
// useful for redirect chain
func (r *ResponseChain) reset() {
r.headers.Reset()
r.body.Reset()
r.fullResponse.Reset()
}
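
A minimal usage sketch of the chain API above, assuming the pre-move import path and a plain net/http response (the URL and error handling are illustrative, not part of the commit):

package main

import (
    "fmt"
    "net/http"

    "github.com/projectdiscovery/nuclei/v3/pkg/protocols/http/httputils"
)

func main() {
    resp, err := http.Get("https://example.com") // illustrative request
    if err != nil {
        panic(err)
    }
    respChain := httputils.NewResponseChain(resp, -1) // -1 keeps the default 4MB limit
    defer respChain.Close()                           // return buffers to the pool
    for respChain.Has() {
        if err := respChain.Fill(); err != nil { // dump headers and body into buffers
            break
        }
        fmt.Printf("%d response bytes\n", respChain.FullResponse().Len())
        if !respChain.Previous() { // walk back through the redirect chain
            break
        }
    }
}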


@@ -1,47 +0,0 @@
package httputils
import (
"bytes"
"errors"
"io"
"net/http"
"strings"
)
// implementations copied from stdlib
// errNoBody is a sentinel error value used by failureToReadBody so we
// can detect that the lack of body was intentional.
var errNoBody = errors.New("sentinel error value")
// failureToReadBody is an io.ReadCloser that just returns errNoBody on
// Read. It's swapped in when we don't actually want to consume
// the body, but need a non-nil one, and want to distinguish the
// error from reading the dummy body.
type failureToReadBody struct{}
func (failureToReadBody) Read([]byte) (int, error) { return 0, errNoBody }
func (failureToReadBody) Close() error { return nil }
// emptyBody is an instance of empty reader.
var emptyBody = io.NopCloser(strings.NewReader(""))
// drainBody reads all of b to memory and then returns two equivalent
// ReadClosers yielding the same bytes.
//
// It returns an error if the initial slurp of all bytes fails. It does not attempt
// to make the returned ReadClosers have identical error-matching behavior.
func drainBody(b io.ReadCloser) (r1, r2 io.ReadCloser, err error) {
if b == nil || b == http.NoBody {
// No copying needed. Preserve the magic sentinel meaning of NoBody.
return http.NoBody, http.NoBody, nil
}
var buf bytes.Buffer
if _, err = buf.ReadFrom(b); err != nil {
return nil, b, err
}
if err = b.Close(); err != nil {
return nil, b, err
}
return io.NopCloser(&buf), io.NopCloser(bytes.NewReader(buf.Bytes())), nil
}
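
Since drainBody is unexported, a sketch of its contract from inside the same package: one slurp of the body yields two readers that replay the same bytes (this test is illustrative, not part of the commit):

package httputils

import (
    "io"
    "strings"
    "testing"
)

// TestDrainBody is an illustrative sketch, not part of the commit.
func TestDrainBody(t *testing.T) {
    body := io.NopCloser(strings.NewReader("hello"))
    r1, r2, err := drainBody(body)
    if err != nil {
        t.Fatal(err)
    }
    b1, _ := io.ReadAll(r1) // copy put back on resp.Body for the caller
    b2, _ := io.ReadAll(r2) // copy consumed by resp.Write during the dump
    if string(b1) != "hello" || string(b2) != "hello" {
        t.Fatalf("expected both readers to replay the same bytes")
    }
}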


@@ -1,88 +0,0 @@
package httputils
import (
"bytes"
"compress/gzip"
"compress/zlib"
"fmt"
"io"
"net/http"
"strings"
"github.com/pkg/errors"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/transform"
stringsutil "github.com/projectdiscovery/utils/strings"
)
// readNNormalizeRespBody performs normalization on the http response object
// and fills the body buffer with the actual response body.
func readNNormalizeRespBody(rc *ResponseChain, body *bytes.Buffer) (err error) {
response := rc.resp
if response == nil {
return fmt.Errorf("something went wrong response is nil")
}
// net/http doesn't automatically decompress the response body if an
// encoding has been specified by the user in the request, so in that case
// we have to do it manually.
origBody := rc.resp.Body
if origBody == nil {
// skip normalization if body is nil
return nil
}
// wrap with decode if applicable
wrapped, err := wrapDecodeReader(response)
if err != nil {
wrapped = origBody
}
// read response body to buffer
_, err = body.ReadFrom(wrapped)
if err != nil {
if strings.Contains(err.Error(), "gzip: invalid header") {
// it's invalid gzip, but we will still read from the original body
_, err = body.ReadFrom(origBody)
if err != nil {
return errors.Wrap(err, "could not read response body after gzip error")
}
}
if stringsutil.ContainsAny(err.Error(), "unexpected EOF", "read: connection reset by peer", "user canceled") {
// keep partial body and continue (skip error) (add meta header in response for debugging)
if response.Header == nil {
response.Header = make(http.Header)
}
response.Header.Set("x-nuclei-ignore-error", err.Error())
return nil
}
return errors.Wrap(err, "could not read response body")
}
return nil
}
// wrapDecodeReader wraps a decompression reader around the response body if it's compressed
// using gzip or deflate.
func wrapDecodeReader(resp *http.Response) (rc io.ReadCloser, err error) {
switch resp.Header.Get("Content-Encoding") {
case "gzip":
rc, err = gzip.NewReader(resp.Body)
case "deflate":
rc, err = zlib.NewReader(resp.Body)
default:
rc = resp.Body
}
if err != nil {
return nil, err
}
// handle GBK encoding
if isContentTypeGbk(resp.Header.Get("Content-Type")) {
rc = io.NopCloser(transform.NewReader(rc, simplifiedchinese.GBK.NewDecoder()))
}
return rc, nil
}
// isContentTypeGbk checks if the content-type header is gbk
func isContentTypeGbk(contentType string) bool {
contentType = strings.ToLower(contentType)
return stringsutil.ContainsAny(contentType, "gbk", "gb2312", "gb18030")
}
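
An illustrative same-package test of the decode path: a gzip Content-Encoding header causes the body to be transparently decompressed (a GBK content type would additionally route through the GBK decoder). This sketch is not part of the commit:

package httputils

import (
    "bytes"
    "compress/gzip"
    "io"
    "net/http"
    "testing"
)

// TestWrapDecodeReader is an illustrative sketch, not part of the commit.
func TestWrapDecodeReader(t *testing.T) {
    var buf bytes.Buffer
    gw := gzip.NewWriter(&buf)
    _, _ = gw.Write([]byte("plain text"))
    gw.Close()

    resp := &http.Response{
        Header: http.Header{"Content-Encoding": []string{"gzip"}},
        Body:   io.NopCloser(&buf),
    }
    rc, err := wrapDecodeReader(resp)
    if err != nil {
        t.Fatal(err)
    }
    out, _ := io.ReadAll(rc)
    if string(out) != "plain text" {
        t.Fatalf("expected decompressed body, got %q", out)
    }
}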


@@ -1,52 +0,0 @@
package httputils
import (
"bytes"
"fmt"
"io"
"net/http"
protocolutil "github.com/projectdiscovery/nuclei/v3/pkg/protocols/utils"
)
// DumpResponseIntoBuffer dumps a http response without allocating a new buffer
// for the response body.
func DumpResponseIntoBuffer(resp *http.Response, body bool, buff *bytes.Buffer) (err error) {
if resp == nil {
return fmt.Errorf("response is nil")
}
save := resp.Body
savecl := resp.ContentLength
if !body {
// For content length of zero. Make sure the body is an empty
// reader, instead of returning error through failureToReadBody{}.
if resp.ContentLength == 0 {
resp.Body = emptyBody
} else {
resp.Body = failureToReadBody{}
}
} else if resp.Body == nil {
resp.Body = emptyBody
} else {
save, resp.Body, err = drainBody(resp.Body)
if err != nil {
return err
}
}
err = resp.Write(buff)
if err == errNoBody {
err = nil
}
resp.Body = save
resp.ContentLength = savecl
return
}
// DrainResponseBody drains the response body and closes it.
func DrainResponseBody(resp *http.Response) {
defer resp.Body.Close()
// don't reuse the connection and just close it if the body length is more than 2 * MaxBodyRead,
// to avoid DoS
_, _ = io.CopyN(io.Discard, resp.Body, 2*protocolutil.MaxBodyRead)
}
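
A sketch of the dump-then-drain pattern that ResponseChain.Fill relies on, assuming the pre-move import path (the request itself is illustrative):

package main

import (
    "bytes"
    "fmt"
    "net/http"

    "github.com/projectdiscovery/nuclei/v3/pkg/protocols/http/httputils"
)

func main() {
    resp, err := http.Get("https://example.com") // illustrative request
    if err != nil {
        panic(err)
    }
    var headers bytes.Buffer
    // body=false: only the status line and headers are written;
    // resp.Body is left intact for the caller to read
    if err := httputils.DumpResponseIntoBuffer(resp, false, &headers); err != nil {
        panic(err)
    }
    httputils.DrainResponseBody(resp) // discard up to 2*MaxBodyRead and close
    fmt.Print(headers.String())
}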


@@ -34,11 +34,11 @@ import (
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/http/httputils"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/http/signer"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/http/signerpool"
protocolutil "github.com/projectdiscovery/nuclei/v3/pkg/protocols/utils"
templateTypes "github.com/projectdiscovery/nuclei/v3/pkg/templates/types"
"github.com/projectdiscovery/nuclei/v3/pkg/types"
"github.com/projectdiscovery/rawhttp"
convUtil "github.com/projectdiscovery/utils/conversion"
httpUtils "github.com/projectdiscovery/utils/http"
"github.com/projectdiscovery/utils/reader"
sliceutil "github.com/projectdiscovery/utils/slice"
stringsutil "github.com/projectdiscovery/utils/strings"
@@ -49,6 +49,10 @@ const (
defaultMaxWorkers = 150
)
+var (
+MaxBodyRead = int64(1 << 22) // 4MB using shift operator
+)
// Type returns the type of the protocol request
func (request *Request) Type() templateTypes.ProtocolType {
return templateTypes.HTTPProtocol
@@ -753,10 +757,7 @@ func (request *Request) executeRequest(input *contextargs.Context, generatedRequ
}
}
}
-// global wrap response body reader
-if resp != nil && resp.Body != nil {
-resp.Body = protocolutil.NewLimitResponseBody(resp.Body)
-}
if err != nil {
// rawhttp doesn't support draining response bodies.
if resp != nil && resp.Body != nil && generatedRequest.rawRequest == nil && !generatedRequest.original.Pipeline {
@@ -802,17 +803,19 @@ func (request *Request) executeRequest(input *contextargs.Context, generatedRequ
request.options.Output.Request(request.options.TemplatePath, formedURL, request.Type().String(), err)
duration := time.Since(timeStart)
// define max body read limit
-maxBodylimit := -1 // stick to default 4MB
+maxBodylimit := MaxBodyRead // default 4MB
if request.MaxSize > 0 {
-maxBodylimit = request.MaxSize
-} else if request.options.Options.ResponseReadSize != 0 {
-maxBodylimit = request.options.Options.ResponseReadSize
+maxBodylimit = int64(request.MaxSize)
}
+if request.options.Options.ResponseReadSize != 0 {
+maxBodylimit = int64(request.options.Options.ResponseReadSize)
+}
// respChain is an http response chain that reads the response body
// efficiently by reusing buffers and does all decoding and optimizations
-respChain := httputils.NewResponseChain(resp, int64(maxBodylimit))
+respChain := httpUtils.NewResponseChain(resp, maxBodylimit)
defer respChain.Close() // reuse buffers
// we only intend to log/save the final redirected response
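
The hunk above also changes precedence: previously ResponseReadSize applied only when no template-level MaxSize was set, while now it is applied last and therefore wins. A hypothetical helper, effectiveLimit, mirrors the new logic:

// effectiveLimit is a hypothetical helper mirroring the hunk above;
// it is not part of the commit.
func effectiveLimit(templateMaxSize, responseReadSize int, def int64) int64 {
    limit := def // MaxBodyRead, 4MB by default
    if templateMaxSize > 0 {
        limit = int64(templateMaxSize) // template-level max-size
    }
    if responseReadSize != 0 {
        limit = int64(responseReadSize) // ResponseReadSize option, applied last
    }
    return limit
}

// effectiveLimit(0, 0, MaxBodyRead)       => 4194304 (default 4MB)
// effectiveLimit(1<<20, 0, MaxBodyRead)   => 1048576 (template max-size)
// effectiveLimit(1<<20, 512, MaxBodyRead) => 512     (ResponseReadSize wins)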


@@ -1,43 +0,0 @@
package utils
import (
"io"
)
var (
MaxBodyRead = int64(1 << 22) // 4MB using shift operator
)
var _ io.ReadCloser = &LimitResponseBody{}
type LimitResponseBody struct {
io.Reader
io.Closer
}
// NewLimitResponseBody wraps a response body with a limit reader,
// thus only allowing MaxBodyRead bytes (i.e. 4MB) to be read.
func NewLimitResponseBody(body io.ReadCloser) io.ReadCloser {
return NewLimitResponseBodyWithSize(body, MaxBodyRead)
}
// NewLimitResponseBodyWithSize wraps a response body with a limit reader
// of the given size (-1 keeps the default 4MB).
func NewLimitResponseBodyWithSize(body io.ReadCloser, size int64) io.ReadCloser {
if body == nil {
return nil
}
if size == -1 {
// stick to default 4MB
size = MaxBodyRead
}
return &LimitResponseBody{
Reader: io.LimitReader(body, size),
Closer: body,
}
}
// LimitBodyRead limits the body read to MaxBodyRead bytes.
func LimitBodyRead(r io.Reader) ([]byte, error) {
return io.ReadAll(io.LimitReader(r, MaxBodyRead))
}
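
A minimal sketch of the limiter being removed here, assuming the pre-move import path:

package main

import (
    "fmt"
    "io"
    "strings"

    protoUtil "github.com/projectdiscovery/nuclei/v3/pkg/protocols/utils"
)

func main() {
    body := io.NopCloser(strings.NewReader(strings.Repeat("A", 100)))
    limited := protoUtil.NewLimitResponseBodyWithSize(body, 10) // cap reads at 10 bytes
    data, _ := io.ReadAll(limited)
    fmt.Println(len(data)) // 10: bytes past the cap are silently truncated
}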