use maxsize (#4814)

dev
Dogan Can Bakir 2024-02-29 00:51:17 +03:00 committed by GitHub
parent 30bbdd0163
commit 6abff96435
GPG Key ID: B5690EEEBB952194
6 changed files with 13 additions and 409 deletions


@@ -1,169 +0,0 @@
package httputils
import (
"bytes"
"fmt"
"net/http"
"sync"
protoUtil "github.com/projectdiscovery/nuclei/v3/pkg/protocols/utils"
)
// use a buffer pool for storing response bodies
// and reuse buffers across requests
var bufPool = sync.Pool{
New: func() any {
// The Pool's New function should generally only return pointer
// types, since a pointer can be put into the return interface
// value without an allocation:
return new(bytes.Buffer)
},
}
// getBuffer returns a buffer from the pool
func getBuffer() *bytes.Buffer {
return bufPool.Get().(*bytes.Buffer)
}
// putBuffer returns a buffer to the pool
func putBuffer(buf *bytes.Buffer) {
buf.Reset()
bufPool.Put(buf)
}
// Performance Notes:
// do not use http.Response once a ResponseChain has been created from it,
// as the chain reuses buffers, saves allocations, and also drains the response
// body automatically.
// In required cases the response can still be used, but never for anything
// related to the response body.
// The bytes.Buffer values returned by getters should not be retained and are
// only meant for convenience purposes like .String() or .Bytes() calls.
// Remember to call Close() on a ResponseChain once you are done with it.
// (a usage sketch follows this file)
// ResponseChain is a response chain for an http request;
// on every call to Previous it returns the previous response
// if the request was redirected.
type ResponseChain struct {
headers *bytes.Buffer
body *bytes.Buffer
fullResponse *bytes.Buffer
resp *http.Response
reloaded bool // if response was reloaded to its previous redirect
}
// NewResponseChain creates a new response chain for an http request
// with a maximum body size (-1 keeps the default of 4MB).
func NewResponseChain(resp *http.Response, maxBody int64) *ResponseChain {
if _, ok := resp.Body.(protoUtil.LimitResponseBody); !ok {
resp.Body = protoUtil.NewLimitResponseBodyWithSize(resp.Body, maxBody)
}
return &ResponseChain{
headers: getBuffer(),
body: getBuffer(),
fullResponse: getBuffer(),
resp: resp,
}
}
// Headers returns the current response headers in the chain
func (r *ResponseChain) Headers() *bytes.Buffer {
return r.headers
}
// Body returns the current response body in the chain
func (r *ResponseChain) Body() *bytes.Buffer {
return r.body
}
// FullResponse returns the full current response (headers and body) in the chain
func (r *ResponseChain) FullResponse() *bytes.Buffer {
return r.fullResponse
}
// Previous updates the response pointer to the previous response
// if the request was redirected, and returns true; otherwise it returns false
func (r *ResponseChain) Previous() bool {
if r.resp != nil && r.resp.Request != nil && r.resp.Request.Response != nil {
r.resp = r.resp.Request.Response
r.reloaded = true
return true
}
return false
}
// Fill fills the headers, body and fullResponse buffers from the current response
func (r *ResponseChain) Fill() error {
r.reset()
if r.resp == nil {
return fmt.Errorf("response is nil")
}
// load headers
err := DumpResponseIntoBuffer(r.resp, false, r.headers)
if err != nil {
return fmt.Errorf("error dumping response headers: %s", err)
}
if r.resp.StatusCode != http.StatusSwitchingProtocols && !r.reloaded {
// Note about reloaded:
// this is a known behaviour carried over from earlier versions:
// when a redirect is followed and operators are executed on the whole redirect chain,
// the body of those intermediate responses is not available since it has already been redirected.
// This is not an issue since redirects happen with an empty body according to the RFC,
// but the body may be required sometimes.
// Solution: manual redirect using dynamic matchers, or hijack redirected responses
// at the transport level, replace the body with a bytes buffer, and then use it
// load body
err = readNNormalizeRespBody(r, r.body)
if err != nil {
return fmt.Errorf("error reading response body: %s", err)
}
// response body should not be used anymore
// drain and close
DrainResponseBody(r.resp)
}
// join headers and body
r.fullResponse.Write(r.headers.Bytes())
r.fullResponse.Write(r.body.Bytes())
return nil
}
// Close closes the response chain and releases the buffers.
func (r *ResponseChain) Close() {
putBuffer(r.headers)
putBuffer(r.body)
putBuffer(r.fullResponse)
r.headers = nil
r.body = nil
r.fullResponse = nil
}
// Has returns true if the response chain has a response
func (r *ResponseChain) Has() bool {
return r.resp != nil
}
// Request returns the request of the current response
func (r *ResponseChain) Request() *http.Request {
if r.resp == nil {
return nil
}
return r.resp.Request
}
// Response returns the current response
func (r *ResponseChain) Response() *http.Response {
return r.resp
}
// reset without releasing the buffers
// useful for redirect chain
func (r *ResponseChain) reset() {
r.headers.Reset()
r.body.Reset()
r.fullResponse.Reset()
}
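
A minimal usage sketch of the chain API above, assuming the pre-move import path and a plain net/http response (the URL and error handling are illustrative, not part of the commit):

package main

import (
    "fmt"
    "net/http"

    "github.com/projectdiscovery/nuclei/v3/pkg/protocols/http/httputils"
)

func main() {
    resp, err := http.Get("https://example.com") // illustrative request
    if err != nil {
        panic(err)
    }
    respChain := httputils.NewResponseChain(resp, -1) // -1 keeps the default 4MB limit
    defer respChain.Close()                           // return buffers to the pool
    for respChain.Has() {
        if err := respChain.Fill(); err != nil { // dump headers and body into buffers
            break
        }
        fmt.Printf("%d response bytes\n", respChain.FullResponse().Len())
        if !respChain.Previous() { // walk back through the redirect chain
            break
        }
    }
}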


@@ -1,47 +0,0 @@
package httputils
import (
"bytes"
"errors"
"io"
"net/http"
"strings"
)
// implementations copied from stdlib
// errNoBody is a sentinel error value used by failureToReadBody so we
// can detect that the lack of body was intentional.
var errNoBody = errors.New("sentinel error value")
// failureToReadBody is an io.ReadCloser that just returns errNoBody on
// Read. It's swapped in when we don't actually want to consume
// the body, but need a non-nil one, and want to distinguish the
// error from reading the dummy body.
type failureToReadBody struct{}
func (failureToReadBody) Read([]byte) (int, error) { return 0, errNoBody }
func (failureToReadBody) Close() error { return nil }
// emptyBody is an instance of empty reader.
var emptyBody = io.NopCloser(strings.NewReader(""))
// drainBody reads all of b to memory and then returns two equivalent
// ReadClosers yielding the same bytes.
//
// It returns an error if the initial slurp of all bytes fails. It does not attempt
// to make the returned ReadClosers have identical error-matching behavior.
func drainBody(b io.ReadCloser) (r1, r2 io.ReadCloser, err error) {
if b == nil || b == http.NoBody {
// No copying needed. Preserve the magic sentinel meaning of NoBody.
return http.NoBody, http.NoBody, nil
}
var buf bytes.Buffer
if _, err = buf.ReadFrom(b); err != nil {
return nil, b, err
}
if err = b.Close(); err != nil {
return nil, b, err
}
return io.NopCloser(&buf), io.NopCloser(bytes.NewReader(buf.Bytes())), nil
}
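
Since drainBody is unexported, a sketch of its contract from inside the same package: one slurp of the body yields two readers that replay the same bytes (this test is illustrative, not part of the commit):

package httputils

import (
    "io"
    "strings"
    "testing"
)

// TestDrainBody is an illustrative sketch, not part of the commit.
func TestDrainBody(t *testing.T) {
    body := io.NopCloser(strings.NewReader("hello"))
    r1, r2, err := drainBody(body)
    if err != nil {
        t.Fatal(err)
    }
    b1, _ := io.ReadAll(r1) // copy put back on resp.Body for the caller
    b2, _ := io.ReadAll(r2) // copy consumed by resp.Write during the dump
    if string(b1) != "hello" || string(b2) != "hello" {
        t.Fatalf("expected both readers to replay the same bytes")
    }
}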


@@ -1,88 +0,0 @@
package httputils
import (
"bytes"
"compress/gzip"
"compress/zlib"
"fmt"
"io"
"net/http"
"strings"
"github.com/pkg/errors"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/transform"
stringsutil "github.com/projectdiscovery/utils/strings"
)
// readNNormalizeRespBody performs normalization on the http response object
// and fills the body buffer with the actual response body.
func readNNormalizeRespBody(rc *ResponseChain, body *bytes.Buffer) (err error) {
response := rc.resp
if response == nil {
return fmt.Errorf("something went wrong response is nil")
}
// net/http doesn't automatically decompress the response body if an
// encoding has been specified by the user in the request, so in that case
// we have to do it manually.
origBody := rc.resp.Body
if origBody == nil {
// skip normalization if body is nil
return nil
}
// wrap with decode if applicable
wrapped, err := wrapDecodeReader(response)
if err != nil {
wrapped = origBody
}
// read response body to buffer
_, err = body.ReadFrom(wrapped)
if err != nil {
if strings.Contains(err.Error(), "gzip: invalid header") {
// it's invalid gzip, but we will still read from the original body
_, err = body.ReadFrom(origBody)
if err != nil {
return errors.Wrap(err, "could not read response body after gzip error")
}
}
if stringsutil.ContainsAny(err.Error(), "unexpected EOF", "read: connection reset by peer", "user canceled") {
// keep partial body and continue (skip error) (add meta header in response for debugging)
if response.Header == nil {
response.Header = make(http.Header)
}
response.Header.Set("x-nuclei-ignore-error", err.Error())
return nil
}
return errors.Wrap(err, "could not read response body")
}
return nil
}
// wrapDecodeReader wraps a decompression reader around the response body if it's compressed
// using gzip or deflate.
func wrapDecodeReader(resp *http.Response) (rc io.ReadCloser, err error) {
switch resp.Header.Get("Content-Encoding") {
case "gzip":
rc, err = gzip.NewReader(resp.Body)
case "deflate":
rc, err = zlib.NewReader(resp.Body)
default:
rc = resp.Body
}
if err != nil {
return nil, err
}
// handle GBK encoding
if isContentTypeGbk(resp.Header.Get("Content-Type")) {
rc = io.NopCloser(transform.NewReader(rc, simplifiedchinese.GBK.NewDecoder()))
}
return rc, nil
}
// isContentTypeGbk checks if the content-type header is gbk
func isContentTypeGbk(contentType string) bool {
contentType = strings.ToLower(contentType)
return stringsutil.ContainsAny(contentType, "gbk", "gb2312", "gb18030")
}
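
An illustrative same-package test of the decode path: a gzip Content-Encoding header causes the body to be transparently decompressed (a GBK content type would additionally route through the GBK decoder). This sketch is not part of the commit:

package httputils

import (
    "bytes"
    "compress/gzip"
    "io"
    "net/http"
    "testing"
)

// TestWrapDecodeReader is an illustrative sketch, not part of the commit.
func TestWrapDecodeReader(t *testing.T) {
    var buf bytes.Buffer
    gw := gzip.NewWriter(&buf)
    _, _ = gw.Write([]byte("plain text"))
    gw.Close()

    resp := &http.Response{
        Header: http.Header{"Content-Encoding": []string{"gzip"}},
        Body:   io.NopCloser(&buf),
    }
    rc, err := wrapDecodeReader(resp)
    if err != nil {
        t.Fatal(err)
    }
    out, _ := io.ReadAll(rc)
    if string(out) != "plain text" {
        t.Fatalf("expected decompressed body, got %q", out)
    }
}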


@@ -1,52 +0,0 @@
package httputils
import (
"bytes"
"fmt"
"io"
"net/http"
protocolutil "github.com/projectdiscovery/nuclei/v3/pkg/protocols/utils"
)
// DumpResponseIntoBuffer dumps a http response without allocating a new buffer
// for the response body.
func DumpResponseIntoBuffer(resp *http.Response, body bool, buff *bytes.Buffer) (err error) {
if resp == nil {
return fmt.Errorf("response is nil")
}
save := resp.Body
savecl := resp.ContentLength
if !body {
// For content length of zero. Make sure the body is an empty
// reader, instead of returning error through failureToReadBody{}.
if resp.ContentLength == 0 {
resp.Body = emptyBody
} else {
resp.Body = failureToReadBody{}
}
} else if resp.Body == nil {
resp.Body = emptyBody
} else {
save, resp.Body, err = drainBody(resp.Body)
if err != nil {
return err
}
}
err = resp.Write(buff)
if err == errNoBody {
err = nil
}
resp.Body = save
resp.ContentLength = savecl
return
}
// DrainResponseBody drains the response body and closes it.
func DrainResponseBody(resp *http.Response) {
defer resp.Body.Close()
// don't reuse the connection and just close it if the body length is more than 2 * MaxBodyRead,
// to avoid DoS
_, _ = io.CopyN(io.Discard, resp.Body, 2*protocolutil.MaxBodyRead)
}
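
A sketch of the dump-then-drain pattern that ResponseChain.Fill relies on, assuming the pre-move import path (the request itself is illustrative):

package main

import (
    "bytes"
    "fmt"
    "net/http"

    "github.com/projectdiscovery/nuclei/v3/pkg/protocols/http/httputils"
)

func main() {
    resp, err := http.Get("https://example.com") // illustrative request
    if err != nil {
        panic(err)
    }
    var headers bytes.Buffer
    // body=false: only the status line and headers are written;
    // resp.Body is left intact for the caller to read
    if err := httputils.DumpResponseIntoBuffer(resp, false, &headers); err != nil {
        panic(err)
    }
    httputils.DrainResponseBody(resp) // discard up to 2*MaxBodyRead and close
    fmt.Print(headers.String())
}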


@@ -34,11 +34,11 @@ import (
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/http/httputils"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/http/signer"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/http/signerpool"
protocolutil "github.com/projectdiscovery/nuclei/v3/pkg/protocols/utils"
templateTypes "github.com/projectdiscovery/nuclei/v3/pkg/templates/types"
"github.com/projectdiscovery/nuclei/v3/pkg/types"
"github.com/projectdiscovery/rawhttp"
convUtil "github.com/projectdiscovery/utils/conversion"
httpUtils "github.com/projectdiscovery/utils/http"
"github.com/projectdiscovery/utils/reader"
sliceutil "github.com/projectdiscovery/utils/slice"
stringsutil "github.com/projectdiscovery/utils/strings"
@@ -49,6 +49,10 @@ const (
defaultMaxWorkers = 150
)
+var (
+MaxBodyRead = int64(1 << 22) // 4MB using shift operator
+)
// Type returns the type of the protocol request
func (request *Request) Type() templateTypes.ProtocolType {
return templateTypes.HTTPProtocol
@@ -753,10 +757,7 @@ func (request *Request) executeRequest(input *contextargs.Context, generatedRequ
}
}
}
-// global wrap response body reader
-if resp != nil && resp.Body != nil {
-resp.Body = protocolutil.NewLimitResponseBody(resp.Body)
-}
if err != nil {
// rawhttp doesn't support draining response bodies.
if resp != nil && resp.Body != nil && generatedRequest.rawRequest == nil && !generatedRequest.original.Pipeline {
@@ -802,17 +803,19 @@ func (request *Request) executeRequest(input *contextargs.Context, generatedRequ
request.options.Output.Request(request.options.TemplatePath, formedURL, request.Type().String(), err)
duration := time.Since(timeStart)
// define max body read limit
-maxBodylimit := -1 // stick to default 4MB
+maxBodylimit := MaxBodyRead // default 4MB
if request.MaxSize > 0 {
-maxBodylimit = request.MaxSize
-} else if request.options.Options.ResponseReadSize != 0 {
-maxBodylimit = request.options.Options.ResponseReadSize
+maxBodylimit = int64(request.MaxSize)
}
+if request.options.Options.ResponseReadSize != 0 {
+maxBodylimit = int64(request.options.Options.ResponseReadSize)
+}
// respChain is an http response chain that reads the response body
// efficiently by reusing buffers and does all decoding and optimizations
-respChain := httputils.NewResponseChain(resp, int64(maxBodylimit))
+respChain := httpUtils.NewResponseChain(resp, maxBodylimit)
defer respChain.Close() // reuse buffers
// we only intend to log/save the final redirected response
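
The hunk above also changes precedence: previously ResponseReadSize applied only when no template-level MaxSize was set, while now it is applied last and therefore wins. A hypothetical helper, effectiveLimit, mirrors the new logic:

// effectiveLimit is a hypothetical helper mirroring the hunk above;
// it is not part of the commit.
func effectiveLimit(templateMaxSize, responseReadSize int, def int64) int64 {
    limit := def // MaxBodyRead, 4MB by default
    if templateMaxSize > 0 {
        limit = int64(templateMaxSize) // template-level max-size
    }
    if responseReadSize != 0 {
        limit = int64(responseReadSize) // ResponseReadSize option, applied last
    }
    return limit
}

// effectiveLimit(0, 0, MaxBodyRead)       => 4194304 (default 4MB)
// effectiveLimit(1<<20, 0, MaxBodyRead)   => 1048576 (template max-size)
// effectiveLimit(1<<20, 512, MaxBodyRead) => 512     (ResponseReadSize wins)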


@@ -1,43 +0,0 @@
package utils
import (
"io"
)
var (
MaxBodyRead = int64(1 << 22) // 4MB using shift operator
)
var _ io.ReadCloser = &LimitResponseBody{}
type LimitResponseBody struct {
io.Reader
io.Closer
}
// NewLimitResponseBody wraps a response body with a limit reader,
// thus only allowing MaxBodyRead bytes (i.e. 4MB) to be read.
func NewLimitResponseBody(body io.ReadCloser) io.ReadCloser {
return NewLimitResponseBodyWithSize(body, MaxBodyRead)
}
// NewLimitResponseBodyWithSize wraps a response body with a limit reader
// of the given size (-1 keeps the default 4MB).
func NewLimitResponseBodyWithSize(body io.ReadCloser, size int64) io.ReadCloser {
if body == nil {
return nil
}
if size == -1 {
// stick to default 4MB
size = MaxBodyRead
}
return &LimitResponseBody{
Reader: io.LimitReader(body, size),
Closer: body,
}
}
// LimitBodyRead limits the body read to MaxBodyRead bytes.
func LimitBodyRead(r io.Reader) ([]byte, error) {
return io.ReadAll(io.LimitReader(r, MaxBodyRead))
}
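
A minimal sketch of the limiter being removed here, assuming the pre-move import path:

package main

import (
    "fmt"
    "io"
    "strings"

    protoUtil "github.com/projectdiscovery/nuclei/v3/pkg/protocols/utils"
)

func main() {
    body := io.NopCloser(strings.NewReader(strings.Repeat("A", 100)))
    limited := protoUtil.NewLimitResponseBodyWithSize(body, 10) // cap reads at 10 bytes
    data, _ := io.ReadAll(limited)
    fmt.Println(len(data)) // 10: bytes past the cap are silently truncated
}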