diff --git a/frontend/dockerfile/parser/dumper/main.go b/frontend/dockerfile/parser/dumper/main.go index 8c357f4c..6561708c 100644 --- a/frontend/dockerfile/parser/dumper/main.go +++ b/frontend/dockerfile/parser/dumper/main.go @@ -22,7 +22,10 @@ func main() { panic(err) } - ast, err := parser.Parse(f) + d := parser.Directive{LookingForDirectives: true} + parser.SetEscapeToken(parser.DefaultEscapeToken, &d) + + ast, err := parser.Parse(f, &d) if err != nil { panic(err) } else { diff --git a/frontend/dockerfile/parser/json_test.go b/frontend/dockerfile/parser/json_test.go index a256f845..60d74d9c 100644 --- a/frontend/dockerfile/parser/json_test.go +++ b/frontend/dockerfile/parser/json_test.go @@ -28,7 +28,10 @@ var validJSONArraysOfStrings = map[string][]string{ func TestJSONArraysOfStrings(t *testing.T) { for json, expected := range validJSONArraysOfStrings { - if node, _, err := parseJSON(json); err != nil { + d := Directive{} + SetEscapeToken(DefaultEscapeToken, &d) + + if node, _, err := parseJSON(json, &d); err != nil { t.Fatalf("%q should be a valid JSON array of strings, but wasn't! (err: %q)", json, err) } else { i := 0 @@ -48,7 +51,10 @@ func TestJSONArraysOfStrings(t *testing.T) { } } for _, json := range invalidJSONArraysOfStrings { - if _, _, err := parseJSON(json); err != errDockerfileNotStringArray { + d := Directive{} + SetEscapeToken(DefaultEscapeToken, &d) + + if _, _, err := parseJSON(json, &d); err != errDockerfileNotStringArray { t.Fatalf("%q should be an invalid JSON array of strings, but wasn't!", json) } } diff --git a/frontend/dockerfile/parser/line_parsers.go b/frontend/dockerfile/parser/line_parsers.go index 5f484e49..dec7a757 100644 --- a/frontend/dockerfile/parser/line_parsers.go +++ b/frontend/dockerfile/parser/line_parsers.go @@ -21,7 +21,7 @@ var ( // ignore the current argument. This will still leave a command parsed, but // will not incorporate the arguments into the ast. -func parseIgnore(rest string) (*Node, map[string]bool, error) { +func parseIgnore(rest string, d *Directive) (*Node, map[string]bool, error) { return &Node{}, nil, nil } @@ -30,12 +30,12 @@ func parseIgnore(rest string) (*Node, map[string]bool, error) { // // ONBUILD RUN foo bar -> (onbuild (run foo bar)) // -func parseSubCommand(rest string) (*Node, map[string]bool, error) { +func parseSubCommand(rest string, d *Directive) (*Node, map[string]bool, error) { if rest == "" { return nil, nil, nil } - _, child, err := ParseLine(rest) + _, child, err := ParseLine(rest, d) if err != nil { return nil, nil, err } @@ -46,7 +46,7 @@ func parseSubCommand(rest string) (*Node, map[string]bool, error) { // helper to parse words (i.e space delimited or quoted strings) in a statement. // The quotes are preserved as part of this function and they are stripped later // as part of processWords(). -func parseWords(rest string) []string { +func parseWords(rest string, d *Directive) []string { const ( inSpaces = iota // looking for start of a word inWord @@ -96,7 +96,7 @@ func parseWords(rest string) []string { blankOK = true phase = inQuote } - if ch == tokenEscape { + if ch == d.EscapeToken { if pos+chWidth == len(rest) { continue // just skip an escape token at end of line } @@ -115,7 +115,7 @@ func parseWords(rest string) []string { phase = inWord } // The escape token is special except for ' quotes - can't escape anything for ' - if ch == tokenEscape && quote != '\'' { + if ch == d.EscapeToken && quote != '\'' { if pos+chWidth == len(rest) { phase = inWord continue // just skip the escape token at end @@ -133,14 +133,14 @@ func parseWords(rest string) []string { // parse environment like statements. Note that this does *not* handle // variable interpolation, which will be handled in the evaluator. -func parseNameVal(rest string, key string) (*Node, map[string]bool, error) { +func parseNameVal(rest string, key string, d *Directive) (*Node, map[string]bool, error) { // This is kind of tricky because we need to support the old // variant: KEY name value // as well as the new one: KEY name=value ... // The trigger to know which one is being used will be whether we hit // a space or = first. space ==> old, "=" ==> new - words := parseWords(rest) + words := parseWords(rest, d) if len(words) == 0 { return nil, nil, nil } @@ -187,12 +187,12 @@ func parseNameVal(rest string, key string) (*Node, map[string]bool, error) { return rootnode, nil, nil } -func parseEnv(rest string) (*Node, map[string]bool, error) { - return parseNameVal(rest, "ENV") +func parseEnv(rest string, d *Directive) (*Node, map[string]bool, error) { + return parseNameVal(rest, "ENV", d) } -func parseLabel(rest string) (*Node, map[string]bool, error) { - return parseNameVal(rest, "LABEL") +func parseLabel(rest string, d *Directive) (*Node, map[string]bool, error) { + return parseNameVal(rest, "LABEL", d) } // parses a statement containing one or more keyword definition(s) and/or @@ -203,8 +203,8 @@ func parseLabel(rest string) (*Node, map[string]bool, error) { // In addition, a keyword definition alone is of the form `keyword` like `name1` // above. And the assignments `name2=` and `name3=""` are equivalent and // assign an empty value to the respective keywords. -func parseNameOrNameVal(rest string) (*Node, map[string]bool, error) { - words := parseWords(rest) +func parseNameOrNameVal(rest string, d *Directive) (*Node, map[string]bool, error) { + words := parseWords(rest, d) if len(words) == 0 { return nil, nil, nil } @@ -229,7 +229,7 @@ func parseNameOrNameVal(rest string) (*Node, map[string]bool, error) { // parses a whitespace-delimited set of arguments. The result is effectively a // linked list of string arguments. -func parseStringsWhitespaceDelimited(rest string) (*Node, map[string]bool, error) { +func parseStringsWhitespaceDelimited(rest string, d *Directive) (*Node, map[string]bool, error) { if rest == "" { return nil, nil, nil } @@ -253,7 +253,7 @@ func parseStringsWhitespaceDelimited(rest string) (*Node, map[string]bool, error } // parsestring just wraps the string in quotes and returns a working node. -func parseString(rest string) (*Node, map[string]bool, error) { +func parseString(rest string, d *Directive) (*Node, map[string]bool, error) { if rest == "" { return nil, nil, nil } @@ -263,7 +263,7 @@ func parseString(rest string) (*Node, map[string]bool, error) { } // parseJSON converts JSON arrays to an AST. -func parseJSON(rest string) (*Node, map[string]bool, error) { +func parseJSON(rest string, d *Directive) (*Node, map[string]bool, error) { rest = strings.TrimLeftFunc(rest, unicode.IsSpace) if !strings.HasPrefix(rest, "[") { return nil, nil, fmt.Errorf(`Error parsing "%s" as a JSON array`, rest) @@ -296,12 +296,12 @@ func parseJSON(rest string) (*Node, map[string]bool, error) { // parseMaybeJSON determines if the argument appears to be a JSON array. If // so, passes to parseJSON; if not, quotes the result and returns a single // node. -func parseMaybeJSON(rest string) (*Node, map[string]bool, error) { +func parseMaybeJSON(rest string, d *Directive) (*Node, map[string]bool, error) { if rest == "" { return nil, nil, nil } - node, attrs, err := parseJSON(rest) + node, attrs, err := parseJSON(rest, d) if err == nil { return node, attrs, nil @@ -318,8 +318,8 @@ func parseMaybeJSON(rest string) (*Node, map[string]bool, error) { // parseMaybeJSONToList determines if the argument appears to be a JSON array. If // so, passes to parseJSON; if not, attempts to parse it as a whitespace // delimited string. -func parseMaybeJSONToList(rest string) (*Node, map[string]bool, error) { - node, attrs, err := parseJSON(rest) +func parseMaybeJSONToList(rest string, d *Directive) (*Node, map[string]bool, error) { + node, attrs, err := parseJSON(rest, d) if err == nil { return node, attrs, nil @@ -328,11 +328,11 @@ func parseMaybeJSONToList(rest string) (*Node, map[string]bool, error) { return nil, nil, err } - return parseStringsWhitespaceDelimited(rest) + return parseStringsWhitespaceDelimited(rest, d) } // The HEALTHCHECK command is like parseMaybeJSON, but has an extra type argument. -func parseHealthConfig(rest string) (*Node, map[string]bool, error) { +func parseHealthConfig(rest string, d *Directive) (*Node, map[string]bool, error) { // Find end of first argument var sep int for ; sep < len(rest); sep++ { @@ -352,7 +352,7 @@ func parseHealthConfig(rest string) (*Node, map[string]bool, error) { } typ := rest[:sep] - cmd, attrs, err := parseMaybeJSON(rest[next:]) + cmd, attrs, err := parseMaybeJSON(rest[next:], d) if err != nil { return nil, nil, err } diff --git a/frontend/dockerfile/parser/parser.go b/frontend/dockerfile/parser/parser.go index f34a8f85..6fb84d92 100644 --- a/frontend/dockerfile/parser/parser.go +++ b/frontend/dockerfile/parser/parser.go @@ -36,26 +36,32 @@ type Node struct { EndLine int // the line in the original dockerfile where the node ends } -const defaultTokenEscape = "\\" +// Directive is the structure used during a build run to hold the state of +// parsing directives. +type Directive struct { + EscapeToken rune // Current escape token + LineContinuationRegex *regexp.Regexp // Current line contination regex + LookingForDirectives bool // Whether we are currently looking for directives + EscapeSeen bool // Whether the escape directive has been seen +} var ( - dispatch map[string]func(string) (*Node, map[string]bool, error) - tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`) - tokenLineContinuation *regexp.Regexp - tokenEscape = rune(defaultTokenEscape[0]) - tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P.).*$`) - tokenComment = regexp.MustCompile(`^#.*$`) - lookingForDirectives bool - directiveEscapeSeen bool + dispatch map[string]func(string, *Directive) (*Node, map[string]bool, error) + tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`) + tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P.).*$`) + tokenComment = regexp.MustCompile(`^#.*$`) ) -// setTokenEscape sets the default token for escaping characters in a Dockerfile. -func setTokenEscape(s string) error { +// DefaultEscapeToken is the default escape token +const DefaultEscapeToken = "\\" + +// SetEscapeToken sets the default token for escaping characters in a Dockerfile. +func SetEscapeToken(s string, d *Directive) error { if s != "`" && s != "\\" { return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s) } - tokenEscape = rune(s[0]) - tokenLineContinuation = regexp.MustCompile(`\` + s + `[ \t]*$`) + d.EscapeToken = rune(s[0]) + d.LineContinuationRegex = regexp.MustCompile(`\` + s + `[ \t]*$`) return nil } @@ -66,7 +72,7 @@ func init() { // reformulating the arguments according to the rules in the parser // functions. Errors are propagated up by Parse() and the resulting AST can // be incorporated directly into the existing AST as a next. - dispatch = map[string]func(string) (*Node, map[string]bool, error){ + dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){ command.Add: parseMaybeJSONToList, command.Arg: parseNameOrNameVal, command.Cmd: parseMaybeJSON, @@ -89,36 +95,35 @@ func init() { } // ParseLine parse a line and return the remainder. -func ParseLine(line string) (string, *Node, error) { - +func ParseLine(line string, d *Directive) (string, *Node, error) { // Handle the parser directive '# escape=. Parser directives must precede // any builder instruction or other comments, and cannot be repeated. - if lookingForDirectives { + if d.LookingForDirectives { tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line)) if len(tecMatch) > 0 { - if directiveEscapeSeen == true { + if d.EscapeSeen == true { return "", nil, fmt.Errorf("only one escape parser directive can be used") } for i, n := range tokenEscapeCommand.SubexpNames() { if n == "escapechar" { - if err := setTokenEscape(tecMatch[i]); err != nil { + if err := SetEscapeToken(tecMatch[i], d); err != nil { return "", nil, err } - directiveEscapeSeen = true + d.EscapeSeen = true return "", nil, nil } } } } - lookingForDirectives = false + d.LookingForDirectives = false if line = stripComments(line); line == "" { return "", nil, nil } - if tokenLineContinuation.MatchString(line) { - line = tokenLineContinuation.ReplaceAllString(line, "") + if d.LineContinuationRegex.MatchString(line) { + line = d.LineContinuationRegex.ReplaceAllString(line, "") return line, nil, nil } @@ -130,7 +135,7 @@ func ParseLine(line string) (string, *Node, error) { node := &Node{} node.Value = cmd - sexp, attrs, err := fullDispatch(cmd, args) + sexp, attrs, err := fullDispatch(cmd, args, d) if err != nil { return "", nil, err } @@ -145,10 +150,7 @@ func ParseLine(line string) (string, *Node, error) { // Parse is the main parse routine. // It handles an io.ReadWriteCloser and returns the root of the AST. -func Parse(rwc io.Reader) (*Node, error) { - directiveEscapeSeen = false - lookingForDirectives = true - setTokenEscape(defaultTokenEscape) // Assume the default token for escape +func Parse(rwc io.Reader, d *Directive) (*Node, error) { currentLine := 0 root := &Node{} root.StartLine = -1 @@ -163,7 +165,7 @@ func Parse(rwc io.Reader) (*Node, error) { } scannedLine := strings.TrimLeftFunc(string(scannedBytes), unicode.IsSpace) currentLine++ - line, child, err := ParseLine(scannedLine) + line, child, err := ParseLine(scannedLine, d) if err != nil { return nil, err } @@ -178,7 +180,7 @@ func Parse(rwc io.Reader) (*Node, error) { continue } - line, child, err = ParseLine(line + newline) + line, child, err = ParseLine(line+newline, d) if err != nil { return nil, err } @@ -188,7 +190,7 @@ func Parse(rwc io.Reader) (*Node, error) { } } if child == nil && line != "" { - _, child, err = ParseLine(line) + _, child, err = ParseLine(line, d) if err != nil { return nil, err } diff --git a/frontend/dockerfile/parser/parser_test.go b/frontend/dockerfile/parser/parser_test.go index 3c65a4a0..e7b0c07c 100644 --- a/frontend/dockerfile/parser/parser_test.go +++ b/frontend/dockerfile/parser/parser_test.go @@ -39,7 +39,9 @@ func TestTestNegative(t *testing.T) { t.Fatalf("Dockerfile missing for %s: %v", dir, err) } - _, err = Parse(df) + d := Directive{LookingForDirectives: true} + SetEscapeToken(DefaultEscapeToken, &d) + _, err = Parse(df, &d) if err == nil { t.Fatalf("No error parsing broken dockerfile for %s", dir) } @@ -59,7 +61,9 @@ func TestTestData(t *testing.T) { } defer df.Close() - ast, err := Parse(df) + d := Directive{LookingForDirectives: true} + SetEscapeToken(DefaultEscapeToken, &d) + ast, err := Parse(df, &d) if err != nil { t.Fatalf("Error parsing %s's dockerfile: %v", dir, err) } @@ -119,7 +123,9 @@ func TestParseWords(t *testing.T) { } for _, test := range tests { - words := parseWords(test["input"][0]) + d := Directive{LookingForDirectives: true} + SetEscapeToken(DefaultEscapeToken, &d) + words := parseWords(test["input"][0], &d) if len(words) != len(test["expect"]) { t.Fatalf("length check failed. input: %v, expect: %q, output: %q", test["input"][0], test["expect"], words) } @@ -138,7 +144,9 @@ func TestLineInformation(t *testing.T) { } defer df.Close() - ast, err := Parse(df) + d := Directive{LookingForDirectives: true} + SetEscapeToken(DefaultEscapeToken, &d) + ast, err := Parse(df, &d) if err != nil { t.Fatalf("Error parsing dockerfile %s: %v", testFileLineInfo, err) } diff --git a/frontend/dockerfile/parser/utils.go b/frontend/dockerfile/parser/utils.go index b21eb62a..cd7af75e 100644 --- a/frontend/dockerfile/parser/utils.go +++ b/frontend/dockerfile/parser/utils.go @@ -36,7 +36,7 @@ func (node *Node) Dump() string { // performs the dispatch based on the two primal strings, cmd and args. Please // look at the dispatch table in parser.go to see how these dispatchers work. -func fullDispatch(cmd, args string) (*Node, map[string]bool, error) { +func fullDispatch(cmd, args string, d *Directive) (*Node, map[string]bool, error) { fn := dispatch[cmd] // Ignore invalid Dockerfile instructions @@ -44,7 +44,7 @@ func fullDispatch(cmd, args string) (*Node, map[string]bool, error) { fn = parseIgnore } - sexp, attrs, err := fn(args) + sexp, attrs, err := fn(args, d) if err != nil { return nil, nil, err }