diff --git a/README.md b/README.md index b6d7b23..1f45a49 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,7 @@ likely undiscovered bugs. - Numbers immediately followed by non-numeric characters - Trailing characters on a line after valid or expected content + - Malformed file header lines (lines that start with `diff --git`) 2. Errors for invalid input are generally more verbose and specific than those from `git apply`. diff --git a/gitdiff/file_header.go b/gitdiff/file_header.go index fadfc4e..e3185bd 100644 --- a/gitdiff/file_header.go +++ b/gitdiff/file_header.go @@ -172,24 +172,88 @@ func (p *parser) ParseTraditionalFileHeader() (*File, error) { // If the names in the header do not match because the patch is a rename, // return an empty default name. func parseGitHeaderName(header string) (string, error) { - firstName, n, err := parseName(header, -1, 1) - if err != nil { - return "", err + header = strings.TrimSuffix(header, "\n") + if len(header) == 0 { + return "", nil } - if n < len(header) && (header[n] == ' ' || header[n] == '\t') { - n++ - } + var err error + var first, second string - secondName, _, err := parseName(header[n:], -1, 1) - if err != nil { - return "", err + // there are 4 cases to account for: + // + // 1) unquoted unquoted + // 2) unquoted "quoted" + // 3) "quoted" unquoted + // 4) "quoted" "quoted" + // + quote := strings.IndexByte(header, '"') + switch { + case quote < 0: + // case 1 + first = header + + case quote > 0: + // case 2 + first = header[:quote-1] + if !isSpace(header[quote-1]) { + return "", fmt.Errorf("missing separator") + } + + second, _, err = parseQuotedName(header[quote:]) + if err != nil { + return "", err + } + + case quote == 0: + // case 3 or case 4 + var n int + first, n, err = parseQuotedName(header) + if err != nil { + return "", err + } + + // git accepts multiple spaces after a quoted name, but not after an + // unquoted name, since the name might end with one or more spaces + for n < len(header) && isSpace(header[n]) { + n++ + } + if n == len(header) { + return "", nil + } + + if header[n] == '"' { + second, _, err = parseQuotedName(header[n:]) + if err != nil { + return "", err + } + } else { + second = header[n:] + } } - if firstName != secondName { + first = trimTreePrefix(first, 1) + if second != "" { + if first == trimTreePrefix(second, 1) { + return first, nil + } return "", nil } - return firstName, nil + + // at this point, both names are unquoted (case 1) + // since names may contain spaces, we can't use a known separator + // instead, look for a split that produces two equal names + + for i := 0; i < len(first)-1; i++ { + if !isSpace(first[i]) { + continue + } + second = trimTreePrefix(first[i+1:], 1) + if name := first[:i]; name == second { + return name, nil + } + } + return "", nil } // parseGitHeaderData parses a single line of metadata from a Git file header. @@ -283,25 +347,25 @@ func parseGitHeaderCreatedMode(f *File, line, defaultName string) error { func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) { f.IsCopy = true - f.OldName, _, err = parseName(line, -1, 0) + f.OldName, _, err = parseName(line, 0, 0) return } func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) { f.IsCopy = true - f.NewName, _, err = parseName(line, -1, 0) + f.NewName, _, err = parseName(line, 0, 0) return } func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) { f.IsRename = true - f.OldName, _, err = parseName(line, -1, 0) + f.OldName, _, err = parseName(line, 0, 0) return } func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) { f.IsRename = true - f.NewName, _, err = parseName(line, -1, 0) + f.NewName, _, err = parseName(line, 0, 0) return } @@ -349,14 +413,14 @@ func parseMode(s string) (os.FileMode, error) { // parseName extracts a file name from the start of a string and returns the // name and the index of the first character after the name. If the name is -// unquoted and term is non-negative, parsing stops at the first occurrence of -// term. Otherwise parsing of unquoted names stops at the first space or tab. +// unquoted and term is non-zero, parsing stops at the first occurrence of +// term. // // If the name is exactly "/dev/null", no further processing occurs. Otherwise, // if dropPrefix is greater than zero, that number of prefix components // separated by forward slashes are dropped from the name and any duplicate // slashes are collapsed. -func parseName(s string, term rune, dropPrefix int) (name string, n int, err error) { +func parseName(s string, term byte, dropPrefix int) (name string, n int, err error) { if len(s) > 0 && s[0] == '"' { name, n, err = parseQuotedName(s) } else { @@ -387,15 +451,12 @@ func parseQuotedName(s string) (name string, n int, err error) { return name, n, err } -func parseUnquotedName(s string, term rune) (name string, n int, err error) { +func parseUnquotedName(s string, term byte) (name string, n int, err error) { for n = 0; n < len(s); n++ { if s[n] == '\n' { break } - if term >= 0 && rune(s[n]) == term { - break - } - if term < 0 && (s[n] == ' ' || s[n] == '\t') { + if term > 0 && s[n] == term { break } } @@ -440,6 +501,17 @@ func cleanName(name string, drop int) string { return b.String() } +// trimTreePrefix removes up to n leading directory components from name. +func trimTreePrefix(name string, n int) string { + i := 0 + for ; i < len(name) && n > 0; i++ { + if name[i] == '/' { + n-- + } + } + return name[i:] +} + // hasEpochTimestamp returns true if the string ends with a POSIX-formatted // timestamp for the UNIX epoch after a tab character. According to git, this // is used by GNU diff to mark creations and deletions. @@ -468,3 +540,7 @@ func hasEpochTimestamp(s string) bool { } return true } + +func isSpace(c byte) bool { + return c == ' ' || c == '\t' || c == '\n' +} diff --git a/gitdiff/file_header_test.go b/gitdiff/file_header_test.go index 52adda5..99cfed3 100644 --- a/gitdiff/file_header_test.go +++ b/gitdiff/file_header_test.go @@ -310,7 +310,7 @@ func TestCleanName(t *testing.T) { func TestParseName(t *testing.T) { tests := map[string]struct { Input string - Term rune + Term byte Drop int Output string N int @@ -334,14 +334,17 @@ func TestParseName(t *testing.T) { "dropPrefix": { Input: "a/dir/file.txt", Drop: 1, Output: "dir/file.txt", N: 14, }, - "multipleNames": { - Input: "dir/a.txt dir/b.txt", Term: -1, Output: "dir/a.txt", N: 9, + "unquotedWithSpaces": { + Input: "dir/with spaces.txt", Output: "dir/with spaces.txt", N: 19, + }, + "unquotedWithTrailingSpaces": { + Input: "dir/with spaces.space ", Output: "dir/with spaces.space ", N: 23, }, "devNull": { Input: "/dev/null", Term: '\t', Drop: 1, Output: "/dev/null", N: 9, }, - "newlineAlwaysSeparates": { - Input: "dir/file.txt\n", Term: 0, Output: "dir/file.txt", N: 12, + "newlineSeparates": { + Input: "dir/file.txt\n", Output: "dir/file.txt", N: 12, }, "emptyString": { Input: "", Err: true, @@ -630,9 +633,33 @@ func TestParseGitHeaderName(t *testing.T) { Input: "a/dir/foo.txt b/dir/bar.txt", Output: "", }, - "missingSecondName": { - Input: "a/dir/foo.txt", - Err: true, + "matchingNamesWithSpaces": { + Input: "a/dir/file with spaces.txt b/dir/file with spaces.txt", + Output: "dir/file with spaces.txt", + }, + "matchingNamesWithTrailingSpaces": { + Input: "a/dir/spaces b/dir/spaces ", + Output: "dir/spaces ", + }, + "matchingNamesQuoted": { + Input: `"a/dir/\"quotes\".txt" "b/dir/\"quotes\".txt"`, + Output: `dir/"quotes".txt`, + }, + "matchingNamesFirstQuoted": { + Input: `"a/dir/file.txt" b/dir/file.txt`, + Output: "dir/file.txt", + }, + "matchingNamesSecondQuoted": { + Input: `a/dir/file.txt "b/dir/file.txt"`, + Output: "dir/file.txt", + }, + "noSecondName": { + Input: "a/dir/foo.txt", + Output: "", + }, + "noSecondNameQuoted": { + Input: `"a/dir/foo.txt"`, + Output: "", }, "invalidName": { Input: `"a/dir/file.txt b/dir/file.txt`,