From debe110ec2217d6af2b6d820431da5d252e38422 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sat, 27 Jul 2024 11:03:43 -0700 Subject: [PATCH 01/11] Add String() methods to parsed types This enables clients to move back and forth between parsed objects and text patches. The generated patches are semantically equal to the parsed object and should parse to the same object, but may not be byte-for-byte identical to the original input. --- gitdiff/gitdiff.go | 135 +++++++++++++++++++++++++++++++++++++++++- gitdiff/quote.go | 52 ++++++++++++++++ gitdiff/quote_test.go | 28 +++++++++ 3 files changed, 214 insertions(+), 1 deletion(-) create mode 100644 gitdiff/quote.go create mode 100644 gitdiff/quote_test.go diff --git a/gitdiff/gitdiff.go b/gitdiff/gitdiff.go index 18645bd..0a21959 100644 --- a/gitdiff/gitdiff.go +++ b/gitdiff/gitdiff.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "os" + "strings" ) // File describes changes to a single file. It can be either a text file or a @@ -38,6 +39,119 @@ type File struct { ReverseBinaryFragment *BinaryFragment } +// String returns a git diff representation of this file. The value can be +// parsed by this library to obtain the same File, but may not be the same as +// the original input or the same as what Git would produces +func (f *File) String() string { + var diff strings.Builder + + diff.WriteString("diff --git ") + + var aName, bName string + switch { + case f.OldName == "": + aName = f.NewName + bName = f.NewName + + case f.NewName == "": + aName = f.OldName + bName = f.OldName + + default: + aName = f.OldName + bName = f.NewName + } + + writeQuotedName(&diff, "a/"+aName) + diff.WriteByte(' ') + writeQuotedName(&diff, "b/"+bName) + diff.WriteByte('\n') + + diff.WriteString("--- ") + if f.OldName == "" { + diff.WriteString("/dev/null") + } else { + writeQuotedName(&diff, f.OldName) + } + diff.WriteByte('\n') + + diff.WriteString("+++ ") + if f.NewName == "" { + diff.WriteString("/dev/null") + } else { + writeQuotedName(&diff, f.NewName) + } + diff.WriteByte('\n') + + if f.OldMode != 0 { + if f.IsDelete { + fmt.Fprintf(&diff, "deleted file mode %o\n", f.OldMode) + } else { + fmt.Fprintf(&diff, "old mode %o\n", f.OldMode) + } + } + + if f.NewMode != 0 { + if f.IsNew { + fmt.Fprintf(&diff, "new file mode %o\n", f.NewMode) + } else { + fmt.Fprintf(&diff, "new mode %o\n", f.NewMode) + } + } + + if f.Score > 0 { + if f.IsCopy || f.IsRename { + fmt.Fprintf(&diff, "similarity index %d%%\n", f.Score) + } else { + fmt.Fprintf(&diff, "dissimilarity index %d%%\n", f.Score) + } + } + + if f.IsCopy { + if f.OldName != "" { + diff.WriteString("copy from ") + writeQuotedName(&diff, f.OldName) + diff.WriteByte('\n') + } + if f.NewName != "" { + diff.WriteString("copy to ") + writeQuotedName(&diff, f.NewName) + diff.WriteByte('\n') + } + } + + if f.IsRename { + if f.OldName != "" { + diff.WriteString("rename from ") + writeQuotedName(&diff, f.OldName) + diff.WriteByte('\n') + } + if f.NewName != "" { + diff.WriteString("rename to ") + writeQuotedName(&diff, f.NewName) + diff.WriteByte('\n') + } + } + + if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" { + fmt.Fprintf(&diff, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix) + if f.OldMode != 0 { + fmt.Fprintf(&diff, " %o", f.OldMode) + } + diff.WriteByte('\n') + } + + if f.IsBinary { + // TODO(bkeyes): add string method for BinaryFragments + } else { + for _, frag := range f.TextFragments { + diff.WriteString(frag.String()) + } + } + + return diff.String() +} + // TextFragment describes changed lines starting at a specific line in a text file. type TextFragment struct { Comment string @@ -57,7 +171,26 @@ type TextFragment struct { Lines []Line } -// Header returns the canonical header of this fragment. +// String returns a git diff format of this fragment. See [File.String] for +// more details on this format. +func (f *TextFragment) String() string { + var diff strings.Builder + + diff.WriteString(f.Header()) + diff.WriteString("\n") + + for _, line := range f.Lines { + diff.WriteString(line.String()) + if line.NoEOL() { + diff.WriteString("\n\\ No newline at end of file\n") + } + } + + return diff.String() +} + +// Header returns a git diff header of this fragment. See [File.String] for +// more details on this format. func (f *TextFragment) Header() string { return fmt.Sprintf("@@ -%d,%d +%d,%d @@ %s", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines, f.Comment) } diff --git a/gitdiff/quote.go b/gitdiff/quote.go new file mode 100644 index 0000000..a9b2a28 --- /dev/null +++ b/gitdiff/quote.go @@ -0,0 +1,52 @@ +package gitdiff + +import ( + "strings" +) + +// writeQuotedName writes s to b, quoting it using C-style octal escapes if necessary. +func writeQuotedName(b *strings.Builder, s string) { + qpos := 0 + for i := 0; i < len(s); i++ { + ch := s[i] + if q, quoted := quoteByte(ch); quoted { + if qpos == 0 { + b.WriteByte('"') + } + b.WriteString(s[qpos:i]) + b.Write(q) + qpos = i + 1 + } + } + b.WriteString(s[qpos:]) + if qpos > 0 { + b.WriteByte('"') + } +} + +var quoteEscapeTable = map[byte]byte{ + '\a': 'a', + '\b': 'b', + '\t': 't', + '\n': 'n', + '\v': 'v', + '\f': 'f', + '\r': 'r', + '"': '"', + '\\': '\\', +} + +func quoteByte(b byte) ([]byte, bool) { + if q, ok := quoteEscapeTable[b]; ok { + return []byte{'\\', q}, true + } + if b < 0x20 || b >= 0x7F { + return []byte{ + '\\', + '0' + (b>>6)&0o3, + '0' + (b>>3)&0o7, + '0' + (b>>0)&0o7, + }, true + } + return nil, false +} diff --git a/gitdiff/quote_test.go b/gitdiff/quote_test.go new file mode 100644 index 0000000..9159060 --- /dev/null +++ b/gitdiff/quote_test.go @@ -0,0 +1,28 @@ +package gitdiff + +import ( + "strings" + "testing" +) + +func TestWriteQuotedName(t *testing.T) { + tests := []struct { + Input string + Expected string + }{ + {"noquotes.txt", `noquotes.txt`}, + {"no quotes.txt", `no quotes.txt`}, + {"new\nline", `"new\nline"`}, + {"escape\x1B null\x00", `"escape\033 null\000"`}, + {"snowman \u2603 snowman", `"snowman \342\230\203 snowman"`}, + {"\"already quoted\"", `"\"already quoted\""`}, + } + + for _, test := range tests { + var b strings.Builder + writeQuotedName(&b, test.Input) + if b.String() != test.Expected { + t.Errorf("expected %q, got %q", test.Expected, b.String()) + } + } +} From fe252ede763b1fd64909b959b9dab791484d9d14 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Fri, 9 Aug 2024 20:59:27 -0700 Subject: [PATCH 02/11] Fix inconsistencies in patch format Add "a/" and "b/" prefixes to names on "---" and "+++" lines. Reorder lines to match Git. Only print mode lines when the mode is changing. --- gitdiff/gitdiff.go | 51 ++++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/gitdiff/gitdiff.go b/gitdiff/gitdiff.go index 0a21959..c0762a0 100644 --- a/gitdiff/gitdiff.go +++ b/gitdiff/gitdiff.go @@ -67,26 +67,10 @@ func (f *File) String() string { writeQuotedName(&diff, "b/"+bName) diff.WriteByte('\n') - diff.WriteString("--- ") - if f.OldName == "" { - diff.WriteString("/dev/null") - } else { - writeQuotedName(&diff, f.OldName) - } - diff.WriteByte('\n') - - diff.WriteString("+++ ") - if f.NewName == "" { - diff.WriteString("/dev/null") - } else { - writeQuotedName(&diff, f.NewName) - } - diff.WriteByte('\n') - if f.OldMode != 0 { if f.IsDelete { fmt.Fprintf(&diff, "deleted file mode %o\n", f.OldMode) - } else { + } else if f.NewMode != 0 { fmt.Fprintf(&diff, "old mode %o\n", f.OldMode) } } @@ -94,7 +78,7 @@ func (f *File) String() string { if f.NewMode != 0 { if f.IsNew { fmt.Fprintf(&diff, "new file mode %o\n", f.NewMode) - } else { + } else if f.OldMode != 0 { fmt.Fprintf(&diff, "new mode %o\n", f.NewMode) } } @@ -135,12 +119,31 @@ func (f *File) String() string { if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" { fmt.Fprintf(&diff, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix) - if f.OldMode != 0 { + if f.OldMode != 0 && !f.IsDelete { fmt.Fprintf(&diff, " %o", f.OldMode) } diff.WriteByte('\n') } + // The "---" and "+++" lines only appear for patches with fragments + if len(f.TextFragments) > 0 || f.BinaryFragment != nil { + diff.WriteString("--- ") + if f.OldName == "" { + diff.WriteString("/dev/null") + } else { + writeQuotedName(&diff, "a/"+f.OldName) + } + diff.WriteByte('\n') + + diff.WriteString("+++ ") + if f.NewName == "" { + diff.WriteString("/dev/null") + } else { + writeQuotedName(&diff, "b/"+f.NewName) + } + diff.WriteByte('\n') + } + if f.IsBinary { // TODO(bkeyes): add string method for BinaryFragments } else { @@ -192,7 +195,15 @@ func (f *TextFragment) String() string { // Header returns a git diff header of this fragment. See [File.String] for // more details on this format. func (f *TextFragment) Header() string { - return fmt.Sprintf("@@ -%d,%d +%d,%d @@ %s", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines, f.Comment) + var hdr strings.Builder + + fmt.Fprintf(&hdr, "@@ -%d,%d +%d,%d @@", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines) + if f.Comment != "" { + hdr.WriteByte(' ') + hdr.WriteString(f.Comment) + } + + return hdr.String() } // Validate checks that the fragment is self-consistent and appliable. Validate From 9d11cc282ac3f3c960c934208535d64d0c8aabfa Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Fri, 9 Aug 2024 21:00:28 -0700 Subject: [PATCH 03/11] Add basic tests for patch formatting For now, assert that we produce exactly the same output as the original parsed input. I think these tests would ideally assert that the re-parsed value is the same, but it's kind of annoying to compare File objects without adding an external dependency. --- gitdiff/gitdiff_string_test.go | 45 +++++++++++++++++++++ gitdiff/testdata/string/delete.patch | 16 ++++++++ gitdiff/testdata/string/mode.patch | 3 ++ gitdiff/testdata/string/modify.patch | 16 ++++++++ gitdiff/testdata/string/new.patch | 16 ++++++++ gitdiff/testdata/string/new_empty.patch | 3 ++ gitdiff/testdata/string/new_mode.patch | 16 ++++++++ gitdiff/testdata/string/rename.patch | 4 ++ gitdiff/testdata/string/rename_modify.patch | 18 +++++++++ 9 files changed, 137 insertions(+) create mode 100644 gitdiff/gitdiff_string_test.go create mode 100644 gitdiff/testdata/string/delete.patch create mode 100644 gitdiff/testdata/string/mode.patch create mode 100644 gitdiff/testdata/string/modify.patch create mode 100644 gitdiff/testdata/string/new.patch create mode 100644 gitdiff/testdata/string/new_empty.patch create mode 100644 gitdiff/testdata/string/new_mode.patch create mode 100644 gitdiff/testdata/string/rename.patch create mode 100644 gitdiff/testdata/string/rename_modify.patch diff --git a/gitdiff/gitdiff_string_test.go b/gitdiff/gitdiff_string_test.go new file mode 100644 index 0000000..96d8aa7 --- /dev/null +++ b/gitdiff/gitdiff_string_test.go @@ -0,0 +1,45 @@ +package gitdiff + +import ( + "bytes" + "os" + "testing" +) + +func TestFile_String(t *testing.T) { + sources := []string{ + "testdata/string/delete.patch", + "testdata/string/mode.patch", + "testdata/string/modify.patch", + "testdata/string/new.patch", + "testdata/string/new_empty.patch", + "testdata/string/new_mode.patch", + "testdata/string/rename.patch", + "testdata/string/rename_modify.patch", + } + + for _, src := range sources { + b, err := os.ReadFile(src) + if err != nil { + t.Fatalf("failed to read %s: %v", src, err) + } + + original := assertParseSingleFile(t, src, b) + str := original.String() + + if string(b) != str { + t.Errorf("%s: incorrect patch\nexpected: %q\n actual: %q\n", src, string(b), str) + } + } +} + +func assertParseSingleFile(t *testing.T, src string, b []byte) *File { + files, _, err := Parse(bytes.NewReader(b)) + if err != nil { + t.Fatalf("failed to parse patch %s: %v", src, err) + } + if len(files) != 1 { + t.Fatalf("expected %s to contain a single files, but found %d", src, len(files)) + } + return files[0] +} diff --git a/gitdiff/testdata/string/delete.patch b/gitdiff/testdata/string/delete.patch new file mode 100644 index 0000000..f32dc25 --- /dev/null +++ b/gitdiff/testdata/string/delete.patch @@ -0,0 +1,16 @@ +diff --git a/file.txt b/file.txt +deleted file mode 100644 +index c9e9e05..0000000 +--- a/file.txt ++++ /dev/null +@@ -1,10 +0,0 @@ +-one +-two +-three +-four +-five +-six +-seven +-eight +-nine +-ten diff --git a/gitdiff/testdata/string/mode.patch b/gitdiff/testdata/string/mode.patch new file mode 100644 index 0000000..953ab25 --- /dev/null +++ b/gitdiff/testdata/string/mode.patch @@ -0,0 +1,3 @@ +diff --git a/file.txt b/file.txt +old mode 100644 +new mode 100755 diff --git a/gitdiff/testdata/string/modify.patch b/gitdiff/testdata/string/modify.patch new file mode 100644 index 0000000..9d89753 --- /dev/null +++ b/gitdiff/testdata/string/modify.patch @@ -0,0 +1,16 @@ +diff --git a/file.txt b/file.txt +index c9e9e05..7d5fdc6 100644 +--- a/file.txt ++++ b/file.txt +@@ -3,8 +3,10 @@ two + three + four + five +-six ++six six six six six six + seven + eight + nine + ten ++eleven ++twelve diff --git a/gitdiff/testdata/string/new.patch b/gitdiff/testdata/string/new.patch new file mode 100644 index 0000000..941fe25 --- /dev/null +++ b/gitdiff/testdata/string/new.patch @@ -0,0 +1,16 @@ +diff --git a/file.txt b/file.txt +new file mode 100644 +index 0000000..c9e9e05 +--- /dev/null ++++ b/file.txt +@@ -0,0 +1,10 @@ ++one ++two ++three ++four ++five ++six ++seven ++eight ++nine ++ten diff --git a/gitdiff/testdata/string/new_empty.patch b/gitdiff/testdata/string/new_empty.patch new file mode 100644 index 0000000..5cc7cf7 --- /dev/null +++ b/gitdiff/testdata/string/new_empty.patch @@ -0,0 +1,3 @@ +diff --git a/file.txt b/file.txt +new file mode 100644 +index 0000000..e69de29 diff --git a/gitdiff/testdata/string/new_mode.patch b/gitdiff/testdata/string/new_mode.patch new file mode 100644 index 0000000..f9d7f1f --- /dev/null +++ b/gitdiff/testdata/string/new_mode.patch @@ -0,0 +1,16 @@ +diff --git a/file.sh b/file.sh +new file mode 100755 +index 0000000..c9e9e05 +--- /dev/null ++++ b/file.sh +@@ -0,0 +1,10 @@ ++one ++two ++three ++four ++five ++six ++seven ++eight ++nine ++ten diff --git a/gitdiff/testdata/string/rename.patch b/gitdiff/testdata/string/rename.patch new file mode 100644 index 0000000..3c0ca6f --- /dev/null +++ b/gitdiff/testdata/string/rename.patch @@ -0,0 +1,4 @@ +diff --git a/file.txt b/numbers.txt +similarity index 100% +rename from file.txt +rename to numbers.txt diff --git a/gitdiff/testdata/string/rename_modify.patch b/gitdiff/testdata/string/rename_modify.patch new file mode 100644 index 0000000..52a32af --- /dev/null +++ b/gitdiff/testdata/string/rename_modify.patch @@ -0,0 +1,18 @@ +diff --git a/file.txt b/numbers.txt +similarity index 77% +rename from file.txt +rename to numbers.txt +index c9e9e05..a6b31d6 100644 +--- a/file.txt ++++ b/numbers.txt +@@ -3,8 +3,9 @@ two + three + four + five +-six ++ six + seven + eight + nine + ten ++eleven From ac2e2c5504bc8b050c4f7db6122af3fe3fcbc362 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Fri, 9 Aug 2024 23:00:36 -0700 Subject: [PATCH 04/11] Fix mode on index lines, add more test cases --- gitdiff/gitdiff.go | 5 ++++- gitdiff/gitdiff_string_test.go | 3 +++ gitdiff/testdata/string/copy.patch | 4 ++++ gitdiff/testdata/string/copy_modify.patch | 21 +++++++++++++++++++++ gitdiff/testdata/string/mode_modify.patch | 10 ++++++++++ 5 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 gitdiff/testdata/string/copy.patch create mode 100644 gitdiff/testdata/string/copy_modify.patch create mode 100644 gitdiff/testdata/string/mode_modify.patch diff --git a/gitdiff/gitdiff.go b/gitdiff/gitdiff.go index c0762a0..1257559 100644 --- a/gitdiff/gitdiff.go +++ b/gitdiff/gitdiff.go @@ -119,9 +119,12 @@ func (f *File) String() string { if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" { fmt.Fprintf(&diff, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix) - if f.OldMode != 0 && !f.IsDelete { + + // Mode is only included on the index line when it is not changing + if f.OldMode != 0 && ((f.NewMode == 0 && !f.IsDelete) || f.OldMode == f.NewMode) { fmt.Fprintf(&diff, " %o", f.OldMode) } + diff.WriteByte('\n') } diff --git a/gitdiff/gitdiff_string_test.go b/gitdiff/gitdiff_string_test.go index 96d8aa7..ed7d143 100644 --- a/gitdiff/gitdiff_string_test.go +++ b/gitdiff/gitdiff_string_test.go @@ -8,14 +8,17 @@ import ( func TestFile_String(t *testing.T) { sources := []string{ + "testdata/string/copy.patch", "testdata/string/delete.patch", "testdata/string/mode.patch", + "testdata/string/mode_modify.patch", "testdata/string/modify.patch", "testdata/string/new.patch", "testdata/string/new_empty.patch", "testdata/string/new_mode.patch", "testdata/string/rename.patch", "testdata/string/rename_modify.patch", + "testdata/string/copy_modify.patch", } for _, src := range sources { diff --git a/gitdiff/testdata/string/copy.patch b/gitdiff/testdata/string/copy.patch new file mode 100644 index 0000000..f002f07 --- /dev/null +++ b/gitdiff/testdata/string/copy.patch @@ -0,0 +1,4 @@ +diff --git a/file.txt b/numbers.txt +similarity index 100% +copy from file.txt +copy to numbers.txt diff --git a/gitdiff/testdata/string/copy_modify.patch b/gitdiff/testdata/string/copy_modify.patch new file mode 100644 index 0000000..558a511 --- /dev/null +++ b/gitdiff/testdata/string/copy_modify.patch @@ -0,0 +1,21 @@ +diff --git a/file.txt b/numbers.txt +similarity index 57% +copy from file.txt +copy to numbers.txt +index c9e9e05..6c4a3e0 100644 +--- a/file.txt ++++ b/numbers.txt +@@ -1,6 +1,6 @@ + one + two +-three ++three three three + four + five + six +@@ -8,3 +8,5 @@ seven + eight + nine + ten ++eleven ++twelve diff --git a/gitdiff/testdata/string/mode_modify.patch b/gitdiff/testdata/string/mode_modify.patch new file mode 100644 index 0000000..f1554a7 --- /dev/null +++ b/gitdiff/testdata/string/mode_modify.patch @@ -0,0 +1,10 @@ +diff --git a/script.sh b/script.sh +old mode 100644 +new mode 100755 +index 7a870bd..68d501e +--- a/script.sh ++++ b/script.sh +@@ -1,2 +1,2 @@ + #!/bin/bash +-echo "Hello World" ++echo "Hello, World!" From 7aac9c232ef49a5d2505bd4a8b521812e324e6d1 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sat, 10 Aug 2024 16:47:02 -0700 Subject: [PATCH 05/11] Add String() to BinaryFragment Implement base85 encoding and improve how binary patches are displayed. This fails the tests due to differences between the Git and Go implementation of zlib. I need to decide how to resolve this. --- gitdiff/base85.go | 43 +++++++- gitdiff/base85_test.go | 60 +++++++++++ gitdiff/gitdiff.go | 104 ++++++++++++++++++-- gitdiff/gitdiff_string_test.go | 21 +++- gitdiff/testdata/string/binary_modify.patch | 9 ++ gitdiff/testdata/string/binary_new.patch | 11 +++ 6 files changed, 239 insertions(+), 9 deletions(-) create mode 100644 gitdiff/testdata/string/binary_modify.patch create mode 100644 gitdiff/testdata/string/binary_new.patch diff --git a/gitdiff/base85.go b/gitdiff/base85.go index 385aa64..86db117 100644 --- a/gitdiff/base85.go +++ b/gitdiff/base85.go @@ -19,8 +19,8 @@ func init() { } // base85Decode decodes Base85-encoded data from src into dst. It uses the -// alphabet defined by base85.c in the Git source tree, which appears to be -// unique. src must contain at least len(dst) bytes of encoded data. +// alphabet defined by base85.c in the Git source tree. src must contain at +// least len(dst) bytes of encoded data. func base85Decode(dst, src []byte) error { var v uint32 var n, ndst int @@ -50,3 +50,42 @@ func base85Decode(dst, src []byte) error { } return nil } + +// base85Encode encodes src in Base85, writing the result to dst. It uses the +// alphabet defined by base85.c in the Git source tree. +func base85Encode(dst, src []byte) { + var di, si int + + encode := func(v uint32) { + dst[di+0] = b85Alpha[(v/(85*85*85*85))%85] + dst[di+1] = b85Alpha[(v/(85*85*85))%85] + dst[di+2] = b85Alpha[(v/(85*85))%85] + dst[di+3] = b85Alpha[(v/85)%85] + dst[di+4] = b85Alpha[v%85] + } + + n := (len(src) / 4) * 4 + for si < n { + encode(uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3])) + si += 4 + di += 5 + } + + var v uint32 + switch len(src) - si { + case 3: + v |= uint32(src[si+2]) << 8 + fallthrough + case 2: + v |= uint32(src[si+1]) << 16 + fallthrough + case 1: + v |= uint32(src[si+0]) << 24 + encode(v) + } +} + +// base85Len returns the length of n bytes of Base85 encoded data. +func base85Len(n int) int { + return (n + 3) / 4 * 5 +} diff --git a/gitdiff/base85_test.go b/gitdiff/base85_test.go index 068da63..f4d1339 100644 --- a/gitdiff/base85_test.go +++ b/gitdiff/base85_test.go @@ -1,6 +1,9 @@ package gitdiff import ( + "bytes" + "fmt" + "math/rand" "testing" ) @@ -58,3 +61,60 @@ func TestBase85Decode(t *testing.T) { }) } } + +func TestBase85Encode(t *testing.T) { + tests := map[string]struct { + Input []byte + Output string + }{ + "zeroBytes": { + Input: []byte{}, + Output: "", + }, + "twoBytes": { + Input: []byte{0xCA, 0xFE}, + Output: "%KiWV", + }, + "fourBytes": { + Input: []byte{0x0, 0x0, 0xCA, 0xFE}, + Output: "007GV", + }, + "sixBytes": { + Input: []byte{0x0, 0x0, 0xCA, 0xFE, 0xCA, 0xFE}, + Output: "007GV%KiWV", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + dst := make([]byte, len(test.Output)) + base85Encode(dst, test.Input) + for i, b := range test.Output { + if dst[i] != byte(b) { + t.Errorf("incorrect character at index %d: expected '%c', actual '%c'", i, b, dst[i]) + } + } + }) + } +} + +func TestBase85Roundtrip(t *testing.T) { + r := rand.New(rand.NewSource(72)) // chosen by fair dice roll + + for _, size := range []int{64, 85, 1025} { + t.Run(fmt.Sprintf("size%d", size), func(t *testing.T) { + in := make([]byte, size) + r.Read(in) + + dst := make([]byte, base85Len(size)) + out := make([]byte, size) + + base85Encode(dst, in) + base85Decode(out, dst) + + if !bytes.Equal(in, out) { + t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, out, string(dst)) + } + }) + } +} diff --git a/gitdiff/gitdiff.go b/gitdiff/gitdiff.go index 1257559..6ba261e 100644 --- a/gitdiff/gitdiff.go +++ b/gitdiff/gitdiff.go @@ -1,9 +1,12 @@ package gitdiff import ( + "bytes" + "compress/zlib" "errors" "fmt" "os" + "strconv" "strings" ) @@ -128,8 +131,21 @@ func (f *File) String() string { diff.WriteByte('\n') } - // The "---" and "+++" lines only appear for patches with fragments - if len(f.TextFragments) > 0 || f.BinaryFragment != nil { + if f.IsBinary { + if f.BinaryFragment == nil { + diff.WriteString("Binary files differ\n") + } else { + diff.WriteString("GIT binary patch\n") + diff.WriteString(f.BinaryFragment.String()) + if f.ReverseBinaryFragment != nil { + diff.WriteByte('\n') + diff.WriteString(f.ReverseBinaryFragment.String()) + } + } + } + + // The "---" and "+++" lines only appear for text patches with fragments + if len(f.TextFragments) > 0 { diff.WriteString("--- ") if f.OldName == "" { diff.WriteString("/dev/null") @@ -145,11 +161,7 @@ func (f *File) String() string { writeQuotedName(&diff, "b/"+f.NewName) } diff.WriteByte('\n') - } - if f.IsBinary { - // TODO(bkeyes): add string method for BinaryFragments - } else { for _, frag := range f.TextFragments { diff.WriteString(frag.String()) } @@ -344,3 +356,83 @@ const ( // BinaryPatchLiteral indicates the data is the exact file content BinaryPatchLiteral ) + +func (f *BinaryFragment) String() string { + const ( + maxBytesPerLine = 52 + ) + + var diff strings.Builder + + switch f.Method { + case BinaryPatchDelta: + diff.WriteString("delta ") + case BinaryPatchLiteral: + diff.WriteString("literal ") + } + diff.Write(strconv.AppendInt(nil, f.Size, 10)) + diff.WriteByte('\n') + + data := deflateBinaryChunk(f.Data) + n := (len(data) / maxBytesPerLine) * maxBytesPerLine + + buf := make([]byte, base85Len(maxBytesPerLine)) + for i := 0; i < n; i += maxBytesPerLine { + base85Encode(buf, data[i:i+maxBytesPerLine]) + diff.WriteByte('z') + diff.Write(buf) + diff.WriteByte('\n') + } + if remainder := len(data) - n; remainder > 0 { + buf = buf[0:base85Len(remainder)] + + sizeChar := byte(remainder) + if remainder <= 26 { + sizeChar = 'A' + sizeChar - 1 + } else { + sizeChar = 'a' + sizeChar - 27 + } + + base85Encode(buf, data[n:]) + diff.WriteByte(sizeChar) + diff.Write(buf) + diff.WriteByte('\n') + } + + return diff.String() +} + +// TODO(bkeyes): The 'compress/flate' package does not produce minimal output +// streams. Instead of flagging that the last block of data represents the end +// of the stream, it always writes a final empty block to mark the end. Git's +// implementation using the 'zlib' C library does not do this, which means that +// what we produce for binary patches does not match the input, even though it +// is valid. +// +// This is mostly a problem for my tests, where I compare the input and output +// bytes. This comparison isn't required, but is helpful to catch invalid +// output that might otherwise still parse. +// +// Options for fixing this: +// +// 1. Fix the tests to compare parsed objects instead of raw patches, at least +// for binary patches. This means writing something to do reasonable +// comparisons of File structs. +// +// 2. Add my own deflate function. By default, Git appears to use no +// compression on binary patch data, which means "delfate" is just adding +// the appropriate headers and checksums around the data. This would fix my +// tests but means we could never emit compressed data, so we'd differ from +// Git in other situations. +// +// Either way, there will be situations in which re-formatted binary patches +// differ from the original inputs. +func deflateBinaryChunk(data []byte) []byte { + var b bytes.Buffer + + zw, _ := zlib.NewWriterLevel(&b, zlib.NoCompression) + _, _ = zw.Write(data) + _ = zw.Close() + + return b.Bytes() +} diff --git a/gitdiff/gitdiff_string_test.go b/gitdiff/gitdiff_string_test.go index ed7d143..a47399c 100644 --- a/gitdiff/gitdiff_string_test.go +++ b/gitdiff/gitdiff_string_test.go @@ -8,7 +8,10 @@ import ( func TestFile_String(t *testing.T) { sources := []string{ + "testdata/string/binary_modify.patch", + "testdata/string/binary_new.patch", "testdata/string/copy.patch", + "testdata/string/copy_modify.patch", "testdata/string/delete.patch", "testdata/string/mode.patch", "testdata/string/mode_modify.patch", @@ -18,7 +21,6 @@ func TestFile_String(t *testing.T) { "testdata/string/new_mode.patch", "testdata/string/rename.patch", "testdata/string/rename_modify.patch", - "testdata/string/copy_modify.patch", } for _, src := range sources { @@ -46,3 +48,20 @@ func assertParseSingleFile(t *testing.T, src string, b []byte) *File { } return files[0] } + +/* +func TestDecode(t *testing.T) { + actual := []byte("cmV-O") + mine := []byte("cmV)N") + + dst := make([]byte, 4) + + base85Decode(dst, actual) + t.Logf("actual: %x / %b", dst, dst) + + base85Decode(dst, mine) + t.Logf(" mine: %x / %b", dst, dst) + + t.FailNow() +} +*/ diff --git a/gitdiff/testdata/string/binary_modify.patch b/gitdiff/testdata/string/binary_modify.patch new file mode 100644 index 0000000..12ddad5 --- /dev/null +++ b/gitdiff/testdata/string/binary_modify.patch @@ -0,0 +1,9 @@ +diff --git a/file.bin b/file.bin +index a7f4d5d6975ec021016c02b6d58345ebf434f38c..bdc9a70f055892146612dcdb413f0e339faaa0df 100644 +GIT binary patch +delta 66 +QcmeZhVVvM$!$1K50C&Ox;s5{u + +delta 5 +McmZo+^qAlQ00i9urT_o{ + diff --git a/gitdiff/testdata/string/binary_new.patch b/gitdiff/testdata/string/binary_new.patch new file mode 100644 index 0000000..c56f35e --- /dev/null +++ b/gitdiff/testdata/string/binary_new.patch @@ -0,0 +1,11 @@ +diff --git a/file.bin b/file.bin +new file mode 100644 +index 0000000000000000000000000000000000000000..a7f4d5d6975ec021016c02b6d58345ebf434f38c +GIT binary patch +literal 72 +zcmV-O0Jr~td-`u6JcK&{KDK= Date: Sat, 10 Aug 2024 20:24:56 -0700 Subject: [PATCH 06/11] Assert roundtrip for patch formats Re-parse our string representation and make sure it is equal. This is important for binary patches due to inconsistencies in DEFLATE implementations, but we do it for text patches too. --- gitdiff/gitdiff.go | 31 ++-------- gitdiff/gitdiff_string_test.go | 103 +++++++++++++++++++++++++++++---- go.mod | 2 +- 3 files changed, 96 insertions(+), 40 deletions(-) diff --git a/gitdiff/gitdiff.go b/gitdiff/gitdiff.go index 6ba261e..cdc3ba7 100644 --- a/gitdiff/gitdiff.go +++ b/gitdiff/gitdiff.go @@ -137,9 +137,11 @@ func (f *File) String() string { } else { diff.WriteString("GIT binary patch\n") diff.WriteString(f.BinaryFragment.String()) + diff.WriteByte('\n') + if f.ReverseBinaryFragment != nil { - diff.WriteByte('\n') diff.WriteString(f.ReverseBinaryFragment.String()) + diff.WriteByte('\n') } } } @@ -402,35 +404,10 @@ func (f *BinaryFragment) String() string { return diff.String() } -// TODO(bkeyes): The 'compress/flate' package does not produce minimal output -// streams. Instead of flagging that the last block of data represents the end -// of the stream, it always writes a final empty block to mark the end. Git's -// implementation using the 'zlib' C library does not do this, which means that -// what we produce for binary patches does not match the input, even though it -// is valid. -// -// This is mostly a problem for my tests, where I compare the input and output -// bytes. This comparison isn't required, but is helpful to catch invalid -// output that might otherwise still parse. -// -// Options for fixing this: -// -// 1. Fix the tests to compare parsed objects instead of raw patches, at least -// for binary patches. This means writing something to do reasonable -// comparisons of File structs. -// -// 2. Add my own deflate function. By default, Git appears to use no -// compression on binary patch data, which means "delfate" is just adding -// the appropriate headers and checksums around the data. This would fix my -// tests but means we could never emit compressed data, so we'd differ from -// Git in other situations. -// -// Either way, there will be situations in which re-formatted binary patches -// differ from the original inputs. func deflateBinaryChunk(data []byte) []byte { var b bytes.Buffer - zw, _ := zlib.NewWriterLevel(&b, zlib.NoCompression) + zw := zlib.NewWriter(&b) _, _ = zw.Write(data) _ = zw.Close() diff --git a/gitdiff/gitdiff_string_test.go b/gitdiff/gitdiff_string_test.go index a47399c..1a34bb3 100644 --- a/gitdiff/gitdiff_string_test.go +++ b/gitdiff/gitdiff_string_test.go @@ -2,11 +2,13 @@ package gitdiff import ( "bytes" + "fmt" "os" + "slices" "testing" ) -func TestFile_String(t *testing.T) { +func TestParseRoundtrip(t *testing.T) { sources := []string{ "testdata/string/binary_modify.patch", "testdata/string/binary_new.patch", @@ -35,6 +37,11 @@ func TestFile_String(t *testing.T) { if string(b) != str { t.Errorf("%s: incorrect patch\nexpected: %q\n actual: %q\n", src, string(b), str) } + + reparsed := assertParseSingleFile(t, fmt.Sprintf("Parse(%q).String()", src), []byte(str)) + + // TODO(bkeyes): include source in these messages (via subtest?) + assertFilesEqual(t, original, reparsed) } } @@ -49,19 +56,91 @@ func assertParseSingleFile(t *testing.T, src string, b []byte) *File { return files[0] } -/* -func TestDecode(t *testing.T) { - actual := []byte("cmV-O") - mine := []byte("cmV)N") +func assertFilesEqual(t *testing.T, expected, actual *File) { + assertEqual(t, expected.OldName, actual.OldName, "OldName") + assertEqual(t, expected.NewName, actual.NewName, "NewName") + + assertEqual(t, expected.IsNew, actual.IsNew, "IsNew") + assertEqual(t, expected.IsDelete, actual.IsDelete, "IsDelete") + assertEqual(t, expected.IsCopy, actual.IsCopy, "IsCopy") + assertEqual(t, expected.IsRename, actual.IsRename, "IsRename") + + assertEqual(t, expected.OldMode, actual.OldMode, "OldMode") + assertEqual(t, expected.NewMode, actual.NewMode, "NewMode") + + assertEqual(t, expected.OldOIDPrefix, actual.OldOIDPrefix, "OldOIDPrefix") + assertEqual(t, expected.NewOIDPrefix, actual.NewOIDPrefix, "NewOIDPrefix") + assertEqual(t, expected.Score, actual.Score, "Score") + + if len(expected.TextFragments) == len(actual.TextFragments) { + for i := range expected.TextFragments { + prefix := fmt.Sprintf("TextFragments[%d].", i) + ef := expected.TextFragments[i] + af := actual.TextFragments[i] + + assertEqual(t, ef.Comment, af.Comment, prefix+"Comment") + + assertEqual(t, ef.OldPosition, af.OldPosition, prefix+"OldPosition") + assertEqual(t, ef.OldLines, af.OldLines, prefix+"OldLines") + + assertEqual(t, ef.NewPosition, af.NewPosition, prefix+"NewPosition") + assertEqual(t, ef.NewLines, af.NewLines, prefix+"NewLines") + + assertEqual(t, ef.LinesAdded, af.LinesAdded, prefix+"LinesAdded") + assertEqual(t, ef.LinesDeleted, af.LinesDeleted, prefix+"LinesDeleted") + + assertEqual(t, ef.LeadingContext, af.LeadingContext, prefix+"LeadingContext") + assertEqual(t, ef.TrailingContext, af.TrailingContext, prefix+"TrailingContext") + + if !slices.Equal(ef.Lines, af.Lines) { + t.Errorf("%sLines: expected %#v, actual %#v", prefix, ef.Lines, af.Lines) + } + } + } else { + t.Errorf("TextFragments: expected length %d, actual length %d", len(expected.TextFragments), len(actual.TextFragments)) + } + + assertEqual(t, expected.IsBinary, actual.IsBinary, "IsBinary") + + if expected.BinaryFragment != nil { + if actual.BinaryFragment == nil { + t.Errorf("BinaryFragment: expected non-nil, actual is nil") + } else { + ef := expected.BinaryFragment + af := expected.BinaryFragment + + assertEqual(t, ef.Method, af.Method, "BinaryFragment.Method") + assertEqual(t, ef.Size, af.Size, "BinaryFragment.Size") + + if !slices.Equal(ef.Data, af.Data) { + t.Errorf("BinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) + } + } + } else if actual.BinaryFragment != nil { + t.Errorf("BinaryFragment: expected nil, actual is non-nil") + } - dst := make([]byte, 4) + if expected.ReverseBinaryFragment != nil { + if actual.ReverseBinaryFragment == nil { + t.Errorf("ReverseBinaryFragment: expected non-nil, actual is nil") + } else { + ef := expected.ReverseBinaryFragment + af := expected.ReverseBinaryFragment - base85Decode(dst, actual) - t.Logf("actual: %x / %b", dst, dst) + assertEqual(t, ef.Method, af.Method, "ReverseBinaryFragment.Method") + assertEqual(t, ef.Size, af.Size, "ReverseBinaryFragment.Size") - base85Decode(dst, mine) - t.Logf(" mine: %x / %b", dst, dst) + if !slices.Equal(ef.Data, af.Data) { + t.Errorf("ReverseBinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) + } + } + } else if actual.ReverseBinaryFragment != nil { + t.Errorf("ReverseBinaryFragment: expected nil, actual is non-nil") + } +} - t.FailNow() +func assertEqual[T comparable](t *testing.T, expected, actual T, name string) { + if expected != actual { + t.Errorf("%s: expected %#v, actual %#v", name, expected, actual) + } } -*/ diff --git a/go.mod b/go.mod index f35826e..39280fa 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ module github.com/bluekeyes/go-gitdiff -go 1.13 +go 1.20 From 43a7035a109420d8aef00bbce6537758809552c8 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sun, 11 Aug 2024 10:19:59 -0700 Subject: [PATCH 07/11] Conver base85 roundtrip to fuzz test --- gitdiff/base85_test.go | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/gitdiff/base85_test.go b/gitdiff/base85_test.go index f4d1339..1805d56 100644 --- a/gitdiff/base85_test.go +++ b/gitdiff/base85_test.go @@ -2,8 +2,6 @@ package gitdiff import ( "bytes" - "fmt" - "math/rand" "testing" ) @@ -98,23 +96,22 @@ func TestBase85Encode(t *testing.T) { } } -func TestBase85Roundtrip(t *testing.T) { - r := rand.New(rand.NewSource(72)) // chosen by fair dice roll +func FuzzBase85Roundtrip(f *testing.F) { + f.Add([]byte{0x2b, 0x0d}) + f.Add([]byte{0xbc, 0xb4, 0x3f}) + f.Add([]byte{0xfa, 0x62, 0x05, 0x83, 0x24, 0x39, 0xd5, 0x25}) + f.Add([]byte{0x31, 0x59, 0x02, 0xa0, 0x61, 0x12, 0xd9, 0x43, 0xb8, 0x23, 0x1a, 0xb4, 0x02, 0xae, 0xfa, 0xcc, 0x22, 0xad, 0x41, 0xb9, 0xb8}) - for _, size := range []int{64, 85, 1025} { - t.Run(fmt.Sprintf("size%d", size), func(t *testing.T) { - in := make([]byte, size) - r.Read(in) + f.Fuzz(func(t *testing.T, in []byte) { + n := len(in) + dst := make([]byte, base85Len(n)) + out := make([]byte, n) - dst := make([]byte, base85Len(size)) - out := make([]byte, size) + base85Encode(dst, in) + base85Decode(out, dst) - base85Encode(dst, in) - base85Decode(out, dst) - - if !bytes.Equal(in, out) { - t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, out, string(dst)) - } - }) - } + if !bytes.Equal(in, out) { + t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, out, string(dst)) + } + }) } From 7cec3fed41d1af9ccac1ef1cf1511a31549aa08a Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sun, 11 Aug 2024 10:30:23 -0700 Subject: [PATCH 08/11] Use subtests in parse roundtrip tests --- gitdiff/gitdiff_string_test.go | 72 +++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/gitdiff/gitdiff_string_test.go b/gitdiff/gitdiff_string_test.go index 1a34bb3..579dd02 100644 --- a/gitdiff/gitdiff_string_test.go +++ b/gitdiff/gitdiff_string_test.go @@ -4,54 +4,64 @@ import ( "bytes" "fmt" "os" + "path/filepath" "slices" "testing" ) func TestParseRoundtrip(t *testing.T) { - sources := []string{ - "testdata/string/binary_modify.patch", - "testdata/string/binary_new.patch", - "testdata/string/copy.patch", - "testdata/string/copy_modify.patch", - "testdata/string/delete.patch", - "testdata/string/mode.patch", - "testdata/string/mode_modify.patch", - "testdata/string/modify.patch", - "testdata/string/new.patch", - "testdata/string/new_empty.patch", - "testdata/string/new_mode.patch", - "testdata/string/rename.patch", - "testdata/string/rename_modify.patch", + patches := []struct { + File string + SkipTextCompare bool + }{ + {File: "copy.patch"}, + {File: "copy_modify.patch"}, + {File: "delete.patch"}, + {File: "mode.patch"}, + {File: "mode_modify.patch"}, + {File: "modify.patch"}, + {File: "new.patch"}, + {File: "new_empty.patch"}, + {File: "new_mode.patch"}, + {File: "rename.patch"}, + {File: "rename_modify.patch"}, + + // Due to differences between Go's 'encoding/zlib' package and the zlib + // C library, binary patches cannot be compared directly as the patch + // data is slightly different when re-encoded by Go. + {File: "binary_modify.patch", SkipTextCompare: true}, + {File: "binary_new.patch", SkipTextCompare: true}, } - for _, src := range sources { - b, err := os.ReadFile(src) - if err != nil { - t.Fatalf("failed to read %s: %v", src, err) - } + for _, patch := range patches { + t.Run(patch.File, func(t *testing.T) { + b, err := os.ReadFile(filepath.Join("testdata", "string", patch.File)) + if err != nil { + t.Fatalf("failed to read patch: %v", err) + } - original := assertParseSingleFile(t, src, b) - str := original.String() + original := assertParseSingleFile(t, b, "patch") + str := original.String() - if string(b) != str { - t.Errorf("%s: incorrect patch\nexpected: %q\n actual: %q\n", src, string(b), str) - } - - reparsed := assertParseSingleFile(t, fmt.Sprintf("Parse(%q).String()", src), []byte(str)) + if !patch.SkipTextCompare { + if string(b) != str { + t.Errorf("incorrect patch text\nexpected: %q\n actual: %q\n", string(b), str) + } + } - // TODO(bkeyes): include source in these messages (via subtest?) - assertFilesEqual(t, original, reparsed) + reparsed := assertParseSingleFile(t, []byte(str), "formatted patch") + assertFilesEqual(t, original, reparsed) + }) } } -func assertParseSingleFile(t *testing.T, src string, b []byte) *File { +func assertParseSingleFile(t *testing.T, b []byte, kind string) *File { files, _, err := Parse(bytes.NewReader(b)) if err != nil { - t.Fatalf("failed to parse patch %s: %v", src, err) + t.Fatalf("failed to parse %s: %v", kind, err) } if len(files) != 1 { - t.Fatalf("expected %s to contain a single files, but found %d", src, len(files)) + t.Fatalf("expected %s to contain a single files, but found %d", kind, len(files)) } return files[0] } From 3eb61e6916f4bdfdf516f420cf9c7621d27173f4 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sun, 11 Aug 2024 10:51:49 -0700 Subject: [PATCH 09/11] Move formatting functions to formatter This cleans up the code a bit and allows writing everything to a single buffer when formatting a full file. --- gitdiff/format.go | 277 ++++++++++++++++++ ...tring_test.go => format_roundtrip_test.go} | 2 +- gitdiff/{quote_test.go => format_test.go} | 4 +- gitdiff/gitdiff.go | 197 +------------ gitdiff/quote.go | 52 ---- 5 files changed, 284 insertions(+), 248 deletions(-) create mode 100644 gitdiff/format.go rename gitdiff/{gitdiff_string_test.go => format_roundtrip_test.go} (99%) rename gitdiff/{quote_test.go => format_test.go} (84%) delete mode 100644 gitdiff/quote.go diff --git a/gitdiff/format.go b/gitdiff/format.go new file mode 100644 index 0000000..5653f6f --- /dev/null +++ b/gitdiff/format.go @@ -0,0 +1,277 @@ +package gitdiff + +import ( + "bytes" + "compress/zlib" + "fmt" + "io" + "strconv" +) + +type formatter struct { + w io.Writer + err error +} + +func newFormatter(w io.Writer) *formatter { + return &formatter{w: w} +} + +func (fm *formatter) Write(p []byte) (int, error) { + if fm.err != nil { + return len(p), nil + } + if _, err := fm.w.Write(p); err != nil { + fm.err = err + } + return len(p), nil +} + +func (fm *formatter) WriteString(s string) (int, error) { + fm.Write([]byte(s)) + return len(s), nil +} + +func (fm *formatter) WriteByte(c byte) error { + fm.Write([]byte{c}) + return nil +} + +func (fm *formatter) WriteQuotedName(s string) { + qpos := 0 + for i := 0; i < len(s); i++ { + ch := s[i] + if q, quoted := quoteByte(ch); quoted { + if qpos == 0 { + fm.WriteByte('"') + } + fm.WriteString(s[qpos:i]) + fm.Write(q) + qpos = i + 1 + } + } + fm.WriteString(s[qpos:]) + if qpos > 0 { + fm.WriteByte('"') + } +} + +var quoteEscapeTable = map[byte]byte{ + '\a': 'a', + '\b': 'b', + '\t': 't', + '\n': 'n', + '\v': 'v', + '\f': 'f', + '\r': 'r', + '"': '"', + '\\': '\\', +} + +func quoteByte(b byte) ([]byte, bool) { + if q, ok := quoteEscapeTable[b]; ok { + return []byte{'\\', q}, true + } + if b < 0x20 || b >= 0x7F { + return []byte{ + '\\', + '0' + (b>>6)&0o3, + '0' + (b>>3)&0o7, + '0' + (b>>0)&0o7, + }, true + } + return nil, false +} + +func (fm *formatter) FormatFile(f *File) { + fm.WriteString("diff --git ") + + var aName, bName string + switch { + case f.OldName == "": + aName = f.NewName + bName = f.NewName + + case f.NewName == "": + aName = f.OldName + bName = f.OldName + + default: + aName = f.OldName + bName = f.NewName + } + + fm.WriteQuotedName("a/" + aName) + fm.WriteByte(' ') + fm.WriteQuotedName("b/" + bName) + fm.WriteByte('\n') + + if f.OldMode != 0 { + if f.IsDelete { + fmt.Fprintf(fm, "deleted file mode %o\n", f.OldMode) + } else if f.NewMode != 0 { + fmt.Fprintf(fm, "old mode %o\n", f.OldMode) + } + } + + if f.NewMode != 0 { + if f.IsNew { + fmt.Fprintf(fm, "new file mode %o\n", f.NewMode) + } else if f.OldMode != 0 { + fmt.Fprintf(fm, "new mode %o\n", f.NewMode) + } + } + + if f.Score > 0 { + if f.IsCopy || f.IsRename { + fmt.Fprintf(fm, "similarity index %d%%\n", f.Score) + } else { + fmt.Fprintf(fm, "dissimilarity index %d%%\n", f.Score) + } + } + + if f.IsCopy { + if f.OldName != "" { + fm.WriteString("copy from ") + fm.WriteQuotedName(f.OldName) + fm.WriteByte('\n') + } + if f.NewName != "" { + fm.WriteString("copy to ") + fm.WriteQuotedName(f.NewName) + fm.WriteByte('\n') + } + } + + if f.IsRename { + if f.OldName != "" { + fm.WriteString("rename from ") + fm.WriteQuotedName(f.OldName) + fm.WriteByte('\n') + } + if f.NewName != "" { + fm.WriteString("rename to ") + fm.WriteQuotedName(f.NewName) + fm.WriteByte('\n') + } + } + + if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" { + fmt.Fprintf(fm, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix) + + // Mode is only included on the index line when it is not changing + if f.OldMode != 0 && ((f.NewMode == 0 && !f.IsDelete) || f.OldMode == f.NewMode) { + fmt.Fprintf(fm, " %o", f.OldMode) + } + + fm.WriteByte('\n') + } + + if f.IsBinary { + if f.BinaryFragment == nil { + fm.WriteString("Binary files fmer\n") + } else { + fm.WriteString("GIT binary patch\n") + fm.FormatBinaryFragment(f.BinaryFragment) + if f.ReverseBinaryFragment != nil { + fm.FormatBinaryFragment(f.ReverseBinaryFragment) + } + } + } + + // The "---" and "+++" lines only appear for text patches with fragments + if len(f.TextFragments) > 0 { + fm.WriteString("--- ") + if f.OldName == "" { + fm.WriteString("/dev/null") + } else { + fm.WriteQuotedName("a/" + f.OldName) + } + fm.WriteByte('\n') + + fm.WriteString("+++ ") + if f.NewName == "" { + fm.WriteString("/dev/null") + } else { + fm.WriteQuotedName("b/" + f.NewName) + } + fm.WriteByte('\n') + + for _, frag := range f.TextFragments { + fm.FormatTextFragment(frag) + } + } +} + +func (fm *formatter) FormatTextFragment(f *TextFragment) { + fm.FormatTextFragmentHeader(f) + fm.WriteByte('\n') + + for _, line := range f.Lines { + fm.WriteString(line.Op.String()) + fm.WriteString(line.Line) + if line.NoEOL() { + fm.WriteString("\n\\ No newline at end of file\n") + } + } +} + +func (fm *formatter) FormatTextFragmentHeader(f *TextFragment) { + fmt.Fprintf(fm, "@@ -%d,%d +%d,%d @@", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines) + if f.Comment != "" { + fm.WriteByte(' ') + fm.WriteString(f.Comment) + } +} + +func (fm *formatter) FormatBinaryFragment(f *BinaryFragment) { + const ( + maxBytesPerLine = 52 + ) + + switch f.Method { + case BinaryPatchDelta: + fm.WriteString("delta ") + case BinaryPatchLiteral: + fm.WriteString("literal ") + } + fm.Write(strconv.AppendInt(nil, f.Size, 10)) + fm.WriteByte('\n') + + data := deflateBinaryChunk(f.Data) + n := (len(data) / maxBytesPerLine) * maxBytesPerLine + + buf := make([]byte, base85Len(maxBytesPerLine)) + for i := 0; i < n; i += maxBytesPerLine { + base85Encode(buf, data[i:i+maxBytesPerLine]) + fm.WriteByte('z') + fm.Write(buf) + fm.WriteByte('\n') + } + if remainder := len(data) - n; remainder > 0 { + buf = buf[0:base85Len(remainder)] + + sizeChar := byte(remainder) + if remainder <= 26 { + sizeChar = 'A' + sizeChar - 1 + } else { + sizeChar = 'a' + sizeChar - 27 + } + + base85Encode(buf, data[n:]) + fm.WriteByte(sizeChar) + fm.Write(buf) + fm.WriteByte('\n') + } + fm.WriteByte('\n') +} + +func deflateBinaryChunk(data []byte) []byte { + var b bytes.Buffer + + zw := zlib.NewWriter(&b) + _, _ = zw.Write(data) + _ = zw.Close() + + return b.Bytes() +} diff --git a/gitdiff/gitdiff_string_test.go b/gitdiff/format_roundtrip_test.go similarity index 99% rename from gitdiff/gitdiff_string_test.go rename to gitdiff/format_roundtrip_test.go index 579dd02..58cff97 100644 --- a/gitdiff/gitdiff_string_test.go +++ b/gitdiff/format_roundtrip_test.go @@ -9,7 +9,7 @@ import ( "testing" ) -func TestParseRoundtrip(t *testing.T) { +func TestFormatRoundtrip(t *testing.T) { patches := []struct { File string SkipTextCompare bool diff --git a/gitdiff/quote_test.go b/gitdiff/format_test.go similarity index 84% rename from gitdiff/quote_test.go rename to gitdiff/format_test.go index 9159060..3325296 100644 --- a/gitdiff/quote_test.go +++ b/gitdiff/format_test.go @@ -5,7 +5,7 @@ import ( "testing" ) -func TestWriteQuotedName(t *testing.T) { +func TestFormatter_WriteQuotedName(t *testing.T) { tests := []struct { Input string Expected string @@ -20,7 +20,7 @@ func TestWriteQuotedName(t *testing.T) { for _, test := range tests { var b strings.Builder - writeQuotedName(&b, test.Input) + newFormatter(&b).WriteQuotedName(test.Input) if b.String() != test.Expected { t.Errorf("expected %q, got %q", test.Expected, b.String()) } diff --git a/gitdiff/gitdiff.go b/gitdiff/gitdiff.go index cdc3ba7..f1d9673 100644 --- a/gitdiff/gitdiff.go +++ b/gitdiff/gitdiff.go @@ -1,12 +1,9 @@ package gitdiff import ( - "bytes" - "compress/zlib" "errors" "fmt" "os" - "strconv" "strings" ) @@ -47,128 +44,7 @@ type File struct { // the original input or the same as what Git would produces func (f *File) String() string { var diff strings.Builder - - diff.WriteString("diff --git ") - - var aName, bName string - switch { - case f.OldName == "": - aName = f.NewName - bName = f.NewName - - case f.NewName == "": - aName = f.OldName - bName = f.OldName - - default: - aName = f.OldName - bName = f.NewName - } - - writeQuotedName(&diff, "a/"+aName) - diff.WriteByte(' ') - writeQuotedName(&diff, "b/"+bName) - diff.WriteByte('\n') - - if f.OldMode != 0 { - if f.IsDelete { - fmt.Fprintf(&diff, "deleted file mode %o\n", f.OldMode) - } else if f.NewMode != 0 { - fmt.Fprintf(&diff, "old mode %o\n", f.OldMode) - } - } - - if f.NewMode != 0 { - if f.IsNew { - fmt.Fprintf(&diff, "new file mode %o\n", f.NewMode) - } else if f.OldMode != 0 { - fmt.Fprintf(&diff, "new mode %o\n", f.NewMode) - } - } - - if f.Score > 0 { - if f.IsCopy || f.IsRename { - fmt.Fprintf(&diff, "similarity index %d%%\n", f.Score) - } else { - fmt.Fprintf(&diff, "dissimilarity index %d%%\n", f.Score) - } - } - - if f.IsCopy { - if f.OldName != "" { - diff.WriteString("copy from ") - writeQuotedName(&diff, f.OldName) - diff.WriteByte('\n') - } - if f.NewName != "" { - diff.WriteString("copy to ") - writeQuotedName(&diff, f.NewName) - diff.WriteByte('\n') - } - } - - if f.IsRename { - if f.OldName != "" { - diff.WriteString("rename from ") - writeQuotedName(&diff, f.OldName) - diff.WriteByte('\n') - } - if f.NewName != "" { - diff.WriteString("rename to ") - writeQuotedName(&diff, f.NewName) - diff.WriteByte('\n') - } - } - - if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" { - fmt.Fprintf(&diff, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix) - - // Mode is only included on the index line when it is not changing - if f.OldMode != 0 && ((f.NewMode == 0 && !f.IsDelete) || f.OldMode == f.NewMode) { - fmt.Fprintf(&diff, " %o", f.OldMode) - } - - diff.WriteByte('\n') - } - - if f.IsBinary { - if f.BinaryFragment == nil { - diff.WriteString("Binary files differ\n") - } else { - diff.WriteString("GIT binary patch\n") - diff.WriteString(f.BinaryFragment.String()) - diff.WriteByte('\n') - - if f.ReverseBinaryFragment != nil { - diff.WriteString(f.ReverseBinaryFragment.String()) - diff.WriteByte('\n') - } - } - } - - // The "---" and "+++" lines only appear for text patches with fragments - if len(f.TextFragments) > 0 { - diff.WriteString("--- ") - if f.OldName == "" { - diff.WriteString("/dev/null") - } else { - writeQuotedName(&diff, "a/"+f.OldName) - } - diff.WriteByte('\n') - - diff.WriteString("+++ ") - if f.NewName == "" { - diff.WriteString("/dev/null") - } else { - writeQuotedName(&diff, "b/"+f.NewName) - } - diff.WriteByte('\n') - - for _, frag := range f.TextFragments { - diff.WriteString(frag.String()) - } - } - + newFormatter(&diff).FormatFile(f) return diff.String() } @@ -195,17 +71,7 @@ type TextFragment struct { // more details on this format. func (f *TextFragment) String() string { var diff strings.Builder - - diff.WriteString(f.Header()) - diff.WriteString("\n") - - for _, line := range f.Lines { - diff.WriteString(line.String()) - if line.NoEOL() { - diff.WriteString("\n\\ No newline at end of file\n") - } - } - + newFormatter(&diff).FormatTextFragment(f) return diff.String() } @@ -213,13 +79,7 @@ func (f *TextFragment) String() string { // more details on this format. func (f *TextFragment) Header() string { var hdr strings.Builder - - fmt.Fprintf(&hdr, "@@ -%d,%d +%d,%d @@", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines) - if f.Comment != "" { - hdr.WriteByte(' ') - hdr.WriteString(f.Comment) - } - + newFormatter(&hdr).FormatTextFragmentHeader(f) return hdr.String() } @@ -360,56 +220,7 @@ const ( ) func (f *BinaryFragment) String() string { - const ( - maxBytesPerLine = 52 - ) - var diff strings.Builder - - switch f.Method { - case BinaryPatchDelta: - diff.WriteString("delta ") - case BinaryPatchLiteral: - diff.WriteString("literal ") - } - diff.Write(strconv.AppendInt(nil, f.Size, 10)) - diff.WriteByte('\n') - - data := deflateBinaryChunk(f.Data) - n := (len(data) / maxBytesPerLine) * maxBytesPerLine - - buf := make([]byte, base85Len(maxBytesPerLine)) - for i := 0; i < n; i += maxBytesPerLine { - base85Encode(buf, data[i:i+maxBytesPerLine]) - diff.WriteByte('z') - diff.Write(buf) - diff.WriteByte('\n') - } - if remainder := len(data) - n; remainder > 0 { - buf = buf[0:base85Len(remainder)] - - sizeChar := byte(remainder) - if remainder <= 26 { - sizeChar = 'A' + sizeChar - 1 - } else { - sizeChar = 'a' + sizeChar - 27 - } - - base85Encode(buf, data[n:]) - diff.WriteByte(sizeChar) - diff.Write(buf) - diff.WriteByte('\n') - } - + newFormatter(&diff).FormatBinaryFragment(f) return diff.String() } - -func deflateBinaryChunk(data []byte) []byte { - var b bytes.Buffer - - zw := zlib.NewWriter(&b) - _, _ = zw.Write(data) - _ = zw.Close() - - return b.Bytes() -} diff --git a/gitdiff/quote.go b/gitdiff/quote.go deleted file mode 100644 index a9b2a28..0000000 --- a/gitdiff/quote.go +++ /dev/null @@ -1,52 +0,0 @@ -package gitdiff - -import ( - "strings" -) - -// writeQuotedName writes s to b, quoting it using C-style octal escapes if necessary. -func writeQuotedName(b *strings.Builder, s string) { - qpos := 0 - for i := 0; i < len(s); i++ { - ch := s[i] - if q, quoted := quoteByte(ch); quoted { - if qpos == 0 { - b.WriteByte('"') - } - b.WriteString(s[qpos:i]) - b.Write(q) - qpos = i + 1 - } - } - b.WriteString(s[qpos:]) - if qpos > 0 { - b.WriteByte('"') - } -} - -var quoteEscapeTable = map[byte]byte{ - '\a': 'a', - '\b': 'b', - '\t': 't', - '\n': 'n', - '\v': 'v', - '\f': 'f', - '\r': 'r', - '"': '"', - '\\': '\\', -} - -func quoteByte(b byte) ([]byte, bool) { - if q, ok := quoteEscapeTable[b]; ok { - return []byte{'\\', q}, true - } - if b < 0x20 || b >= 0x7F { - return []byte{ - '\\', - '0' + (b>>6)&0o3, - '0' + (b>>3)&0o7, - '0' + (b>>0)&0o7, - }, true - } - return nil, false -} From 9d2fdae7948bfe6fa6235c318a7492cfc4fa97ee Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sun, 11 Aug 2024 11:02:16 -0700 Subject: [PATCH 10/11] Add errcheck excludes for fomatter --- .golangci.yml | 8 +++++++- gitdiff/base85_test.go | 5 +++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 82dbad2..153b8b0 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -19,6 +19,12 @@ linters: issues: exclude-use-default: false -linter-settings: +linters-settings: goimports: local-prefixes: github.com/bluekeyes/go-gitdiff + errcheck: + exclude-functions: + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).Write + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteString + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteByte + - fmt.Fprintf(*github.com/bluekeyes/go-gitdiff/gitdiff.formatter) diff --git a/gitdiff/base85_test.go b/gitdiff/base85_test.go index 1805d56..672c471 100644 --- a/gitdiff/base85_test.go +++ b/gitdiff/base85_test.go @@ -108,8 +108,9 @@ func FuzzBase85Roundtrip(f *testing.F) { out := make([]byte, n) base85Encode(dst, in) - base85Decode(out, dst) - + if err := base85Decode(out, dst); err != nil { + t.Fatalf("unexpected error decoding base85 data: %v", err) + } if !bytes.Equal(in, out) { t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, out, string(dst)) } From 6765cc68befbf68edf5a11dedcca38320316d545 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sun, 11 Aug 2024 11:07:31 -0700 Subject: [PATCH 11/11] Add comment to BinaryFragment.String --- gitdiff/gitdiff.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gitdiff/gitdiff.go b/gitdiff/gitdiff.go index f1d9673..5365645 100644 --- a/gitdiff/gitdiff.go +++ b/gitdiff/gitdiff.go @@ -41,7 +41,7 @@ type File struct { // String returns a git diff representation of this file. The value can be // parsed by this library to obtain the same File, but may not be the same as -// the original input or the same as what Git would produces +// the original input. func (f *File) String() string { var diff strings.Builder newFormatter(&diff).FormatFile(f) @@ -219,6 +219,10 @@ const ( BinaryPatchLiteral ) +// String returns a git diff format of this fragment. Due to differences in +// zlib implementation between Go and Git, encoded binary data in the result +// will likely differ from what Git produces for the same input. See +// [File.String] for more details on this format. func (f *BinaryFragment) String() string { var diff strings.Builder newFormatter(&diff).FormatBinaryFragment(f)