Skip to content

Commit 33f60cc

Browse files
nrnrkgkampitakis
authored and committed
fix: use common lineHash to share indices between text1 and text2
Use a common cache of line contents shared between the two texts in `DiffLinesToChars` so that line diffs are computed correctly. Previously each text built its own line hash, so a line that appeared in both texts was assigned two different indices and was never recognized as equal. As a result, in some cases line diffs could not be retrieved correctly in the standard way (https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs#line-mode). In the case below, we failed to get correct line diffs before this fix. ```go:main.go package main import ( "fmt" "github.com/sergi/go-diff/diffmatchpatch" ) const ( text1 = `hoge: step11: - arrayitem1 - arrayitem2 step12: step21: hoge step22: -93 fuga: flatitem ` text2 = `hoge: step11: - arrayitem4 - arrayitem2 - arrayitem3 step12: step21: hoge step22: -92 fuga: flatitem ` ) func main() { dmp := diffmatchpatch.New() a, b, c := dmp.DiffLinesToChars(text1, text2) diffs := dmp.DiffMain(a, b, false) diffs = dmp.DiffCharsToLines(diffs, c) // diffs = dmp.DiffCleanupSemantic(diffs) fmt.Println(diffs) } ``` ```text:output [{Insert hoge: step11: hoge: } {Equal hoge: } {Insert hoge: } {Equal step11: } {Insert hoge: } {Equal - arrayitem1 } {Insert hoge: } {Equal - arrayitem2 } {Insert hoge: } {Equal step12: } {Insert hoge: } {Equal step21: hoge } {Insert hoge: } {Equal step22: -93 } {Delete fuga: flatitem }] ``` Note: This fix corresponds to the JavaScript implementation. (ref: https://github.com/google/diff-match-patch/blob/62f2e689f498f9c92dbc588c58750addec9b1654/javascript/diff_match_patch_uncompressed.js#L466)
1 parent 46eaa6f commit 33f60cc

File tree

2 files changed

+11
-7
lines changed

2 files changed

+11
-7
lines changed

diffmatchpatch/diff.go

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1338,19 +1338,21 @@ func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, stri
13381338
// So we'll insert a junk entry to avoid generating a null character.
13391339
lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
13401340

1341+
lineHash := make(map[string]int)
13411342
// Each string has the index of lineArray which it points to
1342-
strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray)
1343-
strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray)
1343+
strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray, lineHash)
1344+
strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray, lineHash)
13441345

13451346
return intArrayToString(strIndexArray1), intArrayToString(strIndexArray2), lineArray
13461347
}
13471348

13481349
// diffLinesToStringsMunge splits a text into lines, and reduces the text to a []uint32.
1349-
func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []uint32 {
1350-
// Walk the text, pulling out a substring for each line. text.split('\n')
1351-
// would would temporarily double our memory footprint.
1352-
// Modifying text would create many large strings to garbage collect.
1353-
lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
1350+
func (dmp *DiffMatchPatch) diffLinesToStringsMunge(
1351+
text string,
1352+
lineArray *[]string,
1353+
lineHash map[string]int,
1354+
) []uint32 {
1355+
// Walk the text, pulling out a substring for each line. Splitting the text on '\n' would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
13541356
lineStart := 0
13551357
lineEnd := -1
13561358
strs := []uint32{}

diffmatchpatch/diff_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,8 @@ func TestDiffLinesToChars(t *testing.T) {
306306
{"a", "b", "1", "2", []string{"", "a", "b"}},
307307
// Omit final newline.
308308
{"alpha\nbeta\nalpha", "", "1,2,3", "", []string{"", "alpha\n", "beta\n", "alpha"}},
309+
// Same lines in Text1 and Text2
310+
{"abc\ndefg\n12345\n", "abc\ndef\n12345\n678", "1,2,3", "1,4,3,5", []string{"", "abc\n", "defg\n", "12345\n", "def\n", "678"}},
309311
} {
310312
actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(tc.Text1, tc.Text2)
311313
assertEqual(t, tc.ExpectedChars1, actualChars1, fmt.Sprintf("Test case #%d, %#v", i, tc))

0 commit comments

Comments
 (0)