Skip to content

Commit d7cda67

Browse files
committed
fix: revert regressions
1 parent 46eaa6f commit d7cda67

File tree

6 files changed

+124
-164647
lines changed

6 files changed

+124
-164647
lines changed

diffmatchpatch/diff.go

Lines changed: 56 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,6 @@ const (
3434
DiffInsert Operation = 1
3535
// DiffEqual item represents an equal diff.
3636
DiffEqual Operation = 0
37-
// IndexSeparator is used to separate the array indexes in an index string
38-
IndexSeparator = ","
3937
)
4038

4139
// Diff represents one diff operation
@@ -205,7 +203,7 @@ func (dmp *DiffMatchPatch) diffCompute(
205203
// then rediff the parts for greater accuracy. This speedup can produce non-minimal diffs.
206204
func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time) []Diff {
207205
// Scan the text on a line-by-line basis first.
208-
text1, text2, linearray := dmp.DiffLinesToRunes(string(text1), string(text2))
206+
text1, text2, linearray := dmp.diffLinesToRunes(text1, text2)
209207

210208
diffs := dmp.diffMainRunes(text1, text2, false, deadline)
211209

@@ -406,28 +404,73 @@ func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int,
406404
// a string of hashes where each Unicode character represents one line.
407405
// It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes.
408406
func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, []string) {
409-
chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
410-
return chars1, chars2, lineArray
407+
chars1, chars2, lineArray := dmp.DiffLinesToRunes(text1, text2)
408+
return string(chars1), string(chars2), lineArray
411409
}
412410

413-
// DiffLinesToRunes splits two texts into a list of runes.
411+
// DiffLinesToRunes splits two texts into a list of runes. Each rune represents one line.
414412
func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) {
415-
chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
416-
return []rune(chars1), []rune(chars2), lineArray
413+
// '\x00' is a valid character, but various debuggers don't like it.
414+
// So we'll insert a junk entry to avoid generating a null character.
415+
lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
416+
lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
417+
418+
chars1 := dmp.diffLinesToRunesMunge(text1, &lineArray, lineHash)
419+
chars2 := dmp.diffLinesToRunesMunge(text2, &lineArray, lineHash)
420+
421+
return chars1, chars2, lineArray
422+
}
423+
424+
func (dmp *DiffMatchPatch) diffLinesToRunes(text1, text2 []rune) ([]rune, []rune, []string) {
425+
return dmp.DiffLinesToRunes(string(text1), string(text2))
426+
}
427+
428+
// diffLinesToRunesMunge splits a text into an array of strings, and reduces the texts to a []rune
429+
// where each Unicode character represents one line.
430+
// We use strings instead of []runes as input mainly because you can't use []rune as a map key.
431+
func (dmp *DiffMatchPatch) diffLinesToRunesMunge(
432+
text string,
433+
lineArray *[]string,
434+
lineHash map[string]int,
435+
) []rune {
436+
// Walk the text, pulling out a substring for each line. text.split('\n')
437+
// would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
438+
lineStart := 0
439+
lineEnd := -1
440+
runes := []rune{}
441+
442+
for lineEnd < len(text)-1 {
443+
lineEnd = indexOf(text, "\n", lineStart)
444+
445+
if lineEnd == -1 {
446+
lineEnd = len(text) - 1
447+
}
448+
449+
line := text[lineStart : lineEnd+1]
450+
lineStart = lineEnd + 1
451+
lineValue, ok := lineHash[line]
452+
453+
if ok {
454+
runes = append(runes, rune(lineValue))
455+
} else {
456+
*lineArray = append(*lineArray, line)
457+
lineHash[line] = len(*lineArray) - 1
458+
runes = append(runes, rune(len(*lineArray)-1))
459+
}
460+
}
461+
462+
return runes
417463
}
418464

419465
// DiffCharsToLines rehydrates the text in a diff from a string of line hashes to real lines of text.
420466
func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff {
421467
hydrated := make([]Diff, 0, len(diffs))
422468
for _, aDiff := range diffs {
423-
chars := strings.Split(aDiff.Text, IndexSeparator)
469+
chars := aDiff.Text
424470
text := make([]string, len(chars))
425471

426472
for i, r := range chars {
427-
i1, err := strconv.Atoi(r)
428-
if err == nil {
429-
text[i] = lineArray[i1]
430-
}
473+
text[i] = lineArray[r]
431474
}
432475

433476
aDiff.Text = strings.Join(text, "")
@@ -1331,49 +1374,3 @@ func (dmp *DiffMatchPatch) DiffFromDelta(text1, delta string) (diffs []Diff, err
13311374

13321375
return diffs, nil
13331376
}
1334-
1335-
// diffLinesToStrings splits two texts into a list of strings. Each string represents one line.
1336-
func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, string, []string) {
1337-
// '\x00' is a valid character, but various debuggers don't like it.
1338-
// So we'll insert a junk entry to avoid generating a null character.
1339-
lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
1340-
1341-
// Each string has the index of lineArray which it points to
1342-
strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray)
1343-
strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray)
1344-
1345-
return intArrayToString(strIndexArray1), intArrayToString(strIndexArray2), lineArray
1346-
}
1347-
1348-
// diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []string.
1349-
func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []uint32 {
1350-
// Walk the text, pulling out a substring for each line. text.split('\n')
1351-
// would temporarily double our memory footprint.
1352-
// Modifying text would create many large strings to garbage collect.
1353-
lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
1354-
lineStart := 0
1355-
lineEnd := -1
1356-
strs := []uint32{}
1357-
1358-
for lineEnd < len(text)-1 {
1359-
lineEnd = indexOf(text, "\n", lineStart)
1360-
1361-
if lineEnd == -1 {
1362-
lineEnd = len(text) - 1
1363-
}
1364-
1365-
line := text[lineStart : lineEnd+1]
1366-
lineStart = lineEnd + 1
1367-
lineValue, ok := lineHash[line]
1368-
1369-
if ok {
1370-
strs = append(strs, uint32(lineValue))
1371-
} else {
1372-
*lineArray = append(*lineArray, line)
1373-
lineHash[line] = len(*lineArray) - 1
1374-
strs = append(strs, uint32(len(*lineArray)-1))
1375-
}
1376-
}
1377-
1378-
return strs
1379-
}

diffmatchpatch/diff_test.go

Lines changed: 12 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@ package diffmatchpatch
1010

1111
import (
1212
"fmt"
13-
"io"
14-
"os"
1513
"reflect"
1614
"strconv"
1715
"strings"
@@ -302,10 +300,10 @@ func TestDiffLinesToChars(t *testing.T) {
302300
dmp := New()
303301

304302
for i, tc := range []TestCase{
305-
{"", "alpha\r\nbeta\r\n\r\n\r\n", "", "1,2,3,3", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
306-
{"a", "b", "1", "2", []string{"", "a", "b"}},
303+
{"", "alpha\r\nbeta\r\n\r\n\r\n", "", "\u0001\u0002\u0003\u0003", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
304+
{"a", "b", "\u0001", "\u0002", []string{"", "a", "b"}},
307305
// Omit final newline.
308-
{"alpha\nbeta\nalpha", "", "1,2,3", "", []string{"", "alpha\n", "beta\n", "alpha"}},
306+
{"alpha\nbeta\nalpha", "", "\u0001\u0002\u0003", "", []string{"", "alpha\n", "beta\n", "alpha"}},
309307
} {
310308
actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(tc.Text1, tc.Text2)
311309
assertEqual(t, tc.ExpectedChars1, actualChars1, fmt.Sprintf("Test case #%d, %#v", i, tc))
@@ -318,14 +316,14 @@ func TestDiffLinesToChars(t *testing.T) {
318316
lineList := []string{
319317
"", // Account for the initial empty element of the lines array.
320318
}
321-
var charList []string
319+
var charList []rune
322320
for x := 1; x < n+1; x++ {
323321
lineList = append(lineList, strconv.Itoa(x)+"\n")
324-
charList = append(charList, strconv.Itoa(x))
322+
charList = append(charList, rune(x))
325323
}
326324
lines := strings.Join(lineList, "")
327-
chars := strings.Join(charList[:], ",")
328-
assertEqual(t, n, len(strings.Split(chars, ",")))
325+
chars := string(charList)
326+
assertEqual(t, n, utf8.RuneCountInString(chars))
329327

330328
actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(lines, "")
331329
assertEqual(t, chars, actualChars1)
@@ -345,8 +343,8 @@ func TestDiffCharsToLines(t *testing.T) {
345343
for i, tc := range []TestCase{
346344
{
347345
Diffs: []Diff{
348-
{DiffEqual, "1,2,1"},
349-
{DiffInsert, "2,1,2"},
346+
{DiffEqual, "\u0001\u0002\u0001"},
347+
{DiffInsert, "\u0002\u0001\u0002"},
350348
},
351349
Lines: []string{"", "alpha\n", "beta\n"},
352350

@@ -365,15 +363,14 @@ func TestDiffCharsToLines(t *testing.T) {
365363
lineList := []string{
366364
"", // Account for the initial empty element of the lines array.
367365
}
368-
charList := []string{}
366+
charList := []rune{}
369367
for x := 1; x <= n; x++ {
370368
lineList = append(lineList, strconv.Itoa(x)+"\n")
371-
charList = append(charList, strconv.Itoa(x))
369+
charList = append(charList, rune(x))
372370
}
373371
assertEqual(t, n, len(charList))
374-
chars := strings.Join(charList[:], ",")
375372

376-
actual := dmp.DiffCharsToLines([]Diff{{DiffDelete, chars}}, lineList)
373+
actual := dmp.DiffCharsToLines([]Diff{{DiffDelete, string(charList)}}, lineList)
377374
assertEqual(t, []Diff{{DiffDelete, strings.Join(lineList, "")}}, actual)
378375
}
379376

@@ -1507,19 +1504,6 @@ func TestDiffMainWithCheckLines(t *testing.T) {
15071504
}
15081505
}
15091506

1510-
func TestMassiveRuneDiffConversion(t *testing.T) {
1511-
sNew, err := os.ReadFile("../testdata/fixture.go")
1512-
if err != nil {
1513-
panic(err)
1514-
}
1515-
1516-
dmp := New()
1517-
t1, t2, tt := dmp.DiffLinesToChars("", string(sNew))
1518-
diffs := dmp.DiffMain(t1, t2, false)
1519-
diffs = dmp.DiffCharsToLines(diffs, tt)
1520-
assertEqual(t, true, len(diffs) > 0)
1521-
}
1522-
15231507
func BenchmarkDiffMain(bench *testing.B) {
15241508
var r []Diff
15251509

@@ -1579,22 +1563,3 @@ func BenchmarkDiffMainRunesLargeLines(b *testing.B) {
15791563

15801564
SinkSliceDiff = r
15811565
}
1582-
1583-
func BenchmarkDiffMainRunesLargeDiffLines(b *testing.B) {
1584-
var r []Diff
1585-
1586-
fp, _ := os.Open("../testdata/diff10klinestest.txt")
1587-
defer fp.Close()
1588-
data, _ := io.ReadAll(fp)
1589-
dmp := New()
1590-
1591-
b.ResetTimer()
1592-
1593-
for i := 0; i < b.N; i++ {
1594-
text1, text2, linearray := dmp.DiffLinesToRunes(string(data), "")
1595-
r = dmp.DiffMainRunes(text1, text2, false)
1596-
r = dmp.DiffCharsToLines(r, linearray)
1597-
}
1598-
1599-
SinkSliceDiff = r
1600-
}

diffmatchpatch/patch_test.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,3 +567,59 @@ func TestPatchApply(t *testing.T) {
567567
)
568568
}
569569
}
570+
571+
func TestIssues(t *testing.T) {
572+
t.Run("https://github.com/sergi/go-diff/issues/127", func(t *testing.T) {
573+
text1 := `
574+
1111111111111 000000
575+
------------- ------
576+
xxxxxxxxxxxxx ------
577+
xxxxxxxxxxxxx ------
578+
xxxxxxxxxxxxx xxxxxx
579+
xxxxxxxxxxxxx ......
580+
xxxxxxxxxxxxx 111111
581+
xxxxxxxxxxxxx ??????
582+
xxxxxxxxxxxxx 333333
583+
xxxxxxxxxxxxx 555555
584+
xxxxxxxxxx xxxxx
585+
xxxxxxxxxx xxxxx
586+
xxxxxxxxxx xxxxx
587+
xxxxxxxxxx xxxxx
588+
`
589+
text2 := `
590+
2222222222222 000000
591+
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`
592+
593+
patches := New().PatchMake(text1, text2)
594+
assertEqual(t, 6, len(patches), "Issue https://github.com/sergi/go-diff/issues/127")
595+
})
596+
597+
t.Run("https://github.com/sergi/go-diff/issues/4", func(t *testing.T) {
598+
// doesn't panic
599+
text1 := "1\n2\n3\n4\n5\n6\n7\n3\n8\n9\n3\n10\n3\n11\n3\n12\n13\n14\n15\n12\n13" +
600+
"\n16\n13\n13\n17\n18\n19\n20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n30\n31\n32\n33\n34" +
601+
"\n35\n12\n36\n37\n38\n39\n40\n41\n42\n13\n43\n44\n13\n45\n46\n47\n13\n13\n48\n49\n50" +
602+
"\n51\n52\n13\n53\n54\n55\n56\n57\n58\n59\n60\n61\n62\n63\n64\n65\n66\n67\n68\n69\n13\n" +
603+
"70\n71\n72\n73\n74\n13\n75\n13\n76\n77\n78\n79\n80\n81\n82\n83\n84\n85\n86\n87\n88\n89\n" +
604+
"90\n67\n91\n92\n93\n81\n68\n13\n94\n71\n95\n96\n97\n98\n99\n100\n101\n102\n63\n103\n67\n104" +
605+
"\n105\n13\n106\n107\n108\n109\n110\n111\n112\n113\n114\n115\n90\n116\n67\n13\n117\n72\n73\n" +
606+
"74\n13\n75\n13\n76\n118\n119\n120\n78\n68\n121\n13\n122\n123\n124\n125\n93\n126\n68\n127\n13" +
607+
"\n128\n129\n130\n131\n132\n133\n134\n135\n13\n136\n137\n138\n13\n78\n68\n13\n139\n140\n141\n142" +
608+
"\n68\n13\n143\n144\n145\n146\n13\n147\n148\n13\n149\n150\n151\n152\n153\n150\n154\n13\n155\n156\n"
609+
text2 := "1\n2\n3\n4\n5\n6\n7\n3\n157\n9\n3\n10\n3\n11\n3\n12\n13\n14\n15\n12\n13\n16\n13\n13" +
610+
"\n17\n18\n19\n20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n30\n31\n32\n33\n34\n35\n12\n36\n37\n38\n39\n40" +
611+
"\n41\n42\n13\n158\n159\n13\n45\n46\n47\n13\n13\n48\n49\n50\n51\n13\n53\n54\n55\n56\n57\n160\n59\n60" +
612+
"\n61\n62\n63\n64\n161\n66\n67\n68\n69\n13\n70\n71\n72\n73\n74\n13\n75\n13\n162\n77\n78\n79\n80\n81\n" +
613+
"82\n83\n84\n85\n86\n88\n89\n90\n67\n91\n92\n93\n81\n68\n13\n94\n71\n95\n96\n97\n98\n99\n100\n101\n102" +
614+
"\n63\n103\n67\n104\n105\n13\n106\n107\n108\n109\n110\n111\n112\n113\n114\n115\n90\n116\n67\n13\n117\n72" +
615+
"\n73\n74\n13\n75\n13\n163\n119\n120\n78\n68\n121\n13\n122\n123\n124\n125\n93\n126\n68\n127\n13\n128\n164" +
616+
"\n130\n131\n132\n133\n134\n135\n13\n136\n137\n138\n13\n78\n68\n13\n139\n140\n165\n68\n13\n143\n144\n145\n" +
617+
"146\n13\n147\n148\n13\n149\n150\n151\n166\n153\n150\n154\n13\n155\n156\n"
618+
619+
dmp := New()
620+
t1, t2, lineArray := dmp.DiffLinesToChars(text1, text2)
621+
diffs := dmp.DiffMain(t1, t2, false)
622+
diffs = dmp.DiffCharsToLines(diffs, lineArray)
623+
_ = diffs
624+
})
625+
}

diffmatchpatch/stringutil.go

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
package diffmatchpatch
1010

1111
import (
12-
"strconv"
1312
"strings"
1413
"unicode/utf8"
1514
)
@@ -92,20 +91,3 @@ func runesIndex(r1, r2 []rune) int {
9291
}
9392
return -1
9493
}
95-
96-
func intArrayToString(ns []uint32) string {
97-
if len(ns) == 0 {
98-
return ""
99-
}
100-
101-
indexSeparator := IndexSeparator[0]
102-
103-
// Appr. 3 chars per num plus the comma.
104-
b := []byte{}
105-
for _, n := range ns {
106-
b = strconv.AppendInt(b, int64(n), 10)
107-
b = append(b, indexSeparator)
108-
}
109-
b = b[:len(b)-1]
110-
return string(b)
111-
}

0 commit comments

Comments
 (0)