From 1af414b27341bd2a265ea658e13c2a543a830b84 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Sat, 25 Feb 2012 03:02:19 -0800 Subject: [PATCH 01/14] (core::str) add chars_iteri --- src/libcore/str.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 057c0b0d187fe..3da0979f33e04 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -57,6 +57,7 @@ export map, bytes_iter, chars_iter, + chars_iteri, split_char_iter, splitn_char_iter, words_iter, @@ -582,13 +583,19 @@ fn bytes_iter(ss: str, it: fn(u8)) { } #[doc = "Iterate over the characters in a string"] -fn chars_iter(s: str, it: fn(char)) { +fn chars_iter(ss: str, it: fn(char)) { + chars_iteri(ss, {|_ii, ch| it(ch)}) +} + +#[doc = "Iterate over the characters in a string"] +fn chars_iteri(ss: str, it: fn(uint,char)) { let mut pos = 0u; let len = len(s); + while (pos < len) { - let {ch, next} = char_range_at(s, pos); + let {ch, next} = char_range_at(ss, pos); + it(pos, ch); pos = next; - it(ch); } } From e95006d6dec2e79bb8e918993b519fdd5a743f6d Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Sat, 25 Feb 2012 19:55:40 -0800 Subject: [PATCH 02/14] (core::vec) add ends_with --- src/libcore/vec.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/libcore/vec.rs b/src/libcore/vec.rs index c760813bf7ff0..be381fc5ee267 100644 --- a/src/libcore/vec.rs +++ b/src/libcore/vec.rs @@ -869,6 +869,9 @@ fn permute(v: [T], put: fn([T])) { } } +// Function: windowed +// +// Return all sub-vectors of size `nn` fn windowed (nn: uint, xx: [const TT]) -> [[TT]] { let mut ww = []; @@ -886,6 +889,26 @@ fn windowed (nn: uint, xx: [const TT]) -> [[TT]] { ret ww; } +#[doc = " +Return true if the first vector ends with the second +(including if the second is []) +"] +fn ends_with (vvv: [TT], vv: [TT]) -> bool { + let lll = vec::len(vvv); + let ll = vec::len(vv); + + if lll < ll { ret false; } + + let delta = lll - ll; + 
let res = true; + + vec::riteri(vv) {|ii, elem| + if elem != vvv[delta + ii] { res = false; } + } + + ret res; +} + #[doc = " Work with the buffer of a vector. @@ -1760,6 +1783,18 @@ mod tests { unshift(x, 0); assert x == [0, 1, 2, 3]; } + + #[test] + fn test_ends_with() { + assert true == ends_with([7,7,7,0,1,2], [0,1,2]); + assert false == ends_with([7,7,7,0,1,2,7], [0,1,2]); + assert true == ends_with([0,1,2], [0,1,2]); + assert false == ends_with([0,1,2], [0,1,2,3]); + assert true == ends_with([0,1,2], []); + let empty : [uint] = []; // just feed a type into this thing + assert true == ends_with(empty, empty); + assert false == ends_with([],[0,1,2,3]); + } } // Local Variables: From ef827fbc846c4dc49b8dfca0dc70c8b44195d9c6 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Sat, 3 Mar 2012 19:10:22 -0800 Subject: [PATCH 03/14] (core::str) add Boyer-Moore string searching --- src/libcore/str.rs | 345 ++++++++++++++++++++++++++++++----- src/rustdoc/markdown_pass.rs | 1 + 2 files changed, 305 insertions(+), 41 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 3da0979f33e04..429b5078f84d4 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -69,6 +69,7 @@ export find_char, find_char_from, find_char_between, rfind_char, rfind_char_from, rfind_char_between, find_str, find_str_from, find_str_between, + findn_str, contains, starts_with, ends_with, @@ -401,31 +402,12 @@ fn split_inner(s: str, sepfn: fn(cc: char) -> bool, count: uint, result } -// FIXME use Boyer-Moore -fn iter_matches(s: str, sep: str, f: fn(uint, uint)) { - let sep_len = len(sep), l = len(s); +fn iter_matches(ss: str, sep: str, f: fn(uint, uint)) { + let sep_len = len(sep); assert sep_len > 0u; - let mut i = 0u, match_start = 0u, match_i = 0u; - - while i < l { - if s[i] == sep[match_i] { - if match_i == 0u { match_start = i; } - match_i += 1u; - // Found a match - if match_i == sep_len { - f(match_start, i + 1u); - match_i = 0u; - } - i += 1u; - } else { - // Failed match, 
backtrack - if match_i > 0u { - match_i = 0u; - i = match_start + 1u; - } else { - i += 1u; - } - } + + for match in findn_str(ss, sep, len(ss)) { + f(match, match + sep_len); } } @@ -1012,19 +994,203 @@ or equal to `len(s)`. "] fn find_str_between(haystack: str, needle: str, start: uint, end:uint) -> option { - // FIXME: Boyer-Moore should be significantly faster - assert end <= len(haystack); - let needle_len = len(needle); - if needle_len == 0u { ret some(start); } - if needle_len > end { ret none; } + let found = findn_str_between(haystack, needle, 1u, start, end); + alt vec::len(found) { + 0u { ret option::none; } + _nn { ret option::some(found[0u]); } + } +} - let mut i = start; - let e = end - needle_len; - while i <= e { - if match_at(haystack, needle, i) { ret some(i); } - i += 1u; +// Function: findn_str +// +// Returns up to `nn` byte positions of matched substrings +fn findn_str(haystack: str, needle: str, nn: uint) -> [uint] { + findn_str_between(haystack, needle, nn, 0u, str::len(haystack)) +} + +// Function: findn_str_between +// +// Returns up to `nn` byte positions of matched substrings +// between `start` and `end` +fn findn_str_between (haystack: str, needle: str, + nn: uint, + start: uint, end: uint) -> [uint] { + + boyer_moore_search(haystack, needle, nn, start, end) +} + +// Returns up to `nn` byte positions of matched substrings +// between `start` and `end` +// (using Boyer-Moore) +fn boyer_moore_search (haystack: str, needle: str, + nn: uint, + start: uint, end: uint) -> [uint] { + let results = []; + + let nlen = str::len(needle); + + assert start <= end; + assert end <= str::len(haystack); + let hlen = end - start; + + // empty needle + if nlen == 0u { + ret [start]; } - ret none; + + // haystack empty, or smaller than needle + if hlen == 0u || hlen < nlen { + ret []; + } + + // generate the tables + let ct = boyer_moore_unmatched_chars(needle); + let pt = boyer_moore_matching_suffixes(needle); + + // query both tables based on position + 
// within the needle and character in haystack + let getShift = fn@(pos: uint, ch: u8) -> uint { + let matchedSoFar = nlen - 1u - pos; + let rawCharShift = ct[ch as uint]; + let prefShift = pt[matchedSoFar]; + + if rawCharShift >= matchedSoFar { + let adjCharShift = rawCharShift - matchedSoFar; + + if adjCharShift > prefShift { + ret adjCharShift; + } + } + + ret prefShift; + }; + + // step up through the haystack + let outerii = start; + while outerii + nlen <= end { + + // step back through needle + // (checking outer range again) + let windowii = nlen; + while 0u < windowii { + + windowii -= 1u; + + // matching byte? + if needle[windowii] == haystack[outerii+windowii] { + + // needle fully matched? + // note: last decremented windowii + if windowii == 0u { + vec::push(results, outerii); + + if vec::len(results) >= nn { ret results; } + + outerii += nlen; + } + + // if not fully matched, leave outerii alone + // but decrement the windowii + + } else { + // no match or a partial match + outerii += getShift(windowii, haystack[outerii+windowii]); + break; + } + } + } + + ret results; +} + +// compute the table used to choose a shift based on +// an unmatched character's possible position within the search string +// (a.k.a. the bad-character table) +fn boyer_moore_unmatched_chars(needle: str) -> [uint] { + let len = str::len(needle); + let mm = vec::to_mut(vec::init_elt(255u, len)); + + let jj = len - 1u; // drop the last byte + + //assert 0u <= jj; + //assert jj < str::len(needle); + + // from last-1 to first + while jj > 0u { + jj -= 1u; + + let key = needle[jj] as uint; + + // if we haven't set it yet, set it now + // (besides default) + if mm[key] == len { + mm[key] = len - 1u - jj; + } + } + + ret vec::from_mut(mm); +} + +// compute the table used to choose a shift based on +// a partially matched suffix of the search string +// (a.k.a. 
the good-suffix table) +fn boyer_moore_matching_suffixes(needle_str: str) -> [uint] { + let needle = str::bytes(needle_str); + + let len = vec::len(needle); + //assert 0u < len; + + // initialize len chars to len + let mm = vec::to_mut(vec::init_elt(len, len)); + + // step to larger suffixes + let sii = 0u; + while sii < len { + + // tail of the needle we seek + let suffix = vec::slice(needle, len - sii, len); + let suffix_plus = vec::slice(needle, len - sii - 1u, len); + let slen = vec::len(suffix); + + // step to smaller prefixes + let pii = len - 1u; + while pii > 0u { + + // a prefix of the needle + let prefix = vec::slice(needle, 0u, pii); + let plen = vec::len(prefix); + + // if suffix fully matched, or + // prefix is bigger than suffix: only tail matched + // (which we might jump to) + if + (plen <= slen + && vec::ends_with(suffix, prefix)) + || + (slen < plen + && vec::ends_with(prefix, suffix) + && !vec::ends_with(prefix, suffix_plus)) + { + // if we haven't set it yet, set it now + // (besides default) + if mm[sii] == len { + mm[sii] = len-pii; + } + } + + pii -= 1u; + } + + // if it hasn't been set, there was no matching prefix, + // so set it now + if mm[sii] == len { + mm[sii] = len-pii; + } + + sii += 1u; + } + + ret vec::from_mut(mm); } #[doc = " @@ -1036,7 +1202,7 @@ Returns true if one string contains another * needle - The string to look for "] fn contains(haystack: str, needle: str) -> bool { - option::is_some(find_str(haystack, needle)) + option::is_some(find_str_between(haystack, needle, 0u, len(haystack))) } #[doc = " @@ -1887,14 +2053,23 @@ mod tests { #[test] fn test_find_str_between() { - // byte positions assert find_str_between("", "", 0u, 0u) == some(0u); + assert find_str_between("", "pow", 0u, 0u) == none; + assert find_str_between("donatello", "don", 0u, 9u) == some(0u); + //assert find_str_between("don", "donatello", 0u, 10u) == none; //was OK + assert find_str_between("don", "donatello", 0u, 3u) == none; //OK + } - let data = 
"abcabc"; - assert find_str_between(data, "ab", 0u, 6u) == some(0u); - assert find_str_between(data, "ab", 2u, 6u) == some(3u); - assert find_str_between(data, "ab", 2u, 4u) == none; + #[test] + fn test_find_str_between_ascii() { + let data0 = "abcabc"; + assert find_str_between(data0, "ab", 0u, 6u) == some(0u); + assert find_str_between(data0, "ab", 2u, 6u) == some(3u); + assert find_str_between(data0, "ab", 2u, 4u) == none; + } + #[test] + fn test_find_str_between_utf8() { let mut data = "ประเทศไทย中华Việt Nam"; data += data; assert find_str_between(data, "", 0u, 43u) == some(0u); @@ -1913,6 +2088,43 @@ mod tests { assert find_str_between(data, "Nam", 43u, 86u) == some(83u); } + #[test] + fn test_findn_str() { + assert [] == str::findn_str("banana", "apple pie", 1u); + assert [0u] == str::findn_str("abcxxxxxx", "abc", 1u); + assert [3u] == str::findn_str("xxxabcxxx", "abc", 1u); + assert [6u] == str::findn_str("xxxxxxabc", "abc", 1u); + assert [3u] == str::findn_str("xxxabcabc", "abc", 1u); + assert [3u, 6u] == str::findn_str("xxxabcabc", "abc", 5u); + assert [3u, 7u] == str::findn_str("xxxabcxabc", "abc", 5u); + assert [3u, 8u] == str::findn_str("xxxabcxxabc", "abc", 5u); + } + + #[test] + fn test_find_str_ascii() { + assert option::some(0u) == find_str("", ""); + assert option::none == find_str("banana", "apple pie"); + assert option::some(0u) == find_str("abcxxxxxx", "abc"); + assert option::some(3u) == find_str("xxxabcxxx", "abc"); + assert option::some(6u) == find_str("xxxxxxabc", "abc"); + } + + #[test] + fn test_find_str_utf8() { + let data = "ประเทศไทย中华Việt Nam"; + + assert option::some( 0u) == find_str(data, ""); + assert option::none == find_str(data, "ไท华"); + assert option::some( 0u) == find_str(data, "ประเ"); + assert option::some( 3u) == find_str(data, "ระ"); + assert option::some( 6u) == find_str(data, "ะเ"); + assert option::some(15u) == find_str(data, "ศไทย中华"); + assert option::some(18u) == find_str(data, "ไทย中华"); + assert option::some(24u) == 
find_str(data, "ย中华"); + assert option::some(27u) == find_str(data, "中华"); + } + + #[test] fn test_substr() { fn t(a: str, b: str, start: int) { @@ -2287,6 +2499,57 @@ mod tests { } } + #[test] + fn test_unmatched_chars_ascii () { + let ct = boyer_moore_unmatched_chars("ANPANMAN"); + + assert 1u == ct['A' as uint]; + assert 2u == ct['M' as uint]; + assert 3u == ct['N' as uint]; + assert 5u == ct['P' as uint]; + + // others + assert 8u == ct['z' as uint]; + assert 8u == ct['w' as uint]; + assert 8u == ct['x' as uint]; + } + + #[test] + fn test_unmatched_chars_utf8() { + let ct = boyer_moore_unmatched_chars("ะเ"); //e0b8b0 e0b980 + + assert 2u == ct[0x_e0_u]; + assert 4u == ct[0x_b8_u]; + assert 3u == ct[0x_b0_u]; + assert 2u == ct[0x_e0_u]; + assert 1u == ct[0x_b9_u]; + assert 6u == ct[0x_80_u]; + } + + #[test] + fn test_matching_suffixes_ascii() { + let pt = boyer_moore_matching_suffixes("ANPANMAN"); + + assert 1u == pt[0u]; // (n) + assert 8u == pt[1u]; // (a)n + assert 3u == pt[2u]; // (m)an + assert 6u == pt[3u]; // (n)man + assert 6u == pt[4u]; // (a)nman + assert 6u == pt[5u]; // (p)anman + assert 6u == pt[6u]; // (n)panman + assert 6u == pt[7u]; // (a)npanman + } + + #[test] + fn test_matching_suffixes_utf8() { + let pt = boyer_moore_matching_suffixes("ประเ"); + + assert 1u == pt[0u]; + assert 12u == pt[3u]; + assert 12u == pt[6u]; + assert 12u == pt[9u]; + } + #[test] fn test_contains() { assert contains("abcde", "bcd"); diff --git a/src/rustdoc/markdown_pass.rs b/src/rustdoc/markdown_pass.rs index ad72e55ba1054..8e26997023871 100644 --- a/src/rustdoc/markdown_pass.rs +++ b/src/rustdoc/markdown_pass.rs @@ -57,6 +57,7 @@ fn should_write_modules_last() { types of items, or else the header nesting will end up wrong, with modules appearing to contain items that they do not. 
*/ + let markdown = test::render( "mod a { }\ fn b() { }\ From e8fb664dad609b20ecbd66620935b69c59f3f040 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Tue, 6 Mar 2012 00:18:13 -0800 Subject: [PATCH 04/14] touchups --- src/libcore/str.rs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 429b5078f84d4..9f70751b9cb95 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -1202,7 +1202,7 @@ Returns true if one string contains another * needle - The string to look for "] fn contains(haystack: str, needle: str) -> bool { - option::is_some(find_str_between(haystack, needle, 0u, len(haystack))) + option::is_some(find_str(haystack, needle)) } #[doc = " @@ -2039,7 +2039,6 @@ mod tests { #[test] fn test_find_str() { - // byte positions assert find_str("banana", "apple pie") == none; assert find_str("", "") == some(0u); @@ -2052,16 +2051,12 @@ mod tests { } #[test] - fn test_find_str_between() { + fn test_find_str_between_ascii() { assert find_str_between("", "", 0u, 0u) == some(0u); assert find_str_between("", "pow", 0u, 0u) == none; assert find_str_between("donatello", "don", 0u, 9u) == some(0u); - //assert find_str_between("don", "donatello", 0u, 10u) == none; //was OK - assert find_str_between("don", "donatello", 0u, 3u) == none; //OK - } + assert find_str_between("don", "donatello", 0u, 3u) == none; - #[test] - fn test_find_str_between_ascii() { let data0 = "abcabc"; assert find_str_between(data0, "ab", 0u, 6u) == some(0u); assert find_str_between(data0, "ab", 2u, 6u) == some(3u); From ababa8857c791c6841b41356968e82222d5cb65e Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Tue, 6 Mar 2012 00:55:20 -0800 Subject: [PATCH 05/14] (core::str) export findn_str_between --- src/libcore/str.rs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 9f70751b9cb95..13af9891d0c67 100644 --- a/src/libcore/str.rs +++ 
b/src/libcore/str.rs @@ -69,7 +69,7 @@ export find_char, find_char_from, find_char_between, rfind_char, rfind_char_from, rfind_char_between, find_str, find_str_from, find_str_between, - findn_str, + findn_str, findn_str_between, contains, starts_with, ends_with, @@ -2057,10 +2057,10 @@ mod tests { assert find_str_between("donatello", "don", 0u, 9u) == some(0u); assert find_str_between("don", "donatello", 0u, 3u) == none; - let data0 = "abcabc"; - assert find_str_between(data0, "ab", 0u, 6u) == some(0u); - assert find_str_between(data0, "ab", 2u, 6u) == some(3u); - assert find_str_between(data0, "ab", 2u, 4u) == none; + let data = "abcabc"; + assert find_str_between(data, "ab", 0u, 6u) == some(0u); + assert find_str_between(data, "ab", 2u, 6u) == some(3u); + assert find_str_between(data, "ab", 2u, 4u) == none; } #[test] @@ -2083,6 +2083,14 @@ mod tests { assert find_str_between(data, "Nam", 43u, 86u) == some(83u); } + #[test] + fn test_findn_str_between() { + let data = "abcabc"; + assert findn_str_between(data, "ab", 2u, 0u, 6u) == [0u, 3u]; + assert findn_str_between(data, "ab", 1u, 0u, 6u) == [0u]; + assert findn_str_between(data, "ax", 1u, 0u, 6u) == []; + } + #[test] fn test_findn_str() { assert [] == str::findn_str("banana", "apple pie", 1u); From d1a26a47dcba13104c507eacaad1e0710ad175bb Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Thu, 8 Mar 2012 20:52:38 -0800 Subject: [PATCH 06/14] (core::str) add simple_search and temporarily export it and boyer_moore_search for testing --- src/libcore/str.rs | 71 ++++++++++++++++++++++++++++++++++++++++------ src/libcore/vec.rs | 2 +- 2 files changed, 64 insertions(+), 9 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 13af9891d0c67..6d30b938d1f69 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -69,7 +69,10 @@ export find_char, find_char_from, find_char_between, rfind_char, rfind_char_from, rfind_char_between, find_str, find_str_from, find_str_between, - findn_str, findn_str_between, 
+ findn_str, + findn_str_between, + simple_search, // temp, called by findn_str_between + boyer_moore_search, // temp, called by findn_str_between contains, starts_with, ends_with, @@ -572,7 +575,7 @@ fn chars_iter(ss: str, it: fn(char)) { #[doc = "Iterate over the characters in a string"] fn chars_iteri(ss: str, it: fn(uint,char)) { let mut pos = 0u; - let len = len(s); + let len = len(ss); while (pos < len) { let {ch, next} = char_range_at(ss, pos); @@ -1019,13 +1022,65 @@ fn findn_str_between (haystack: str, needle: str, boyer_moore_search(haystack, needle, nn, start, end) } +// Returns up to `nn` byte positions of matched substrings +// between `start` and `end` +// (using a naive search algorithm) +fn simple_search (haystack: str, needle: str, + nn: uint, + start: uint, end: uint) -> [uint] { + let mut results = []; + + let nlen = str::len(needle); + + assert start <= end; + assert end <= str::len(haystack); + let hlen = end - start; + + // empty needle + if nlen == 0u { + ret [start]; + } + + // haystack empty, or smaller than needle + if hlen == 0u || hlen < nlen { + ret []; + } + + let mut ii = start, match_start = 0u, match_i = 0u; + + while ii < end { + if haystack[ii] == needle[match_i] { + if match_i == 0u { match_start = ii; } + match_i += 1u; + // Found a match + if match_i == nlen { + vec::push(results, match_start); + match_i = 0u; + + if vec::len(results) >= nn { ret results; } + } + ii += 1u; + } else { + // Failed match, backtrack + if match_i > 0u { + match_i = 0u; + ii = match_start + 1u; + } else { + ii += 1u; + } + } + } + + ret results; +} + // Returns up to `nn` byte positions of matched substrings // between `start` and `end` // (using Boyer-Moore) fn boyer_moore_search (haystack: str, needle: str, nn: uint, start: uint, end: uint) -> [uint] { - let results = []; + let mut results = []; let nlen = str::len(needle); @@ -1066,12 +1121,12 @@ fn boyer_moore_search (haystack: str, needle: str, }; // step up through the haystack - let outerii = 
start; + let mut outerii = start; while outerii + nlen <= end { // step back through needle // (checking outer range again) - let windowii = nlen; + let mut windowii = nlen; while 0u < windowii { windowii -= 1u; @@ -1110,7 +1165,7 @@ fn boyer_moore_unmatched_chars(needle: str) -> [uint] { let len = str::len(needle); let mm = vec::to_mut(vec::init_elt(255u, len)); - let jj = len - 1u; // drop the last byte + let mut jj = len - 1u; // drop the last byte //assert 0u <= jj; //assert jj < str::len(needle); @@ -1144,7 +1199,7 @@ fn boyer_moore_matching_suffixes(needle_str: str) -> [uint] { let mm = vec::to_mut(vec::init_elt(len, len)); // step to larger suffixes - let sii = 0u; + let mut sii = 0u; while sii < len { // tail of the needle we seek @@ -1153,7 +1208,7 @@ fn boyer_moore_matching_suffixes(needle_str: str) -> [uint] { let slen = vec::len(suffix); // step to smaller prefixes - let pii = len - 1u; + let mut pii = len - 1u; while pii > 0u { // a prefix of the needle diff --git a/src/libcore/vec.rs b/src/libcore/vec.rs index be381fc5ee267..aeb112a840d1a 100644 --- a/src/libcore/vec.rs +++ b/src/libcore/vec.rs @@ -900,7 +900,7 @@ fn ends_with (vvv: [TT], vv: [TT]) -> bool { if lll < ll { ret false; } let delta = lll - ll; - let res = true; + let mut res = true; vec::riteri(vv) {|ii, elem| if elem != vvv[delta + ii] { res = false; } From ce62b60b42853bc75b08978cda2e5d2a4734b44e Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Thu, 8 Mar 2012 20:57:24 -0800 Subject: [PATCH 07/14] touchups --- src/rustdoc/markdown_pass.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rustdoc/markdown_pass.rs b/src/rustdoc/markdown_pass.rs index 8e26997023871..ad72e55ba1054 100644 --- a/src/rustdoc/markdown_pass.rs +++ b/src/rustdoc/markdown_pass.rs @@ -57,7 +57,6 @@ fn should_write_modules_last() { types of items, or else the header nesting will end up wrong, with modules appearing to contain items that they do not. 
*/ - let markdown = test::render( "mod a { }\ fn b() { }\ From 80bcb8ca10a957af8d35dbafc0180bac965fec44 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Fri, 9 Mar 2012 01:22:20 -0800 Subject: [PATCH 08/14] (core::str) tweaking some assertions --- src/libcore/str.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 6d30b938d1f69..4843bdbb12e8e 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -1165,11 +1165,9 @@ fn boyer_moore_unmatched_chars(needle: str) -> [uint] { let len = str::len(needle); let mm = vec::to_mut(vec::init_elt(255u, len)); + assert 0u < len; let mut jj = len - 1u; // drop the last byte - //assert 0u <= jj; - //assert jj < str::len(needle); - // from last-1 to first while jj > 0u { jj -= 1u; @@ -1193,7 +1191,6 @@ fn boyer_moore_matching_suffixes(needle_str: str) -> [uint] { let needle = str::bytes(needle_str); let len = vec::len(needle); - //assert 0u < len; // initialize len chars to len let mm = vec::to_mut(vec::init_elt(len, len)); From ae3c3bb3a519dd0e5ae0637955c82b131fe6b30e Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Fri, 9 Mar 2012 02:22:37 -0800 Subject: [PATCH 09/14] (core::str) simplifying boyer_moore_matching_suffixes... 
--- src/libcore/str.rs | 49 +++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 4843bdbb12e8e..ad2c576581033 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -928,6 +928,8 @@ fn rfind_between(s: str, start: uint, end: uint, f: fn(char) -> bool) } // Utility used by various searching functions +// Returns true if the whole needle is present in the haystack +// beginning at haystack[at] fn match_at(haystack: str, needle: str, at: uint) -> bool { let mut i = at; for c in needle { if haystack[i] != c { ret false; } i += 1u; } @@ -1190,38 +1192,55 @@ fn boyer_moore_unmatched_chars(needle: str) -> [uint] { fn boyer_moore_matching_suffixes(needle_str: str) -> [uint] { let needle = str::bytes(needle_str); - let len = vec::len(needle); + let len = vec::len(needle); // initialize len chars to len - let mm = vec::to_mut(vec::init_elt(len, len)); + let mm = vec::to_mut(vec::init_elt(len, len)); + + let range_ends_with = fn@(vvv0: uint, vvv1: uint, + vv0: uint, vv1: uint) -> bool { + // needle: [u8] + + let shortLen = vv1 - vv0; + + let mut iii = vvv1 - shortLen; + let mut ii = vv0; + + while ii < vv1 { + if needle[ii] != needle[iii] { ret false; } + ii += 1u; + iii += 1u; + } + + ret true; + }; // step to larger suffixes let mut sii = 0u; while sii < len { // tail of the needle we seek - let suffix = vec::slice(needle, len - sii, len); - let suffix_plus = vec::slice(needle, len - sii - 1u, len); - let slen = vec::len(suffix); + //let suffix = vec::slice(needle, len - sii, len); + //let suffix_plus = vec::slice(needle, len - sii - 1u, len); // step to smaller prefixes - let mut pii = len - 1u; - while pii > 0u { + let mut pii = len; + while 0u < pii { + pii -= 1u; // a prefix of the needle - let prefix = vec::slice(needle, 0u, pii); - let plen = vec::len(prefix); + //let prefix = vec::slice(needle, 0u, pii); // 0 -> pii // if suffix fully matched, or // prefix is 
bigger than suffix: only tail matched // (which we might jump to) if - (plen <= slen - && vec::ends_with(suffix, prefix)) + (pii <= sii + && range_ends_with(len-sii, len, 0u, pii)) || - (slen < plen - && vec::ends_with(prefix, suffix) - && !vec::ends_with(prefix, suffix_plus)) + (sii < pii + && range_ends_with(0u, pii, len-sii, len) + && needle[pii - sii -1u] != needle[len-sii - 1u]) { // if we haven't set it yet, set it now // (besides default) @@ -1229,8 +1248,6 @@ fn boyer_moore_matching_suffixes(needle_str: str) -> [uint] { mm[sii] = len-pii; } } - - pii -= 1u; } // if it hasn't been set, there was no matching prefix, From cb4e9680840ab809af278d3fed2764647bd75485 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Fri, 9 Mar 2012 02:50:35 -0800 Subject: [PATCH 10/14] (core::str) demonstrate that the suffix table currently sucks, i.e., this is currently Boyer-Moore-Horspool --- src/libcore/str.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index ad2c576581033..9a51b70965886 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -1102,24 +1102,25 @@ fn boyer_moore_search (haystack: str, needle: str, // generate the tables let ct = boyer_moore_unmatched_chars(needle); - let pt = boyer_moore_matching_suffixes(needle); + //let pt = boyer_moore_matching_suffixes(needle); // query both tables based on position // within the needle and character in haystack let getShift = fn@(pos: uint, ch: u8) -> uint { let matchedSoFar = nlen - 1u - pos; let rawCharShift = ct[ch as uint]; - let prefShift = pt[matchedSoFar]; +// let prefShift = pt[matchedSoFar]; if rawCharShift >= matchedSoFar { let adjCharShift = rawCharShift - matchedSoFar; - if adjCharShift > prefShift { +// if adjCharShift > prefShift { ret adjCharShift; - } +// } } - ret prefShift; +// ret prefShift; + ret 1u; }; // step up through the haystack From 90e903aed12a3c5ff0a9ddda112853ee52dd87c3 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: 
Sat, 10 Mar 2012 13:20:10 -0800 Subject: [PATCH 11/14] (core::str) significantly improved boyer-moore, still testing... --- src/libcore/str.rs | 153 +++++++++++++++++++++++++-------------------- src/libcore/vec.rs | 4 +- 2 files changed, 87 insertions(+), 70 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 9a51b70965886..80d175cee33a5 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -1006,27 +1006,28 @@ fn find_str_between(haystack: str, needle: str, start: uint, end:uint) } } -// Function: findn_str -// -// Returns up to `nn` byte positions of matched substrings +#[doc = "Returns up to `nn` byte positions of matched substrings"] fn findn_str(haystack: str, needle: str, nn: uint) -> [uint] { findn_str_between(haystack, needle, nn, 0u, str::len(haystack)) } -// Function: findn_str_between -// -// Returns up to `nn` byte positions of matched substrings -// between `start` and `end` +#[doc = " +Returns up to `nn` byte positions of matched substrings +between `start` and `end` +"] fn findn_str_between (haystack: str, needle: str, nn: uint, start: uint, end: uint) -> [uint] { boyer_moore_search(haystack, needle, nn, start, end) + //simple_search(haystack, needle, nn, start, end) } -// Returns up to `nn` byte positions of matched substrings -// between `start` and `end` -// (using a naive search algorithm) +#[doc = " +Returns up to `nn` byte positions of matched substrings +between `start` and `end` +(using a naive search algorithm) +"] fn simple_search (haystack: str, needle: str, nn: uint, start: uint, end: uint) -> [uint] { @@ -1076,9 +1077,11 @@ fn simple_search (haystack: str, needle: str, ret results; } -// Returns up to `nn` byte positions of matched substrings -// between `start` and `end` -// (using Boyer-Moore) +#[doc = " +Returns up to `nn` byte positions of matched substrings +between `start` and `end` +(using Boyer-Moore) +"] fn boyer_moore_search (haystack: str, needle: str, nn: uint, start: uint, end: uint) -> [uint] { @@ 
-1102,25 +1105,24 @@ fn boyer_moore_search (haystack: str, needle: str, // generate the tables let ct = boyer_moore_unmatched_chars(needle); - //let pt = boyer_moore_matching_suffixes(needle); + let pt = boyer_moore_matching_suffixes(needle); // query both tables based on position // within the needle and character in haystack let getShift = fn@(pos: uint, ch: u8) -> uint { let matchedSoFar = nlen - 1u - pos; let rawCharShift = ct[ch as uint]; -// let prefShift = pt[matchedSoFar]; + let prefShift = pt[matchedSoFar]; if rawCharShift >= matchedSoFar { let adjCharShift = rawCharShift - matchedSoFar; -// if adjCharShift > prefShift { + if adjCharShift > prefShift { ret adjCharShift; -// } + } } -// ret prefShift; - ret 1u; + ret prefShift; }; // step up through the haystack @@ -1166,7 +1168,7 @@ fn boyer_moore_search (haystack: str, needle: str, // (a.k.a. the bad-character table) fn boyer_moore_unmatched_chars(needle: str) -> [uint] { let len = str::len(needle); - let mm = vec::to_mut(vec::init_elt(255u, len)); + let mm = vec::to_mut(vec::from_elem(255u, len)); assert 0u < len; let mut jj = len - 1u; // drop the last byte @@ -1196,70 +1198,87 @@ fn boyer_moore_matching_suffixes(needle_str: str) -> [uint] { let len = vec::len(needle); // initialize len chars to len - let mm = vec::to_mut(vec::init_elt(len, len)); - - let range_ends_with = fn@(vvv0: uint, vvv1: uint, - vv0: uint, vv1: uint) -> bool { - // needle: [u8] + let mm = vec::to_mut(vec::from_elem(len, len)); + + // is the suffix from here a prefix of the needle? + let is_prefix = fn@(pos: uint) -> bool { + let suffixlen = len - pos; + let mut ii = 0u; + while ii < suffixlen { + if needle[ii] != needle[pos + ii] { ret false; } + ii += 1u; + } + ret true; + }; - let shortLen = vv1 - vv0; + // if this is the end of a suffix of the word, how long is it? 
+ let longest_suffix = fn@(pos: uint) -> uint { + let mut jj = 0u; - let mut iii = vvv1 - shortLen; - let mut ii = vv0; + // count up while matching larger suffixes with this prefix + while needle[pos - jj] == needle[len - 1u - jj] + && jj < pos + { + jj += 1u; - while ii < vv1 { - if needle[ii] != needle[iii] { ret false; } - ii += 1u; - iii += 1u; + assert pos >= jj; + assert len-1u >= jj; } - ret true; + ret jj; }; + + // step to smaller prefixes + // for the case where each suffix could contain a prefix of the needle + // i.e., suffix ends with prefix? + let mut pii = len; + let mut last_prefix_index = len - 1u; + while 0u < pii { + pii -= 1u; + + // FIXME: possible +1 issue + + // find if each possible suffix is a prefix + if is_prefix(pii + 1u) { last_prefix_index = pii + 1u; }; + ////log(error, "pref idx ->"); + ////log(error, last_prefix_index); + + ////log(error, "prefix(pii..len):"); + ////log(error, str::from_bytes(vec::slice(needle, pii, len))); + //mm[pii] = last_prefix_index + len - 1u - pii; + mm[len - 1u - pii] = last_prefix_index; + } + + + ////log(error, mm); + // step to larger suffixes + // for the case where each suffix could be part of the needle + // i.e., prefix ends with suffix? 
let mut sii = 0u; while sii < len { + let slen = longest_suffix(sii); + assert sii >= slen; - // tail of the needle we seek - //let suffix = vec::slice(needle, len - sii, len); - //let suffix_plus = vec::slice(needle, len - sii - 1u, len); - - // step to smaller prefixes - let mut pii = len; - while 0u < pii { - pii -= 1u; - - // a prefix of the needle - //let prefix = vec::slice(needle, 0u, pii); // 0 -> pii - - // if suffix fully matched, or - // prefix is bigger than suffix: only tail matched - // (which we might jump to) - if - (pii <= sii - && range_ends_with(len-sii, len, 0u, pii)) - || - (sii < pii - && range_ends_with(0u, pii, len-sii, len) - && needle[pii - sii -1u] != needle[len-sii - 1u]) - { - // if we haven't set it yet, set it now - // (besides default) - if mm[sii] == len { - mm[sii] = len-pii; - } - } - } + if needle[sii - slen] != needle[len - 1u - slen] { + ////log(error, "suffix(len-1-slen)"); + ////log(error, str::from_bytes(vec::slice(needle,len-slen, len))); - // if it hasn't been set, there was no matching prefix, - // so set it now - if mm[sii] == len { - mm[sii] = len-pii; + ////log(error, "sii:"); + ////log(error, sii); + + ////log(error, "slen:"); + ////log(error, slen); + + mm[slen] = len - 1u - sii; } sii += 1u; } + ////log(error, mm); + ret vec::from_mut(mm); } diff --git a/src/libcore/vec.rs b/src/libcore/vec.rs index aeb112a840d1a..9d98c61766390 100644 --- a/src/libcore/vec.rs +++ b/src/libcore/vec.rs @@ -869,9 +869,7 @@ fn permute(v: [T], put: fn([T])) { } } -// Function: windowed -// -// Return all sub-vectors of size `nn` +#[doc = "Return all sub-vectors of size `nn`"] fn windowed (nn: uint, xx: [const TT]) -> [[TT]] { let mut ww = []; From 95f5b36c8631e03f06fbe0e4d20ca27946fd1bb2 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Thu, 22 Mar 2012 01:43:40 -0700 Subject: [PATCH 12/14] (core::str) updated Boyer-Moore again, with faster good-suffix calc --- src/libcore/str.rs | 226 +++++++++++++++++++++++++++++---------------- 1 file 
changed, 146 insertions(+), 80 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 80d175cee33a5..09bc777cb87ea 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -1019,8 +1019,10 @@ fn findn_str_between (haystack: str, needle: str, nn: uint, start: uint, end: uint) -> [uint] { - boyer_moore_search(haystack, needle, nn, start, end) - //simple_search(haystack, needle, nn, start, end) + let BM = boyer_moore_search(haystack, needle, nn, start, end); + let SS = simple_search(haystack, needle, nn, start, end); + assert SS == BM; + ret SS; } #[doc = " @@ -1168,7 +1170,7 @@ fn boyer_moore_search (haystack: str, needle: str, // (a.k.a. the bad-character table) fn boyer_moore_unmatched_chars(needle: str) -> [uint] { let len = str::len(needle); - let mm = vec::to_mut(vec::from_elem(255u, len)); + let deltas = vec::to_mut(vec::from_elem(255u, len)); assert 0u < len; let mut jj = len - 1u; // drop the last byte @@ -1181,105 +1183,132 @@ fn boyer_moore_unmatched_chars(needle: str) -> [uint] { // if we haven't set it yet, set it now // (besides default) - if mm[key] == len { - mm[key] = len - 1u - jj; + if deltas[key] == len { + deltas[key] = len - 1u - jj; } } - ret vec::from_mut(mm); + ret vec::from_mut(deltas); } -// compute the table used to choose a shift based on -// a partially matched suffix of the search string -// (a.k.a. the good-suffix table) -fn boyer_moore_matching_suffixes(needle_str: str) -> [uint] { - let needle = str::bytes(needle_str); +// for each prefix of the search string +// find the largest suffix which is a suffix of the search string +fn boyer_moore_largest_suffixes(needle: str) -> [uint] { + let len = str::len(needle); - let len = vec::len(needle); + if len == 0u { ret []; } - // initialize len chars to len - let mm = vec::to_mut(vec::from_elem(len, len)); + let mut suffs = vec::to_mut(vec::from_elem(len, 0u)); + suffs[len - 1u] = len; - // is the suffix from here a prefix of the needle? 
- let is_prefix = fn@(pos: uint) -> bool { - let suffixlen = len - pos; - let mut ii = 0u; - while ii < suffixlen { - if needle[ii] != needle[pos + ii] { ret false; } - ii += 1u; - } - ret true; - }; + let mut ii = len - 1u; + let mut head = len; // index starting the previous found suffix + let mut tail = len; // index after the previous found suffix - // if this is the end of a suffix of the word, how long is it? - let longest_suffix = fn@(pos: uint) -> uint { - let mut jj = 0u; + // loop through each smaller prefix, + // keeping track of the last suffix of a prefix + // which was found to be a suffix of the needle + while 0u < ii { + ii -= 1u; - // count up while matching larger suffixes with this prefix - while needle[pos - jj] == needle[len - 1u - jj] - && jj < pos + if head < ii + 1u + && suffs[(len - 1u) - ((tail - 1u) - ii)] + head < ii + 1u { - jj += 1u; - - assert pos >= jj; - assert len-1u >= jj; - } - - ret jj; - }; - + // The needle is a suffix of itself, stored before this loop, + // so each prefix of that is matched + // with its largest possible suffix... + // + // So (bear with me) when considering prefixes + // of another matched prefix (i.e., when head <= ii < tail) + // if the corresponding maximum prefix's match is + // smaller than the space left within the current match, + // then we know this prefix's matching suffix is the same. + + // Consider: + // 01234567 + // heyyheyy + // ^ ^ + // + // When testing i=2, a match from 0-3 has already been found + // ("heyy"), and the match at i=6 ("y") fits + // in the remaining space within the current match, + // we know that suffs[2]=suffs[6]. + // + // If, however, suffs[6] was much larger, we'd have to work more. + + suffs[ii] = suffs[(len - 1u) - ((tail-1u) - ii)]; - // step to smaller prefixes - // for the case where each suffix could contain a prefix of the needle - // i.e., suffix ends with prefix?
- let mut pii = len; - let mut last_prefix_index = len - 1u; - while 0u < pii { - pii -= 1u; + } else { + // Here, find the largest suffix of the needle which matches + // the prefix ending at ii. + + // move the head left + // + // Note that if the head is already further left, + // we've already explored that far and eliminated the possibility + // of smaller match, above. + if ii + 1u <= head { + head = ii + 1u; + } - // FIXME: possible +1 issue + // put the tail here (the ending of this suffix) + tail = ii + 1u; - // find if each possible suffix is a prefix - if is_prefix(pii + 1u) { last_prefix_index = pii + 1u; }; - ////log(error, "pref idx ->"); - ////log(error, last_prefix_index); + // move the head left until it is before the matching suffix + while 1u <= head + && needle[head-1u] == needle[(len - 1u) - (tail - head)] + { + head -= 1u; + } - ////log(error, "prefix(pii..len):"); - ////log(error, str::from_bytes(vec::slice(needle, pii, len))); - //mm[pii] = last_prefix_index + len - 1u - pii; - mm[len - 1u - pii] = last_prefix_index; + // store the length of this suffix + suffs[ii] = tail - head; + } } + ret vec::from_mut(suffs); +} - ////log(error, mm); - - // step to larger suffixes - // for the case where each suffix could be part of the needle - // i.e., prefix ends with suffix? - let mut sii = 0u; - while sii < len { - let slen = longest_suffix(sii); - assert sii >= slen; - - if needle[sii - slen] != needle[len - 1u - slen] { - ////log(error, "suffix(len-1-slen)"); - ////log(error, str::from_bytes(vec::slice(needle,len-slen, len))); - - ////log(error, "sii:"); - ////log(error, sii); - - ////log(error, "slen:"); - ////log(error, slen); - - mm[slen] = len - 1u - sii; +// compute the table used to choose a shift based on +// a partially matched suffix of the search string +// (a.k.a. 
the good-suffix table) +fn boyer_moore_matching_suffixes(needle: str) -> [uint] { + let len = str::len(needle); + + // compute the largest suffix of each prefix + let suffs = boyer_moore_largest_suffixes(needle); + + // (1) initialize deltas + let deltas = vec::to_mut(vec::from_elem(len, len)); + + // (2) step to smaller suffixes ending with ii, and + // if a whole prefix is a suffix + // set all the deltas for indexes smaller than length - 1 - ii + // to length - 1 - ii + let mut ii = len; + let mut jj = 0u; + while 0u < ii { + ii -= 1u; + + if suffs[ii] == ii + 1u { + // do not reset jj, only do this once + while ii < len - 1u - jj { + if deltas[len - 1u - jj] == len { + deltas[len - 1u - jj] = len - 1u - ii; + } + jj += 1u; + } } - - sii += 1u; } - ////log(error, mm); + // (3) then for each different matched suffix size, set the delta + let mut kk = 0u; + while 2u <= len && kk <= len - 2u { + deltas[suffs[kk]] = len - 1u - kk; + kk += 1u; + } - ret vec::from_mut(mm); + ret vec::from_mut(deltas); } #[doc = " @@ -2180,6 +2209,22 @@ mod tests { assert findn_str_between(data, "ax", 1u, 0u, 6u) == []; } + #[test] + fn test_simple_search() { + let data = "abcabc"; + assert simple_search(data, "ab", 2u, 0u, 6u) == [0u, 3u]; + assert simple_search(data, "ab", 1u, 0u, 6u) == [0u]; + assert simple_search(data, "ax", 1u, 0u, 6u) == []; + } + + #[test] + fn test_boyer_moore_search() { + let data = "abcabc"; + assert boyer_moore_search(data, "ab", 2u, 0u, 6u) == [0u, 3u]; + assert boyer_moore_search(data, "ab", 1u, 0u, 6u) == [0u]; + assert boyer_moore_search(data, "ax", 1u, 0u, 6u) == []; + } + #[test] fn test_findn_str() { assert [] == str::findn_str("banana", "apple pie", 1u); @@ -2618,8 +2663,29 @@ mod tests { assert 6u == ct[0x_80_u]; } + #[test] + fn test_boyer_moore_largest_suffixes() { + assert boyer_moore_largest_suffixes("") + == []; + + assert boyer_moore_largest_suffixes("x") + == [1u]; + + assert boyer_moore_largest_suffixes("heyyheyyheyy") + == 
[0u,0u,1u,4u,0u,0u,1u,8u,0u,0u,1u,12u]; + + assert boyer_moore_largest_suffixes("gcagagag") + == [1u,0u,0u,2u,0u,4u,0u,8u]; + } + #[test] fn test_matching_suffixes_ascii() { + assert [] == boyer_moore_matching_suffixes(""); + + let test1 = boyer_moore_matching_suffixes("gcagagag"); + assert test1 == [1u,7u,4u,7u,2u,7u,7u,7u]; + + let pt = boyer_moore_matching_suffixes("ANPANMAN"); assert 1u == pt[0u]; // (n) From 6e8de238d8307fd5f46f80ea9630cbc328d95261 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Mon, 26 Mar 2012 04:06:43 -0700 Subject: [PATCH 13/14] (core::str) based on testing so far, choose boyer-moore when it can be faster --- src/libcore/str.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 09bc777cb87ea..94e89adc3cf53 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -1019,10 +1019,15 @@ fn findn_str_between (haystack: str, needle: str, nn: uint, start: uint, end: uint) -> [uint] { - let BM = boyer_moore_search(haystack, needle, nn, start, end); - let SS = simple_search(haystack, needle, nn, start, end); - assert SS == BM; - ret SS; + let hl = str::len(haystack); + let nl = str::len(needle); + + // numbers subject to change... + if hl > 10*nl + 1500 && nl > 10 { + ret boyer_moore_search(haystack, needle, nn, start, end); + } else { + ret simple_search(haystack, needle, nn, start, end); + } } #[doc = " From f7aa6b23e49dbb131e6b6d7e9b3e5c86ac77c2f9 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Mon, 26 Mar 2012 04:11:08 -0700 Subject: [PATCH 14/14] touchups --- src/libcore/str.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 94e89adc3cf53..fc2858de0c117 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -1023,7 +1023,9 @@ fn findn_str_between (haystack: str, needle: str, let nl = str::len(needle); // numbers subject to change... 
- if hl > 10*nl + 1500 && nl > 10 { + if hl > 10u * nl + 1500u + && nl > 10u + { ret boyer_moore_search(haystack, needle, nn, start, end); } else { ret simple_search(haystack, needle, nn, start, end);