From 1af414b27341bd2a265ea658e13c2a543a830b84 Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Sat, 25 Feb 2012 03:02:19 -0800
Subject: [PATCH 01/14] (core::str) add chars_iteri

---
 src/libcore/str.rs | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 057c0b0d187fe..3da0979f33e04 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -57,6 +57,7 @@ export
    map,
    bytes_iter,
    chars_iter,
+   chars_iteri,
    split_char_iter,
    splitn_char_iter,
    words_iter,
@@ -582,13 +583,19 @@ fn bytes_iter(ss: str, it: fn(u8)) {
 }
 
 #[doc = "Iterate over the characters in a string"]
-fn chars_iter(s: str, it: fn(char)) {
+fn chars_iter(ss: str, it: fn(char)) {
+    chars_iteri(ss, {|_ii, ch| it(ch)})
+}
+
+#[doc = "Iterate over the characters in a string"]
+fn chars_iteri(ss: str, it: fn(uint,char)) {
     let mut pos = 0u;
     let len = len(s);
+
     while (pos < len) {
-        let {ch, next} = char_range_at(s, pos);
+        let {ch, next} = char_range_at(ss, pos);
+        it(pos, ch);
         pos = next;
-        it(ch);
     }
 }
 

From e95006d6dec2e79bb8e918993b519fdd5a743f6d Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Sat, 25 Feb 2012 19:55:40 -0800
Subject: [PATCH 02/14] (core::vec) add ends_with

---
 src/libcore/vec.rs | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/src/libcore/vec.rs b/src/libcore/vec.rs
index c760813bf7ff0..be381fc5ee267 100644
--- a/src/libcore/vec.rs
+++ b/src/libcore/vec.rs
@@ -869,6 +869,9 @@ fn permute<T: copy>(v: [T], put: fn([T])) {
   }
 }
 
+// Function: windowed
+//
+// Return all sub-vectors of size `nn`
 fn windowed <TT: copy> (nn: uint, xx: [const TT]) -> [[TT]] {
    let mut ww = [];
 
@@ -886,6 +889,26 @@ fn windowed <TT: copy> (nn: uint, xx: [const TT]) -> [[TT]] {
    ret ww;
 }
 
+#[doc = "
+Return true if the first vector ends with the second
+(including if the second is [])
+"]
+fn ends_with <TT> (vvv: [TT], vv: [TT]) -> bool {
+    let lll = vec::len(vvv);
+    let ll  = vec::len(vv);
+
+    if lll < ll { ret false; }
+
+    let delta = lll - ll;
+    let res = true;
+
+    vec::riteri(vv) {|ii, elem|
+        if elem != vvv[delta + ii] { res = false; }
+    }
+
+    ret res;
+}
+
 #[doc = "
 Work with the buffer of a vector.
 
@@ -1760,6 +1783,18 @@ mod tests {
         unshift(x, 0);
         assert x == [0, 1, 2, 3];
     }
+
+    #[test]
+    fn test_ends_with() {
+        assert true  == ends_with([7,7,7,0,1,2], [0,1,2]);
+        assert false == ends_with([7,7,7,0,1,2,7], [0,1,2]);
+        assert true  == ends_with([0,1,2], [0,1,2]);
+        assert false == ends_with([0,1,2], [0,1,2,3]);
+        assert true  == ends_with([0,1,2], []);
+        let empty : [uint] = []; // just feed a type into this thing
+        assert true  == ends_with(empty, empty);
+        assert false == ends_with([],[0,1,2,3]);
+    }
 }
 
 // Local Variables:

From ef827fbc846c4dc49b8dfca0dc70c8b44195d9c6 Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Sat, 3 Mar 2012 19:10:22 -0800
Subject: [PATCH 03/14] (core::str) add Boyer-Moore string searching

---
 src/libcore/str.rs           | 345 ++++++++++++++++++++++++++++++-----
 src/rustdoc/markdown_pass.rs |   1 +
 2 files changed, 305 insertions(+), 41 deletions(-)

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 3da0979f33e04..429b5078f84d4 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -69,6 +69,7 @@ export
    find_char, find_char_from, find_char_between,
    rfind_char, rfind_char_from, rfind_char_between,
    find_str, find_str_from, find_str_between,
+   findn_str,
    contains,
    starts_with,
    ends_with,
@@ -401,31 +402,12 @@ fn split_inner(s: str, sepfn: fn(cc: char) -> bool, count: uint,
     result
 }
 
-// FIXME use Boyer-Moore
-fn iter_matches(s: str, sep: str, f: fn(uint, uint)) {
-    let sep_len = len(sep), l = len(s);
+fn iter_matches(ss: str, sep: str, f: fn(uint, uint)) {
+    let sep_len = len(sep);
     assert sep_len > 0u;
-    let mut i = 0u, match_start = 0u, match_i = 0u;
-
-    while i < l {
-        if s[i] == sep[match_i] {
-            if match_i == 0u { match_start = i; }
-            match_i += 1u;
-            // Found a match
-            if match_i == sep_len {
-                f(match_start, i + 1u);
-                match_i = 0u;
-            }
-            i += 1u;
-        } else {
-            // Failed match, backtrack
-            if match_i > 0u {
-                match_i = 0u;
-                i = match_start + 1u;
-            } else {
-                i += 1u;
-            }
-        }
+
+    for match in findn_str(ss, sep, len(ss)) {
+        f(match, match + sep_len);
     }
 }
 
@@ -1012,19 +994,203 @@ or equal to `len(s)`.
 "]
 fn find_str_between(haystack: str, needle: str, start: uint, end:uint)
   -> option<uint> {
-    // FIXME: Boyer-Moore should be significantly faster
-    assert end <= len(haystack);
-    let needle_len = len(needle);
-    if needle_len == 0u { ret some(start); }
-    if needle_len > end { ret none; }
+    let found = findn_str_between(haystack, needle, 1u, start, end);
+    alt vec::len(found) {
+        0u  { ret option::none; }
+        _nn { ret option::some(found[0u]); }
+    }
+}
 
-    let mut i = start;
-    let e = end - needle_len;
-    while i <= e {
-        if match_at(haystack, needle, i) { ret some(i); }
-        i += 1u;
+// Function: findn_str
+//
+// Returns up to `nn` byte positions of matched substrings
+fn findn_str(haystack: str, needle: str, nn: uint) -> [uint] {
+    findn_str_between(haystack, needle, nn, 0u, str::len(haystack))
+}
+
+// Function: findn_str_between
+//
+// Returns up to `nn` byte positions of matched substrings
+// between `start` and `end`
+fn findn_str_between (haystack: str, needle: str,
+                      nn: uint,
+                      start: uint, end: uint) -> [uint] {
+
+    boyer_moore_search(haystack, needle, nn, start, end)
+}
+
+// Returns up to `nn` byte positions of matched substrings
+// between `start` and `end`
+// (using Boyer-Moore)
+fn boyer_moore_search (haystack: str, needle: str,
+                      nn: uint,
+                      start: uint, end: uint) -> [uint] {
+    let results = [];
+
+    let nlen = str::len(needle);
+
+    assert start <= end;
+    assert end <= str::len(haystack);
+    let hlen = end - start;
+
+    // empty needle
+    if nlen == 0u {
+        ret [start];
     }
-    ret none;
+
+    // haystack empty, or smaller than needle
+    if hlen == 0u || hlen < nlen {
+        ret [];
+    }
+
+    // generate the tables
+    let ct = boyer_moore_unmatched_chars(needle);
+    let pt = boyer_moore_matching_suffixes(needle);
+
+    // query both tables based on position
+    // within the needle and character in haystack
+    let getShift = fn@(pos: uint, ch: u8) -> uint {
+        let matchedSoFar = nlen - 1u - pos;
+        let rawCharShift = ct[ch as uint];
+        let prefShift    = pt[matchedSoFar];
+
+        if rawCharShift >= matchedSoFar {
+           let adjCharShift = rawCharShift - matchedSoFar;
+
+           if adjCharShift > prefShift {
+               ret adjCharShift;
+           }
+        }
+
+        ret prefShift;
+    };
+
+    // step up through the haystack
+    let outerii = start;
+    while outerii + nlen <= end {
+
+        // step back through needle
+        // (checking outer range again)
+        let windowii = nlen;
+        while 0u < windowii {
+
+            windowii -= 1u;
+
+            // matching byte?
+            if needle[windowii] == haystack[outerii+windowii] {
+
+                // needle fully matched?
+                // note: last decremented windowii
+                if windowii == 0u {
+                    vec::push(results, outerii);
+
+                    if vec::len(results) >= nn { ret results; }
+
+                    outerii += nlen;
+                }
+
+                // if not fully matched, leave outerii alone
+                // but decrement the windowii
+
+            } else {
+                // no match or a partial match
+                outerii += getShift(windowii, haystack[outerii+windowii]);
+                break;
+            }
+        }
+    }
+
+    ret results;
+}
+
+// compute the table used to choose a shift based on
+// an unmatched character's possible position within the search string
+// (a.k.a. the bad-character table)
+fn boyer_moore_unmatched_chars(needle: str) -> [uint] {
+    let len = str::len(needle);
+    let mm  = vec::to_mut(vec::init_elt(255u, len));
+
+    let jj = len - 1u; // drop the last byte
+
+    //assert 0u <= jj;
+    //assert       jj < str::len(needle);
+
+    // from last-1 to first
+    while jj > 0u {
+        jj -= 1u;
+
+        let key = needle[jj] as uint;
+
+        // if we haven't set it yet, set it now
+        // (besides default)
+        if mm[key] == len {
+            mm[key] = len - 1u - jj;
+        }
+    }
+
+    ret vec::from_mut(mm);
+}
+
+// compute the table used to choose a shift based on
+// a partially matched suffix of the search string
+// (a.k.a. the good-suffix table)
+fn boyer_moore_matching_suffixes(needle_str: str) -> [uint] {
+    let needle = str::bytes(needle_str);
+
+    let len   = vec::len(needle);
+    //assert 0u < len;
+
+    // initialize len chars to len
+    let mm  = vec::to_mut(vec::init_elt(len, len));
+
+    // step to larger suffixes
+    let sii = 0u;
+    while sii < len {
+
+        // tail of the needle we seek
+        let suffix      = vec::slice(needle, len - sii,      len);
+        let suffix_plus = vec::slice(needle, len - sii - 1u, len);
+        let slen = vec::len(suffix);
+
+        // step to smaller prefixes
+        let pii = len - 1u;
+        while pii > 0u {
+
+            // a prefix of the needle
+            let prefix = vec::slice(needle, 0u, pii);
+            let plen = vec::len(prefix);
+
+            // if suffix fully matched, or
+            // prefix is bigger than suffix: only tail matched
+            // (which we might jump to)
+            if
+                (plen <= slen
+                 && vec::ends_with(suffix, prefix))
+            ||
+                (slen < plen
+                 && vec::ends_with(prefix, suffix)
+                 && !vec::ends_with(prefix, suffix_plus))
+            {
+                // if we haven't set it yet, set it now
+                // (besides default)
+                if mm[sii] == len {
+                    mm[sii] = len-pii;
+                }
+            }
+
+            pii -= 1u;
+        }
+
+        // if it hasn't been set, there was no matching prefix,
+        // so set it now
+        if mm[sii] == len {
+            mm[sii] = len-pii;
+        }
+
+        sii += 1u;
+    }
+
+    ret vec::from_mut(mm);
 }
 
 #[doc = "
@@ -1036,7 +1202,7 @@ Returns true if one string contains another
 * needle - The string to look for
 "]
 fn contains(haystack: str, needle: str) -> bool {
-    option::is_some(find_str(haystack, needle))
+    option::is_some(find_str_between(haystack, needle, 0u, len(haystack)))
 }
 
 #[doc = "
@@ -1887,14 +2053,23 @@ mod tests {
 
     #[test]
     fn test_find_str_between() {
-        // byte positions
         assert find_str_between("", "", 0u, 0u) == some(0u);
+        assert find_str_between("", "pow", 0u, 0u) == none;
+        assert find_str_between("donatello", "don", 0u, 9u) == some(0u);
+      //assert find_str_between("don", "donatello", 0u, 10u) == none; //was OK
+        assert find_str_between("don", "donatello", 0u, 3u) == none; //OK
+    }
 
-        let data = "abcabc";
-        assert find_str_between(data, "ab", 0u, 6u) == some(0u);
-        assert find_str_between(data, "ab", 2u, 6u) == some(3u);
-        assert find_str_between(data, "ab", 2u, 4u) == none;
+    #[test]
+    fn test_find_str_between_ascii() {
+        let data0 = "abcabc";
+        assert find_str_between(data0, "ab", 0u, 6u) == some(0u);
+        assert find_str_between(data0, "ab", 2u, 6u) == some(3u);
+        assert find_str_between(data0, "ab", 2u, 4u) == none;
+    }
 
+    #[test]
+    fn test_find_str_between_utf8() {
         let mut data = "ประเทศไทย中华Việt Nam";
         data += data;
         assert find_str_between(data, "", 0u, 43u) == some(0u);
@@ -1913,6 +2088,43 @@ mod tests {
         assert find_str_between(data, "Nam", 43u, 86u) == some(83u);
     }
 
+    #[test]
+    fn test_findn_str() {
+        assert []       == str::findn_str("banana", "apple pie", 1u);
+        assert [0u]     == str::findn_str("abcxxxxxx", "abc", 1u);
+        assert [3u]     == str::findn_str("xxxabcxxx", "abc", 1u);
+        assert [6u]     == str::findn_str("xxxxxxabc", "abc", 1u);
+        assert [3u]     == str::findn_str("xxxabcabc", "abc", 1u);
+        assert [3u, 6u] == str::findn_str("xxxabcabc", "abc", 5u);
+        assert [3u, 7u] == str::findn_str("xxxabcxabc", "abc", 5u);
+        assert [3u, 8u] == str::findn_str("xxxabcxxabc", "abc", 5u);
+    }
+
+    #[test]
+    fn test_find_str_ascii() {
+        assert option::some(0u) == find_str("", "");
+        assert option::none     == find_str("banana", "apple pie");
+        assert option::some(0u) == find_str("abcxxxxxx", "abc");
+        assert option::some(3u) == find_str("xxxabcxxx", "abc");
+        assert option::some(6u) == find_str("xxxxxxabc", "abc");
+    }
+
+    #[test]
+    fn test_find_str_utf8() {
+        let data = "ประเทศไทย中华Việt Nam";
+
+        assert option::some( 0u) == find_str(data, "");
+        assert option::none      == find_str(data, "ไท华");
+        assert option::some( 0u) == find_str(data, "ประเ");
+        assert option::some( 3u) == find_str(data, "ระ");
+        assert option::some( 6u) == find_str(data, "ะเ");
+        assert option::some(15u) == find_str(data, "ศไทย中华");
+        assert option::some(18u) == find_str(data, "ไทย中华");
+        assert option::some(24u) == find_str(data, "ย中华");
+        assert option::some(27u) == find_str(data, "中华");
+    }
+
+
     #[test]
     fn test_substr() {
         fn t(a: str, b: str, start: int) {
@@ -2287,6 +2499,57 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_unmatched_chars_ascii () {
+        let ct = boyer_moore_unmatched_chars("ANPANMAN");
+
+        assert 1u == ct['A' as uint];
+        assert 2u == ct['M' as uint];
+        assert 3u == ct['N' as uint];
+        assert 5u == ct['P' as uint];
+
+        // others
+        assert 8u == ct['z' as uint];
+        assert 8u == ct['w' as uint];
+        assert 8u == ct['x' as uint];
+    }
+
+    #[test]
+    fn test_unmatched_chars_utf8() {
+        let ct = boyer_moore_unmatched_chars("ะเ"); //e0b8b0 e0b980
+
+        assert 2u == ct[0x_e0_u];
+        assert 4u == ct[0x_b8_u];
+        assert 3u == ct[0x_b0_u];
+        assert 2u == ct[0x_e0_u];
+        assert 1u == ct[0x_b9_u];
+        assert 6u == ct[0x_80_u];
+    }
+
+    #[test]
+    fn test_matching_suffixes_ascii() {
+        let pt = boyer_moore_matching_suffixes("ANPANMAN");
+
+        assert 1u == pt[0u]; //        (n)
+        assert 8u == pt[1u]; //       (a)n
+        assert 3u == pt[2u]; //      (m)an
+        assert 6u == pt[3u]; //     (n)man
+        assert 6u == pt[4u]; //    (a)nman
+        assert 6u == pt[5u]; //   (p)anman
+        assert 6u == pt[6u]; //  (n)panman
+        assert 6u == pt[7u]; // (a)npanman
+    }
+
+    #[test]
+    fn test_matching_suffixes_utf8() {
+        let pt = boyer_moore_matching_suffixes("ประเ");
+
+        assert  1u == pt[0u];
+        assert 12u == pt[3u];
+        assert 12u == pt[6u];
+        assert 12u == pt[9u];
+    }
+
     #[test]
     fn test_contains() {
         assert contains("abcde", "bcd");
diff --git a/src/rustdoc/markdown_pass.rs b/src/rustdoc/markdown_pass.rs
index ad72e55ba1054..8e26997023871 100644
--- a/src/rustdoc/markdown_pass.rs
+++ b/src/rustdoc/markdown_pass.rs
@@ -57,6 +57,7 @@ fn should_write_modules_last() {
     types of items, or else the header nesting will end up wrong, with
     modules appearing to contain items that they do not.
     */
+
     let markdown = test::render(
         "mod a { }\
          fn b() { }\

From e8fb664dad609b20ecbd66620935b69c59f3f040 Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Tue, 6 Mar 2012 00:18:13 -0800
Subject: [PATCH 04/14] (core::str) touchups: simplify contains, merge tests

---
 src/libcore/str.rs | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 429b5078f84d4..9f70751b9cb95 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -1202,7 +1202,7 @@ Returns true if one string contains another
 * needle - The string to look for
 "]
 fn contains(haystack: str, needle: str) -> bool {
-    option::is_some(find_str_between(haystack, needle, 0u, len(haystack)))
+    option::is_some(find_str(haystack, needle))
 }
 
 #[doc = "
@@ -2039,7 +2039,6 @@ mod tests {
 
     #[test]
     fn test_find_str() {
-        // byte positions
         assert find_str("banana", "apple pie") == none;
         assert find_str("", "") == some(0u);
 
@@ -2052,16 +2051,12 @@ mod tests {
     }
 
     #[test]
-    fn test_find_str_between() {
+    fn test_find_str_between_ascii() {
         assert find_str_between("", "", 0u, 0u) == some(0u);
         assert find_str_between("", "pow", 0u, 0u) == none;
         assert find_str_between("donatello", "don", 0u, 9u) == some(0u);
-      //assert find_str_between("don", "donatello", 0u, 10u) == none; //was OK
-        assert find_str_between("don", "donatello", 0u, 3u) == none; //OK
-    }
+        assert find_str_between("don", "donatello", 0u, 3u) == none;
 
-    #[test]
-    fn test_find_str_between_ascii() {
         let data0 = "abcabc";
         assert find_str_between(data0, "ab", 0u, 6u) == some(0u);
         assert find_str_between(data0, "ab", 2u, 6u) == some(3u);

From ababa8857c791c6841b41356968e82222d5cb65e Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Tue, 6 Mar 2012 00:55:20 -0800
Subject: [PATCH 05/14] (core::str) export findn_str_between

---
 src/libcore/str.rs | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 9f70751b9cb95..13af9891d0c67 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -69,7 +69,7 @@ export
    find_char, find_char_from, find_char_between,
    rfind_char, rfind_char_from, rfind_char_between,
    find_str, find_str_from, find_str_between,
-   findn_str,
+   findn_str, findn_str_between,
    contains,
    starts_with,
    ends_with,
@@ -2057,10 +2057,10 @@ mod tests {
         assert find_str_between("donatello", "don", 0u, 9u) == some(0u);
         assert find_str_between("don", "donatello", 0u, 3u) == none;
 
-        let data0 = "abcabc";
-        assert find_str_between(data0, "ab", 0u, 6u) == some(0u);
-        assert find_str_between(data0, "ab", 2u, 6u) == some(3u);
-        assert find_str_between(data0, "ab", 2u, 4u) == none;
+        let data = "abcabc";
+        assert find_str_between(data, "ab", 0u, 6u) == some(0u);
+        assert find_str_between(data, "ab", 2u, 6u) == some(3u);
+        assert find_str_between(data, "ab", 2u, 4u) == none;
     }
 
     #[test]
@@ -2083,6 +2083,14 @@ mod tests {
         assert find_str_between(data, "Nam", 43u, 86u) == some(83u);
     }
 
+    #[test]
+    fn test_findn_str_between() {
+        let data = "abcabc";
+        assert findn_str_between(data, "ab", 2u, 0u, 6u) == [0u, 3u];
+        assert findn_str_between(data, "ab", 1u, 0u, 6u) == [0u];
+        assert findn_str_between(data, "ax", 1u, 0u, 6u) == [];
+    }
+
     #[test]
     fn test_findn_str() {
         assert []       == str::findn_str("banana", "apple pie", 1u);

From d1a26a47dcba13104c507eacaad1e0710ad175bb Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Thu, 8 Mar 2012 20:52:38 -0800
Subject: [PATCH 06/14] (core::str) add simple_search and temporarily export it
 and boyer_moore_search for testing

---
 src/libcore/str.rs | 71 ++++++++++++++++++++++++++++++++++++++++------
 src/libcore/vec.rs |  2 +-
 2 files changed, 64 insertions(+), 9 deletions(-)

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 13af9891d0c67..6d30b938d1f69 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -69,7 +69,10 @@ export
    find_char, find_char_from, find_char_between,
    rfind_char, rfind_char_from, rfind_char_between,
    find_str, find_str_from, find_str_between,
-   findn_str, findn_str_between,
+   findn_str,
+   findn_str_between,
+   simple_search,      // temp, called by findn_str_between
+   boyer_moore_search, // temp, called by findn_str_between
    contains,
    starts_with,
    ends_with,
@@ -572,7 +575,7 @@ fn chars_iter(ss: str, it: fn(char)) {
 #[doc = "Iterate over the characters in a string"]
 fn chars_iteri(ss: str, it: fn(uint,char)) {
     let mut pos = 0u;
-    let len = len(s);
+    let len = len(ss);
 
     while (pos < len) {
         let {ch, next} = char_range_at(ss, pos);
@@ -1019,13 +1022,65 @@ fn findn_str_between (haystack: str, needle: str,
     boyer_moore_search(haystack, needle, nn, start, end)
 }
 
+// Returns up to `nn` byte positions of matched substrings
+// between `start` and `end`
+// (using a naive search algorithm)
+fn simple_search (haystack: str, needle: str,
+                      nn: uint,
+                      start: uint, end: uint) -> [uint] {
+    let mut results = [];
+
+    let nlen = str::len(needle);
+
+    assert start <= end;
+    assert end <= str::len(haystack);
+    let hlen = end - start;
+
+    // empty needle
+    if nlen == 0u {
+        ret [start];
+    }
+
+    // haystack empty, or smaller than needle
+    if hlen == 0u || hlen < nlen {
+        ret [];
+    }
+
+    let mut ii = start, match_start = 0u, match_i = 0u;
+
+    while ii < end {
+        if haystack[ii] == needle[match_i] {
+            if match_i == 0u { match_start = ii; }
+            match_i += 1u;
+            // Found a match
+            if match_i == nlen {
+                vec::push(results, match_start);
+                match_i = 0u;
+
+                if vec::len(results) >= nn { ret results; }
+            }
+            ii += 1u;
+        } else {
+            // Failed match, backtrack
+            if match_i > 0u {
+                match_i = 0u;
+                ii = match_start + 1u;
+            } else {
+                ii += 1u;
+            }
+        }
+    }
+
+    ret results;
+}
+
 // Returns up to `nn` byte positions of matched substrings
 // between `start` and `end`
 // (using Boyer-Moore)
 fn boyer_moore_search (haystack: str, needle: str,
                       nn: uint,
                       start: uint, end: uint) -> [uint] {
-    let results = [];
+    let mut results = [];
 
     let nlen = str::len(needle);
 
@@ -1066,12 +1121,12 @@ fn boyer_moore_search (haystack: str, needle: str,
     };
 
     // step up through the haystack
-    let outerii = start;
+    let mut outerii = start;
     while outerii + nlen <= end {
 
         // step back through needle
         // (checking outer range again)
-        let windowii = nlen;
+        let mut windowii = nlen;
         while 0u < windowii {
 
             windowii -= 1u;
@@ -1110,7 +1165,7 @@ fn boyer_moore_unmatched_chars(needle: str) -> [uint] {
     let len = str::len(needle);
     let mm  = vec::to_mut(vec::init_elt(255u, len));
 
-    let jj = len - 1u; // drop the last byte
+    let mut jj = len - 1u; // drop the last byte
 
     //assert 0u <= jj;
     //assert       jj < str::len(needle);
@@ -1144,7 +1199,7 @@ fn boyer_moore_matching_suffixes(needle_str: str) -> [uint] {
     let mm  = vec::to_mut(vec::init_elt(len, len));
 
     // step to larger suffixes
-    let sii = 0u;
+    let mut sii = 0u;
     while sii < len {
 
         // tail of the needle we seek
@@ -1153,7 +1208,7 @@ fn boyer_moore_matching_suffixes(needle_str: str) -> [uint] {
         let slen = vec::len(suffix);
 
         // step to smaller prefixes
-        let pii = len - 1u;
+        let mut pii = len - 1u;
         while pii > 0u {
 
             // a prefix of the needle
diff --git a/src/libcore/vec.rs b/src/libcore/vec.rs
index be381fc5ee267..aeb112a840d1a 100644
--- a/src/libcore/vec.rs
+++ b/src/libcore/vec.rs
@@ -900,7 +900,7 @@ fn ends_with <TT> (vvv: [TT], vv: [TT]) -> bool {
     if lll < ll { ret false; }
 
     let delta = lll - ll;
-    let res = true;
+    let mut res = true;
 
     vec::riteri(vv) {|ii, elem|
         if elem != vvv[delta + ii] { res = false; }

From ce62b60b42853bc75b08978cda2e5d2a4734b44e Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Thu, 8 Mar 2012 20:57:24 -0800
Subject: [PATCH 07/14] (rustdoc) touchups: drop stray blank line in markdown_pass

---
 src/rustdoc/markdown_pass.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/rustdoc/markdown_pass.rs b/src/rustdoc/markdown_pass.rs
index 8e26997023871..ad72e55ba1054 100644
--- a/src/rustdoc/markdown_pass.rs
+++ b/src/rustdoc/markdown_pass.rs
@@ -57,7 +57,6 @@ fn should_write_modules_last() {
     types of items, or else the header nesting will end up wrong, with
     modules appearing to contain items that they do not.
     */
-
     let markdown = test::render(
         "mod a { }\
          fn b() { }\

From 80bcb8ca10a957af8d35dbafc0180bac965fec44 Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Fri, 9 Mar 2012 01:22:20 -0800
Subject: [PATCH 08/14] (core::str) tweaking some assertions

---
 src/libcore/str.rs | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 6d30b938d1f69..4843bdbb12e8e 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -1165,11 +1165,9 @@ fn boyer_moore_unmatched_chars(needle: str) -> [uint] {
     let len = str::len(needle);
     let mm  = vec::to_mut(vec::init_elt(255u, len));
 
+    assert 0u < len;
     let mut jj = len - 1u; // drop the last byte
 
-    //assert 0u <= jj;
-    //assert       jj < str::len(needle);
-
     // from last-1 to first
     while jj > 0u {
         jj -= 1u;
@@ -1193,7 +1191,6 @@ fn boyer_moore_matching_suffixes(needle_str: str) -> [uint] {
     let needle = str::bytes(needle_str);
 
     let len   = vec::len(needle);
-    //assert 0u < len;
 
     // initialize len chars to len
     let mm  = vec::to_mut(vec::init_elt(len, len));

From ae3c3bb3a519dd0e5ae0637955c82b131fe6b30e Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Fri, 9 Mar 2012 02:22:37 -0800
Subject: [PATCH 09/14] (core::str) simplifying
 boyer_moore_matching_suffixes...

---
 src/libcore/str.rs | 49 +++++++++++++++++++++++++++++++---------------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 4843bdbb12e8e..ad2c576581033 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -928,6 +928,8 @@ fn rfind_between(s: str, start: uint, end: uint, f: fn(char) -> bool)
 }
 
 // Utility used by various searching functions
+// Returns true if the whole needle is present in the haystack
+// beginning at haystack[at]
 fn match_at(haystack: str, needle: str, at: uint) -> bool {
     let mut i = at;
     for c in needle { if haystack[i] != c { ret false; } i += 1u; }
@@ -1190,38 +1192,55 @@ fn boyer_moore_unmatched_chars(needle: str) -> [uint] {
 fn boyer_moore_matching_suffixes(needle_str: str) -> [uint] {
     let needle = str::bytes(needle_str);
 
-    let len   = vec::len(needle);
+    let len = vec::len(needle);
 
     // initialize len chars to len
-    let mm  = vec::to_mut(vec::init_elt(len, len));
+    let mm = vec::to_mut(vec::init_elt(len, len));
+
+    let range_ends_with = fn@(vvv0: uint, vvv1: uint,
+                             vv0:  uint, vv1:  uint) -> bool {
+        // needle: [u8]
+
+        let shortLen = vv1 - vv0;
+
+        let mut iii = vvv1 - shortLen;
+        let mut ii = vv0;
+
+        while ii < vv1 {
+            if needle[ii] != needle[iii] { ret false; }
+            ii += 1u;
+            iii += 1u;
+        }
+
+        ret true;
+    };
 
     // step to larger suffixes
     let mut sii = 0u;
     while sii < len {
 
         // tail of the needle we seek
-        let suffix      = vec::slice(needle, len - sii,      len);
-        let suffix_plus = vec::slice(needle, len - sii - 1u, len);
-        let slen = vec::len(suffix);
+        //let suffix      = vec::slice(needle, len - sii,      len);
+        //let suffix_plus = vec::slice(needle, len - sii - 1u, len);
 
         // step to smaller prefixes
-        let mut pii = len - 1u;
-        while pii > 0u {
+        let mut pii = len;
+        while 0u < pii {
+            pii -= 1u;
 
             // a prefix of the needle
-            let prefix = vec::slice(needle, 0u, pii);
-            let plen = vec::len(prefix);
+            //let prefix = vec::slice(needle, 0u, pii); // 0 -> pii
 
             // if suffix fully matched, or
             // prefix is bigger than suffix: only tail matched
             // (which we might jump to)
             if
-                (plen <= slen
-                 && vec::ends_with(suffix, prefix))
+                (pii <= sii
+                 && range_ends_with(len-sii, len, 0u, pii))
             ||
-                (slen < plen
-                 && vec::ends_with(prefix, suffix)
-                 && !vec::ends_with(prefix, suffix_plus))
+                (sii < pii
+                 && range_ends_with(0u, pii, len-sii, len)
+                 && needle[pii - sii -1u] != needle[len-sii - 1u])
             {
                 // if we haven't set it yet, set it now
                 // (besides default)
@@ -1229,8 +1248,6 @@ fn boyer_moore_matching_suffixes(needle_str: str) -> [uint] {
                     mm[sii] = len-pii;
                 }
             }
-
-            pii -= 1u;
         }
 
         // if it hasn't been set, there was no matching prefix,

From cb4e9680840ab809af278d3fed2764647bd75485 Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Fri, 9 Mar 2012 02:50:35 -0800
Subject: [PATCH 10/14] (core::str) demonstrate that the suffix table currently
 performs poorly, i.e., this is currently Boyer-Moore-Horspool

---
 src/libcore/str.rs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index ad2c576581033..9a51b70965886 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -1102,24 +1102,25 @@ fn boyer_moore_search (haystack: str, needle: str,
 
     // generate the tables
     let ct = boyer_moore_unmatched_chars(needle);
-    let pt = boyer_moore_matching_suffixes(needle);
+    //let pt = boyer_moore_matching_suffixes(needle);
 
     // query both tables based on position
     // within the needle and character in haystack
     let getShift = fn@(pos: uint, ch: u8) -> uint {
         let matchedSoFar = nlen - 1u - pos;
         let rawCharShift = ct[ch as uint];
-        let prefShift    = pt[matchedSoFar];
+//        let prefShift    = pt[matchedSoFar];
 
         if rawCharShift >= matchedSoFar {
            let adjCharShift = rawCharShift - matchedSoFar;
 
-           if adjCharShift > prefShift {
+//           if adjCharShift > prefShift {
                ret adjCharShift;
-           }
+//           }
         }
 
-        ret prefShift;
+//        ret prefShift;
+        ret 1u;
     };
 
     // step up through the haystack

From 90e903aed12a3c5ff0a9ddda112853ee52dd87c3 Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Sat, 10 Mar 2012 13:20:10 -0800
Subject: [PATCH 11/14] (core::str) significantly improved boyer-moore, still
 testing...

---
 src/libcore/str.rs | 153 +++++++++++++++++++++++++--------------------
 src/libcore/vec.rs |   4 +-
 2 files changed, 87 insertions(+), 70 deletions(-)

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 9a51b70965886..80d175cee33a5 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -1006,27 +1006,28 @@ fn find_str_between(haystack: str, needle: str, start: uint, end:uint)
     }
 }
 
-// Function: findn_str
-//
-// Returns up to `nn` byte positions of matched substrings
+#[doc = "Returns up to `nn` byte positions of matched substrings"]
 fn findn_str(haystack: str, needle: str, nn: uint) -> [uint] {
     findn_str_between(haystack, needle, nn, 0u, str::len(haystack))
 }
 
-// Function: findn_str_between
-//
-// Returns up to `nn` byte positions of matched substrings
-// between `start` and `end`
+#[doc = "
+Returns up to `nn` byte positions of matched substrings
+between `start` and `end`
+"]
 fn findn_str_between (haystack: str, needle: str,
                       nn: uint,
                       start: uint, end: uint) -> [uint] {
 
     boyer_moore_search(haystack, needle, nn, start, end)
+    //simple_search(haystack, needle, nn, start, end)
 }
 
-// Returns up to `nn` byte positions of matched substrings
-// between `start` and `end`
-// (using a naive search algorithm)
+#[doc = "
+Returns up to `nn` byte positions of matched substrings
+between `start` and `end`
+(using a naive search algorithm)
+"]
 fn simple_search (haystack: str, needle: str,
                       nn: uint,
                       start: uint, end: uint) -> [uint] {
@@ -1076,9 +1077,11 @@ fn simple_search (haystack: str, needle: str,
     ret results;
 }
 
-// Returns up to `nn` byte positions of matched substrings
-// between `start` and `end`
-// (using Boyer-Moore)
+#[doc = "
+Returns up to `nn` byte positions of matched substrings
+between `start` and `end`
+(using Boyer-Moore)
+"]
 fn boyer_moore_search (haystack: str, needle: str,
                       nn: uint,
                       start: uint, end: uint) -> [uint] {
@@ -1102,25 +1105,24 @@ fn boyer_moore_search (haystack: str, needle: str,
 
     // generate the tables
     let ct = boyer_moore_unmatched_chars(needle);
-    //let pt = boyer_moore_matching_suffixes(needle);
+    let pt = boyer_moore_matching_suffixes(needle);
 
     // query both tables based on position
     // within the needle and character in haystack
     let getShift = fn@(pos: uint, ch: u8) -> uint {
         let matchedSoFar = nlen - 1u - pos;
         let rawCharShift = ct[ch as uint];
-//        let prefShift    = pt[matchedSoFar];
+        let prefShift    = pt[matchedSoFar];
 
         if rawCharShift >= matchedSoFar {
            let adjCharShift = rawCharShift - matchedSoFar;
 
-//           if adjCharShift > prefShift {
+           if adjCharShift > prefShift {
                ret adjCharShift;
-//           }
+           }
         }
 
-//        ret prefShift;
-        ret 1u;
+        ret prefShift;
     };
 
     // step up through the haystack
@@ -1166,7 +1168,7 @@ fn boyer_moore_search (haystack: str, needle: str,
 // (a.k.a. the bad-character table)
 fn boyer_moore_unmatched_chars(needle: str) -> [uint] {
     let len = str::len(needle);
-    let mm  = vec::to_mut(vec::init_elt(255u, len));
+    let mm  = vec::to_mut(vec::from_elem(255u, len));
 
     assert 0u < len;
     let mut jj = len - 1u; // drop the last byte
@@ -1196,70 +1198,87 @@ fn boyer_moore_matching_suffixes(needle_str: str) -> [uint] {
     let len = vec::len(needle);
 
     // initialize len chars to len
-    let mm = vec::to_mut(vec::init_elt(len, len));
-
-    let range_ends_with = fn@(vvv0: uint, vvv1: uint,
-                             vv0:  uint, vv1:  uint) -> bool {
-        // needle: [u8]
+    let mm = vec::to_mut(vec::from_elem(len, len));
+
+    // is the suffix from here a prefix of the needle?
+    let is_prefix = fn@(pos: uint) -> bool {
+        let suffixlen = len - pos;
+        let mut ii = 0u;
+        while ii < suffixlen {
+            if needle[ii] != needle[pos + ii] { ret false; }
+            ii += 1u;
+        }
+        ret true;
+    };
 
-        let shortLen = vv1 - vv0;
+    // if this is the end of a suffix of the word, how long is it?
+    let longest_suffix = fn@(pos: uint) -> uint {
+        let mut jj = 0u;
 
-        let mut iii = vvv1 - shortLen;
-        let mut ii = vv0;
+        // count up while matching larger suffixes with this prefix
+        while needle[pos - jj] == needle[len - 1u - jj]
+              && jj < pos
+        {
+            jj += 1u;
 
-        while ii < vv1 {
-            if needle[ii] != needle[iii] { ret false; }
-            ii += 1u;
-            iii += 1u;
+            assert pos    >= jj;
+            assert len-1u >= jj;
         }
 
-        ret true;
+        ret jj;
     };
 
+
+    // step to smaller prefixes
+    // for the case where each suffix could contain a prefix of the needle
+    // i.e., suffix ends with prefix?
+    let mut pii = len;
+    let mut last_prefix_index = len - 1u;
+    while 0u < pii {
+        pii -= 1u;
+
+        // FIXME: possible +1 issue
+
+        // find if each possible suffix is a prefix
+        if is_prefix(pii + 1u) { last_prefix_index = pii + 1u; };
+        ////log(error, "pref idx ->");
+        ////log(error, last_prefix_index);
+
+        ////log(error, "prefix(pii..len):");
+        ////log(error, str::from_bytes(vec::slice(needle, pii, len)));
+        //mm[pii] = last_prefix_index + len - 1u - pii;
+        mm[len - 1u - pii] = last_prefix_index;
+    }
+
+
+    ////log(error, mm);
+
     // step to larger suffixes
+    // for the case where each suffix could be part of the needle
+    // i.e., prefix ends with suffix?
     let mut sii = 0u;
     while sii < len {
+        let slen = longest_suffix(sii);
+        assert sii >= slen;
 
-        // tail of the needle we seek
-        //let suffix      = vec::slice(needle, len - sii,      len);
-        //let suffix_plus = vec::slice(needle, len - sii - 1u, len);
-
-        // step to smaller prefixes
-        let mut pii = len;
-        while 0u < pii {
-            pii -= 1u;
-
-            // a prefix of the needle
-            //let prefix = vec::slice(needle, 0u, pii); // 0 -> pii
-
-            // if suffix fully matched, or
-            // prefix is bigger than suffix: only tail matched
-            // (which we might jump to)
-            if
-                (pii <= sii
-                 && range_ends_with(len-sii, len, 0u, pii))
-            ||
-                (sii < pii
-                 && range_ends_with(0u, pii, len-sii, len)
-                 && needle[pii - sii -1u] != needle[len-sii - 1u])
-            {
-                // if we haven't set it yet, set it now
-                // (besides default)
-                if mm[sii] == len {
-                    mm[sii] = len-pii;
-                }
-            }
-        }
+        if needle[sii - slen] != needle[len - 1u - slen] {
+            ////log(error, "suffix(len-1-slen)");
+            ////log(error, str::from_bytes(vec::slice(needle,len-slen, len)));
 
-        // if it hasn't been set, there was no matching prefix,
-        // so set it now
-        if mm[sii] == len {
-            mm[sii] = len-pii;
+            ////log(error, "sii:");
+            ////log(error, sii);
+
+            ////log(error, "slen:");
+            ////log(error, slen);
+
+            mm[slen] = len - 1u - sii;
         }
 
         sii += 1u;
     }
 
+    ////log(error, mm);
+
     ret vec::from_mut(mm);
 }
 
diff --git a/src/libcore/vec.rs b/src/libcore/vec.rs
index aeb112a840d1a..9d98c61766390 100644
--- a/src/libcore/vec.rs
+++ b/src/libcore/vec.rs
@@ -869,9 +869,7 @@ fn permute<T: copy>(v: [T], put: fn([T])) {
   }
 }
 
-// Function: windowed
-//
-// Return all sub-vectors of size `nn`
+#[doc = "Return all sub-vectors of size `nn`"]
 fn windowed <TT: copy> (nn: uint, xx: [const TT]) -> [[TT]] {
    let mut ww = [];
 

From 95f5b36c8631e03f06fbe0e4d20ca27946fd1bb2 Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Thu, 22 Mar 2012 01:43:40 -0700
Subject: [PATCH 12/14] (core::str) updated Boyer-Moore again, with faster
 good-suffix calc

---
 src/libcore/str.rs | 226 +++++++++++++++++++++++++++++----------------
 1 file changed, 146 insertions(+), 80 deletions(-)

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 80d175cee33a5..09bc777cb87ea 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -1019,8 +1019,10 @@ fn findn_str_between (haystack: str, needle: str,
                       nn: uint,
                       start: uint, end: uint) -> [uint] {
 
-    boyer_moore_search(haystack, needle, nn, start, end)
-    //simple_search(haystack, needle, nn, start, end)
+    let BM = boyer_moore_search(haystack, needle, nn, start, end);
+    let SS = simple_search(haystack, needle, nn, start, end);
+    assert SS == BM;
+    ret SS;
 }
 
 #[doc = "
@@ -1168,7 +1170,7 @@ fn boyer_moore_search (haystack: str, needle: str,
 // (a.k.a. the bad-character table)
 fn boyer_moore_unmatched_chars(needle: str) -> [uint] {
     let len = str::len(needle);
-    let mm  = vec::to_mut(vec::from_elem(255u, len));
+    let deltas = vec::to_mut(vec::from_elem(255u, len));
 
     assert 0u < len;
     let mut jj = len - 1u; // drop the last byte
@@ -1181,105 +1183,132 @@ fn boyer_moore_unmatched_chars(needle: str) -> [uint] {
 
         // if we haven't set it yet, set it now
         // (besides default)
-        if mm[key] == len {
-            mm[key] = len - 1u - jj;
+        if deltas[key] == len {
+            deltas[key] = len - 1u - jj;
         }
     }
 
-    ret vec::from_mut(mm);
+    ret vec::from_mut(deltas);
 }
 
-// compute the table used to choose a shift based on
-// a partially matched suffix of the search string
-// (a.k.a. the good-suffix table)
-fn boyer_moore_matching_suffixes(needle_str: str) -> [uint] {
-    let needle = str::bytes(needle_str);
+// for each prefix of the search string, find the length of
+// its largest suffix which is also a suffix of the search string
+fn boyer_moore_largest_suffixes(needle: str) -> [uint] {
+    let len = str::len(needle);
 
-    let len = vec::len(needle);
+    if len == 0u { ret []; }
 
-    // initialize len chars to len
-    let mm = vec::to_mut(vec::from_elem(len, len));
+    let mut suffs = vec::to_mut(vec::from_elem(len, 0u));
+    suffs[len - 1u] = len;
 
-    // is the suffix from here a prefix of the needle?
-    let is_prefix = fn@(pos: uint) -> bool {
-        let suffixlen = len - pos;
-        let mut ii = 0u;
-        while ii < suffixlen {
-            if needle[ii] != needle[pos + ii] { ret false; }
-            ii += 1u;
-        }
-        ret true;
-    };
+    let mut ii   = len - 1u;
+    let mut head = len; // index starting the previous found suffix
+    let mut tail = len; // index after the previous found suffix
 
-    // if this is the end of a suffix of the word, how long is it?
-    let longest_suffix = fn@(pos: uint) -> uint {
-        let mut jj = 0u;
+    // loop through each smaller prefix,
+    // keeping track of the last suffix of a prefix
+    // which was found to be a suffix of the needle
+    while 0u < ii {
+        ii -= 1u;
 
-        // count up while matching larger suffixes with this prefix
-        while needle[pos - jj] == needle[len - 1u - jj]
-              && jj < pos
+        if head < ii + 1u
+           && suffs[(len - 1u) - ((tail - 1u) - ii)] + head < ii + 1u
         {
-            jj += 1u;
-
-            assert pos    >= jj;
-            assert len-1u >= jj;
-        }
-
-        ret jj;
-    };
-
+            // The needle is a suffix of itself, stored before this loop,
+            // so each prefix of that is matched
+            // with its largest possible suffix...
+            //
+            // So (bear with me) when considering prefixes
+            // of another matched prefix (i.e., when head <= ii < tail)
+            // if the corresponding maximum prefix's match is
+            // smaller than the space left within the current match,
+            // then we know this prefix's matching suffix is the same.
+
+            // Consider:
+            //     01234567
+            //     heyyheyy
+            //       ^   ^
+            //
+            // When testing i=2, a match from 0-3 has already been found
+            // ("heyy"), and because the match at i=6 ("y") fits
+            // in the remaining space within the current match,
+            // we know that suffs[2]=suffs[6].
+            //
+            // If suffs[6] were much larger, though, we'd have to work more.
+
+            suffs[ii] = suffs[(len - 1u) - ((tail-1u) - ii)];
 
-    // step to smaller prefixes
-    // for the case where each suffix could contain a prefix of the needle
-    // i.e., suffix ends with prefix?
-    let mut pii = len;
-    let mut last_prefix_index = len - 1u;
-    while 0u < pii {
-        pii -= 1u;
+        } else {
+            // Here, find the largest suffix of the needle which matches
+            // the prefix ending at ii.
+
+            // move the head left
+            //
+            // Note that if the head is already further left,
+            // we've already explored that far and eliminated the possibility
+            // of a smaller match, above.
+            if ii + 1u <= head {
+                 head = ii + 1u;
+            }
 
-        // FIXME: possible +1 issue
+            // put the tail here (the ending of this suffix)
+            tail = ii + 1u;
 
-        // find if each possible suffix is a prefix
-        if is_prefix(pii + 1u) { last_prefix_index = pii + 1u; };
-        ////log(error, "pref idx ->");
-        ////log(error, last_prefix_index);
+            // move the head left until it is before the matching suffix
+            while 1u <= head
+               && needle[head-1u] == needle[(len - 1u) - (tail - head)]
+            {
+                head -= 1u;
+            }
 
-        ////log(error, "prefix(pii..len):");
-        ////log(error, str::from_bytes(vec::slice(needle, pii, len)));
-        //mm[pii] = last_prefix_index + len - 1u - pii;
-        mm[len - 1u - pii] = last_prefix_index;
+            // store the length of this suffix
+            suffs[ii] = tail - head;
+        }
     }
 
+    ret vec::from_mut(suffs);
+}
 
-    ////log(error, mm);
-
-    // step to larger suffixes
-    // for the case where each suffix could be part of the needle
-    // i.e., prefix ends with suffix?
-    let mut sii = 0u;
-    while sii < len {
-        let slen = longest_suffix(sii);
-        assert sii >= slen;
-
-        if needle[sii - slen] != needle[len - 1u - slen] {
-            ////log(error, "suffix(len-1-slen)");
-            ////log(error, str::from_bytes(vec::slice(needle,len-slen, len)));
-
-            ////log(error, "sii:");
-            ////log(error, sii);
-
-            ////log(error, "slen:");
-            ////log(error, slen);
-
-            mm[slen] = len - 1u - sii;
+// compute the table used to choose a shift based on
+// a partially matched suffix of the search string
+// (a.k.a. the good-suffix table)
+fn boyer_moore_matching_suffixes(needle: str) -> [uint] {
+    let len   = str::len(needle);
+
+    // compute the largest suffix of each prefix
+    let suffs = boyer_moore_largest_suffixes(needle);
+
+    // (1) initialize deltas
+    let deltas = vec::to_mut(vec::from_elem(len, len));
+
+    // (2) step to smaller prefixes ending at ii, and
+    // if a whole prefix is also a suffix of the needle,
+    // set every still-unset delta at an index larger than ii
+    // to length - 1 - ii
+    let mut ii = len;
+    let mut jj = 0u;
+    while 0u < ii {
+        ii -= 1u;
+
+        if suffs[ii] == ii + 1u {
+            // do not reset jj, only do this once
+            while ii < len - 1u - jj {
+                if deltas[len - 1u - jj] == len {
+                    deltas[len - 1u - jj] = len - 1u - ii;
+                }
+                jj += 1u;
+            }
         }
-
-        sii += 1u;
     }
 
-    ////log(error, mm);
+    // (3) then for each different matched suffix size, set the delta
+    let mut kk = 0u;
+    while 2u <= len && kk <= len - 2u {
+        deltas[suffs[kk]] = len - 1u - kk;
+        kk += 1u;
+    }
 
-    ret vec::from_mut(mm);
+    ret vec::from_mut(deltas);
 }
 
 #[doc = "
@@ -2180,6 +2209,22 @@ mod tests {
         assert findn_str_between(data, "ax", 1u, 0u, 6u) == [];
     }
 
+    #[test]
+    fn test_simple_search() {
+        let data = "abcabc";
+        assert simple_search(data, "ab", 2u, 0u, 6u) == [0u, 3u];
+        assert simple_search(data, "ab", 1u, 0u, 6u) == [0u];
+        assert simple_search(data, "ax", 1u, 0u, 6u) == [];
+    }
+
+    #[test]
+    fn test_boyer_moore_search() {
+        let data = "abcabc";
+        assert boyer_moore_search(data, "ab", 2u, 0u, 6u) == [0u, 3u];
+        assert boyer_moore_search(data, "ab", 1u, 0u, 6u) == [0u];
+        assert boyer_moore_search(data, "ax", 1u, 0u, 6u) == [];
+    }
+
     #[test]
     fn test_findn_str() {
         assert []       == str::findn_str("banana", "apple pie", 1u);
@@ -2618,8 +2663,29 @@ mod tests {
         assert 6u == ct[0x_80_u];
     }
 
+    #[test]
+    fn test_boyer_moore_largest_suffixes() {
+        assert boyer_moore_largest_suffixes("")
+            == [];
+
+        assert boyer_moore_largest_suffixes("x")
+            == [1u];
+
+        assert boyer_moore_largest_suffixes("heyyheyyheyy")
+            == [0u,0u,1u,4u,0u,0u,1u,8u,0u,0u,1u,12u];
+
+        assert boyer_moore_largest_suffixes("gcagagag")
+            == [1u,0u,0u,2u,0u,4u,0u,8u];
+    }
+
     #[test]
     fn test_matching_suffixes_ascii() {
+        assert [] == boyer_moore_matching_suffixes("");
+
+        let test1 = boyer_moore_matching_suffixes("gcagagag");
+        assert test1 == [1u,7u,4u,7u,2u,7u,7u,7u];
+
+
         let pt = boyer_moore_matching_suffixes("ANPANMAN");
 
         assert 1u == pt[0u]; //        (n)

From 6e8de238d8307fd5f46f80ea9630cbc328d95261 Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Mon, 26 Mar 2012 04:06:43 -0700
Subject: [PATCH 13/14] (core::str) based on testing so far, choose boyer-moore
 when it can be faster

---
 src/libcore/str.rs | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 09bc777cb87ea..94e89adc3cf53 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -1019,10 +1019,15 @@ fn findn_str_between (haystack: str, needle: str,
                       nn: uint,
                       start: uint, end: uint) -> [uint] {
 
-    let BM = boyer_moore_search(haystack, needle, nn, start, end);
-    let SS = simple_search(haystack, needle, nn, start, end);
-    assert SS == BM;
-    ret SS;
+    let hl = str::len(haystack);
+    let nl = str::len(needle);
+
+    // numbers subject to change...
+    if hl > 10*nl + 1500 && nl > 10 {
+        ret boyer_moore_search(haystack, needle, nn, start, end);
+    } else {
+        ret simple_search(haystack, needle, nn, start, end);
+    }
 }
 
 #[doc = "

From f7aa6b23e49dbb131e6b6d7e9b3e5c86ac77c2f9 Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Mon, 26 Mar 2012 04:11:08 -0700
Subject: [PATCH 14/14] touchups

---
 src/libcore/str.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 94e89adc3cf53..fc2858de0c117 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -1023,7 +1023,9 @@ fn findn_str_between (haystack: str, needle: str,
     let nl = str::len(needle);
 
     // numbers subject to change...
-    if hl > 10*nl + 1500 && nl > 10 {
+    if hl > 10u * nl + 1500u
+       && nl > 10u
+    {
         ret boyer_moore_search(haystack, needle, nn, start, end);
     } else {
         ret simple_search(haystack, needle, nn, start, end);