From de14a739ae27088acaa3c5e21949a39bffd5313b Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 30 Apr 2014 00:05:59 +1000 Subject: [PATCH 1/2] regex: remove the use of ~[] & some unnecessary ~'s. The AST doesn't need ~s everywhere, so we can save allocations this way & the enum isn't particularly large (~4 words) nor are regexes long (normally), so the space saved in the `Cat` vector is unlikely to be very much. --- src/libregex/compile.rs | 51 ++++++++-------- src/libregex/lib.rs | 2 +- src/libregex/parse.rs | 116 ++++++++++++++++++------------------- src/libregex/re.rs | 2 +- src/libregex/test/bench.rs | 2 +- src/libregex_macros/lib.rs | 6 +- 6 files changed, 89 insertions(+), 90 deletions(-) diff --git a/src/libregex/compile.rs b/src/libregex/compile.rs index 3987d75505099..e5166c6c17cb4 100644 --- a/src/libregex/compile.rs +++ b/src/libregex/compile.rs @@ -13,7 +13,6 @@ #![allow(visible_private_types)] use std::cmp; -use std::iter; use parse; use parse::{ Flags, FLAG_EMPTY, @@ -89,7 +88,7 @@ pub struct Program { impl Program { /// Compiles a Regex given its AST. - pub fn new(ast: ~parse::Ast) -> (Program, ~[Option<~str>]) { + pub fn new(ast: parse::Ast) -> (Program, Vec>) { let mut c = Compiler { insts: Vec::with_capacity(100), names: Vec::with_capacity(10), @@ -104,16 +103,16 @@ impl Program { // This is a bit hacky since we have to skip over the initial // 'Save' instruction. let mut pre = StrBuf::with_capacity(5); - for i in iter::range(1, c.insts.len()) { - match *c.insts.get(i) { + for inst in c.insts.slice_from(1).iter() { + match *inst { OneChar(c, FLAG_EMPTY) => pre.push_char(c), _ => break } } - let names = c.names.as_slice().into_owned(); + let Compiler { insts, names } = c; let prog = Program { - insts: c.insts, + insts: insts, prefix: pre.into_owned(), }; (prog, names) @@ -144,17 +143,17 @@ struct Compiler<'r> { // The only tricky thing here is patching jump/split instructions to point to // the right instruction. impl<'r> Compiler<'r> { - fn compile(&mut self, ast: ~parse::Ast) { + fn compile(&mut self, ast: parse::Ast) { match ast { - ~Nothing => {}, - ~Literal(c, flags) => self.push(OneChar(c, flags)), - ~Dot(nl) => self.push(Any(nl)), - ~Class(ranges, flags) => + Nothing => {}, + Literal(c, flags) => self.push(OneChar(c, flags)), + Dot(nl) => self.push(Any(nl)), + Class(ranges, flags) => self.push(CharClass(ranges, flags)), - ~Begin(flags) => self.push(EmptyBegin(flags)), - ~End(flags) => self.push(EmptyEnd(flags)), - ~WordBoundary(flags) => self.push(EmptyWordBoundary(flags)), - ~Capture(cap, name, x) => { + Begin(flags) => self.push(EmptyBegin(flags)), + End(flags) => self.push(EmptyEnd(flags)), + WordBoundary(flags) => self.push(EmptyWordBoundary(flags)), + Capture(cap, name, x) => { let len = self.names.len(); if cap >= len { self.names.grow(10 + cap - len, &None) @@ -162,30 +161,30 @@ impl<'r> Compiler<'r> { *self.names.get_mut(cap) = name; self.push(Save(2 * cap)); - self.compile(x); + self.compile(*x); self.push(Save(2 * cap + 1)); } - ~Cat(xs) => { + Cat(xs) => { for x in xs.move_iter() { self.compile(x) } } - ~Alt(x, y) => { + Alt(x, y) => { let split = self.empty_split(); // push: split 0, 0 let j1 = self.insts.len(); - self.compile(x); // push: insts for x + self.compile(*x); // push: insts for x let jmp = self.empty_jump(); // push: jmp 0 let j2 = self.insts.len(); - self.compile(y); // push: insts for y + self.compile(*y); // push: insts for y let j3 = self.insts.len(); self.set_split(split, j1, j2); // split 0, 0 -> split j1, j2 self.set_jump(jmp, j3); // jmp 0 -> jmp j3 } - ~Rep(x, ZeroOne, g) => { + Rep(x, ZeroOne, g) => { let split = self.empty_split(); let j1 = self.insts.len(); - self.compile(x); + self.compile(*x); let j2 = self.insts.len(); if g.is_greedy() { @@ -194,11 +193,11 @@ impl<'r> Compiler<'r> { self.set_split(split, j2, j1); } } - ~Rep(x, ZeroMore, g) => { + Rep(x, ZeroMore, g) => { let j1 = self.insts.len(); let split = self.empty_split(); let j2 = self.insts.len(); - self.compile(x); + self.compile(*x); let jmp = self.empty_jump(); let j3 = self.insts.len(); @@ -209,9 +208,9 @@ impl<'r> Compiler<'r> { self.set_split(split, j3, j2); } } - ~Rep(x, OneMore, g) => { + Rep(x, OneMore, g) => { let j1 = self.insts.len(); - self.compile(x); + self.compile(*x); let split = self.empty_split(); let j2 = self.insts.len(); diff --git a/src/libregex/lib.rs b/src/libregex/lib.rs index cd5d387bfa0d6..b94271622d7e9 100644 --- a/src/libregex/lib.rs +++ b/src/libregex/lib.rs @@ -362,7 +362,7 @@ html_root_url = "http://static.rust-lang.org/doc/master")] #![feature(macro_rules, phase)] -#![deny(missing_doc)] +#![deny(missing_doc, deprecated_owned_vector)] extern crate collections; #[cfg(test)] diff --git a/src/libregex/parse.rs b/src/libregex/parse.rs index 27510f01bd676..094da4ae269b5 100644 --- a/src/libregex/parse.rs +++ b/src/libregex/parse.rs @@ -62,7 +62,7 @@ pub enum Ast { Capture(uint, Option<~str>, ~Ast), // Represent concatenation as a flat vector to avoid blowing the // stack in the compiler. - Cat(Vec<~Ast>), + Cat(Vec), Alt(~Ast, ~Ast), Rep(~Ast, Repeater, Greed), } @@ -103,7 +103,7 @@ impl Greed { /// state. #[deriving(Show)] enum BuildAst { - Ast(~Ast), + Ast(Ast), Paren(Flags, uint, ~str), // '(' Bar, // '|' } @@ -152,7 +152,7 @@ impl BuildAst { } } - fn unwrap(self) -> Result<~Ast, Error> { + fn unwrap(self) -> Result { match self { Ast(x) => Ok(x), _ => fail!("Tried to unwrap non-AST item: {}", self), @@ -188,7 +188,7 @@ struct Parser<'a> { names: Vec<~str>, } -pub fn parse(s: &str) -> Result<~Ast, Error> { +pub fn parse(s: &str) -> Result { Parser { chars: s.chars().collect(), chari: 0, @@ -200,7 +200,7 @@ pub fn parse(s: &str) -> Result<~Ast, Error> { } impl<'a> Parser<'a> { - fn parse(&mut self) -> Result<~Ast, Error> { + fn parse(&mut self) -> Result { loop { let c = self.cur(); match c { @@ -243,7 +243,7 @@ impl<'a> Parser<'a> { // alternate and make it a capture. if cap.is_some() { let ast = try!(self.pop_ast()); - self.push(~Capture(cap.unwrap(), cap_name, ast)); + self.push(Capture(cap.unwrap(), cap_name, ~ast)); } } '|' => { @@ -294,14 +294,14 @@ impl<'a> Parser<'a> { self.chari < self.chars.len() } - fn pop_ast(&mut self) -> Result<~Ast, Error> { + fn pop_ast(&mut self) -> Result { match self.stack.pop().unwrap().unwrap() { Err(e) => Err(e), Ok(ast) => Ok(ast), } } - fn push(&mut self, ast: ~Ast) { + fn push(&mut self, ast: Ast) { self.stack.push(Ast(ast)) } @@ -323,29 +323,29 @@ impl<'a> Parser<'a> { } let ast = try!(self.pop_ast()); match ast { - ~Begin(_) | ~End(_) | ~WordBoundary(_) => + Begin(_) | End(_) | WordBoundary(_) => return self.err( "Repeat arguments cannot be empty width assertions."), _ => {} } let greed = try!(self.get_next_greedy()); - self.push(~Rep(ast, rep, greed)); + self.push(Rep(~ast, rep, greed)); Ok(()) } fn push_literal(&mut self, c: char) -> Result<(), Error> { match c { '.' => { - self.push(~Dot(self.flags)) + self.push(Dot(self.flags)) } '^' => { - self.push(~Begin(self.flags)) + self.push(Begin(self.flags)) } '$' => { - self.push(~End(self.flags)) + self.push(End(self.flags)) } _ => { - self.push(~Literal(c, self.flags)) + self.push(Literal(c, self.flags)) } } Ok(()) @@ -362,7 +362,7 @@ impl<'a> Parser<'a> { FLAG_EMPTY }; let mut ranges: Vec<(char, char)> = vec!(); - let mut alts: Vec<~Ast> = vec!(); + let mut alts: Vec = vec!(); if self.peek_is(1, ']') { try!(self.expect(']')) @@ -378,8 +378,8 @@ impl<'a> Parser<'a> { match c { '[' => match self.try_parse_ascii() { - Some(~Class(asciis, flags)) => { - alts.push(~Class(asciis, flags ^ negated)); + Some(Class(asciis, flags)) => { + alts.push(Class(asciis, flags ^ negated)); continue } Some(ast) => @@ -389,12 +389,12 @@ impl<'a> Parser<'a> { }, '\\' => { match try!(self.parse_escape()) { - ~Class(asciis, flags) => { - alts.push(~Class(asciis, flags ^ negated)); + Class(asciis, flags) => { + alts.push(Class(asciis, flags ^ negated)); continue } - ~Literal(c2, _) => c = c2, // process below - ~Begin(_) | ~End(_) | ~WordBoundary(_) => + Literal(c2, _) => c = c2, // process below + Begin(_) | End(_) | WordBoundary(_) => return self.err( "\\A, \\z, \\b and \\B are not valid escape \ sequences inside a character class."), @@ -407,15 +407,15 @@ impl<'a> Parser<'a> { ']' => { if ranges.len() > 0 { let flags = negated | (self.flags & FLAG_NOCASE); - let mut ast = ~Class(combine_ranges(ranges), flags); + let mut ast = Class(combine_ranges(ranges), flags); for alt in alts.move_iter() { - ast = ~Alt(alt, ast) + ast = Alt(~alt, ~ast) } self.push(ast); } else if alts.len() > 0 { let mut ast = alts.pop().unwrap(); for alt in alts.move_iter() { - ast = ~Alt(alt, ast) + ast = Alt(~alt, ~ast) } self.push(ast); } @@ -444,7 +444,7 @@ impl<'a> Parser<'a> { // and moves the parser to the final ']' character. // If unsuccessful, no state is changed and None is returned. // Assumes that '[' is the current character. - fn try_parse_ascii(&mut self) -> Option<~Ast> { + fn try_parse_ascii(&mut self) -> Option { if !self.peek_is(1, ':') { return None } @@ -473,7 +473,7 @@ impl<'a> Parser<'a> { Some(ranges) => { self.chari = closer; let flags = negated | (self.flags & FLAG_NOCASE); - Some(~Class(combine_ranges(ranges), flags)) + Some(Class(combine_ranges(ranges), flags)) } } } @@ -546,7 +546,7 @@ impl<'a> Parser<'a> { for _ in iter::range(0, min) { self.push(ast.clone()) } - self.push(~Rep(ast, ZeroMore, greed)); + self.push(Rep(~ast, ZeroMore, greed)); } else { // Require N copies of what's on the stack and then repeat it // up to M times optionally. @@ -556,14 +556,14 @@ impl<'a> Parser<'a> { } if max.is_some() { for _ in iter::range(min, max.unwrap()) { - self.push(~Rep(ast.clone(), ZeroOne, greed)) + self.push(Rep(~ast.clone(), ZeroOne, greed)) } } // It's possible that we popped something off the stack but // never put anything back on it. To keep things simple, add // a no-op expression. if min == 0 && (max.is_none() || max == Some(0)) { - self.push(~Nothing) + self.push(Nothing) } } Ok(()) @@ -571,24 +571,24 @@ impl<'a> Parser<'a> { // Parses all escape sequences. // Assumes that '\' is the current character. - fn parse_escape(&mut self) -> Result<~Ast, Error> { + fn parse_escape(&mut self) -> Result { try!(self.noteof("an escape sequence following a '\\'")) let c = self.cur(); if is_punct(c) { - return Ok(~Literal(c, FLAG_EMPTY)) + return Ok(Literal(c, FLAG_EMPTY)) } match c { - 'a' => Ok(~Literal('\x07', FLAG_EMPTY)), - 'f' => Ok(~Literal('\x0C', FLAG_EMPTY)), - 't' => Ok(~Literal('\t', FLAG_EMPTY)), - 'n' => Ok(~Literal('\n', FLAG_EMPTY)), - 'r' => Ok(~Literal('\r', FLAG_EMPTY)), - 'v' => Ok(~Literal('\x0B', FLAG_EMPTY)), - 'A' => Ok(~Begin(FLAG_EMPTY)), - 'z' => Ok(~End(FLAG_EMPTY)), - 'b' => Ok(~WordBoundary(FLAG_EMPTY)), - 'B' => Ok(~WordBoundary(FLAG_NEGATED)), + 'a' => Ok(Literal('\x07', FLAG_EMPTY)), + 'f' => Ok(Literal('\x0C', FLAG_EMPTY)), + 't' => Ok(Literal('\t', FLAG_EMPTY)), + 'n' => Ok(Literal('\n', FLAG_EMPTY)), + 'r' => Ok(Literal('\r', FLAG_EMPTY)), + 'v' => Ok(Literal('\x0B', FLAG_EMPTY)), + 'A' => Ok(Begin(FLAG_EMPTY)), + 'z' => Ok(End(FLAG_EMPTY)), + 'b' => Ok(WordBoundary(FLAG_EMPTY)), + 'B' => Ok(WordBoundary(FLAG_NEGATED)), '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7' => Ok(try!(self.parse_octal())), 'x' => Ok(try!(self.parse_hex())), 'p' | 'P' => Ok(try!(self.parse_unicode_name())), @@ -596,7 +596,7 @@ impl<'a> Parser<'a> { let ranges = perl_unicode_class(c); let mut flags = self.flags & FLAG_NOCASE; if c.is_uppercase() { flags |= FLAG_NEGATED } - Ok(~Class(ranges, flags)) + Ok(Class(ranges, flags)) } _ => self.err(format!("Invalid escape sequence '\\\\{}'", c)), } @@ -607,7 +607,7 @@ impl<'a> Parser<'a> { // name is the unicode class name. // Assumes that \p or \P has been read (and 'p' or 'P' is the current // character). - fn parse_unicode_name(&mut self) -> Result<~Ast, Error> { + fn parse_unicode_name(&mut self) -> Result { let negated = if self.cur() == 'P' { FLAG_NEGATED } else { FLAG_EMPTY }; let mut name: ~str; if self.peek_is(1, '{') { @@ -635,14 +635,14 @@ impl<'a> Parser<'a> { None => return self.err(format!( "Could not find Unicode class '{}'", name)), Some(ranges) => { - Ok(~Class(ranges, negated | (self.flags & FLAG_NOCASE))) + Ok(Class(ranges, negated | (self.flags & FLAG_NOCASE))) } } } // Parses an octal number, up to 3 digits. // Assumes that \n has been read, where n is the first digit. - fn parse_octal(&mut self) -> Result<~Ast, Error> { + fn parse_octal(&mut self) -> Result { let start = self.chari; let mut end = start + 1; let (d2, d3) = (self.peek(1), self.peek(2)); @@ -656,7 +656,7 @@ impl<'a> Parser<'a> { } let s = self.slice(start, end); match num::from_str_radix::(s, 8) { - Some(n) => Ok(~Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)), + Some(n) => Ok(Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)), None => self.err(format!( "Could not parse '{}' as octal number.", s)), } @@ -664,7 +664,7 @@ impl<'a> Parser<'a> { // Parse a hex number. Either exactly two digits or anything in {}. // Assumes that \x has been read. - fn parse_hex(&mut self) -> Result<~Ast, Error> { + fn parse_hex(&mut self) -> Result { if !self.peek_is(1, '{') { try!(self.expect('{')) return self.parse_hex_two() @@ -684,7 +684,7 @@ impl<'a> Parser<'a> { // Assumes that \xn has been read, where n is the first digit and is the // current character. // After return, parser will point at the second digit. - fn parse_hex_two(&mut self) -> Result<~Ast, Error> { + fn parse_hex_two(&mut self) -> Result { let (start, end) = (self.chari, self.chari + 2); let bad = self.slice(start - 2, self.chars.len()); try!(self.noteof(format!("Invalid hex escape sequence '{}'", bad))) @@ -692,9 +692,9 @@ impl<'a> Parser<'a> { } // Parses `s` as a hexadecimal number. - fn parse_hex_digits(&self, s: &str) -> Result<~Ast, Error> { + fn parse_hex_digits(&self, s: &str) -> Result { match num::from_str_radix::(s, 16) { - Some(n) => Ok(~Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)), + Some(n) => Ok(Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)), None => self.err(format!( "Could not parse '{}' as hex number.", s)), } @@ -840,7 +840,7 @@ impl<'a> Parser<'a> { // thrown away). But be careful with overflow---we can't count on the // open paren to be there. if from > 0 { from = from - 1} - let ast = try!(self.build_from(from, Alt)); + let ast = try!(self.build_from(from, |l,r| Alt(~l, ~r))); self.push(ast); Ok(()) } @@ -848,8 +848,8 @@ impl<'a> Parser<'a> { // build_from combines all AST elements starting at 'from' in the // parser's stack using 'mk' to combine them. If any such element is not an // AST then it is popped off the stack and ignored. - fn build_from(&mut self, from: uint, mk: |~Ast, ~Ast| -> Ast) - -> Result<~Ast, Error> { + fn build_from(&mut self, from: uint, mk: |Ast, Ast| -> Ast) + -> Result { if from >= self.stack.len() { return self.err("Empty group or alternate not allowed.") } @@ -859,7 +859,7 @@ impl<'a> Parser<'a> { while i > from { i = i - 1; match self.stack.pop().unwrap() { - Ast(x) => combined = ~mk(x, combined), + Ast(x) => combined = mk(x, combined), _ => {}, } } @@ -961,11 +961,11 @@ fn perl_unicode_class(which: char) -> Vec<(char, char)> { // Returns a concatenation of two expressions. This also guarantees that a // `Cat` expression will never be a direct child of another `Cat` expression. -fn concat_flatten(x: ~Ast, y: ~Ast) -> Ast { +fn concat_flatten(x: Ast, y: Ast) -> Ast { match (x, y) { - (~Cat(mut xs), ~Cat(ys)) => { xs.push_all_move(ys); Cat(xs) } - (~Cat(mut xs), ast) => { xs.push(ast); Cat(xs) } - (ast, ~Cat(mut xs)) => { xs.unshift(ast); Cat(xs) } + (Cat(mut xs), Cat(ys)) => { xs.push_all_move(ys); Cat(xs) } + (Cat(mut xs), ast) => { xs.push(ast); Cat(xs) } + (ast, Cat(mut xs)) => { xs.unshift(ast); Cat(xs) } (ast1, ast2) => Cat(vec!(ast1, ast2)), } } diff --git a/src/libregex/re.rs b/src/libregex/re.rs index da3ebaee6dba1..6569f047abae8 100644 --- a/src/libregex/re.rs +++ b/src/libregex/re.rs @@ -109,7 +109,7 @@ pub struct Regex { #[doc(hidden)] pub original: ~str, #[doc(hidden)] - pub names: ~[Option<~str>], + pub names: Vec>, #[doc(hidden)] pub p: MaybeNative, } diff --git a/src/libregex/test/bench.rs b/src/libregex/test/bench.rs index a5667ab088e75..17c66bc670a92 100644 --- a/src/libregex/test/bench.rs +++ b/src/libregex/test/bench.rs @@ -150,6 +150,7 @@ fn easy1() -> Regex { regex!("A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$") fn medium() -> Regex { regex!("[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$") } fn hard() -> Regex { regex!("[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$") } +#[allow(deprecated_owned_vector)] fn gen_text(n: uint) -> ~str { let mut rng = task_rng(); let mut bytes = rng.gen_ascii_str(n).into_bytes(); @@ -176,4 +177,3 @@ throughput!(medium_32K,medium(), 32<<10) throughput!(hard_32, hard(), 32) throughput!(hard_1K, hard(), 1<<10) throughput!(hard_32K,hard(), 32<<10) - diff --git a/src/libregex_macros/lib.rs b/src/libregex_macros/lib.rs index 72e00deba4d9c..5445c5f676173 100644 --- a/src/libregex_macros/lib.rs +++ b/src/libregex_macros/lib.rs @@ -102,7 +102,7 @@ struct NfaGen<'a> { cx: &'a ExtCtxt<'a>, sp: codemap::Span, prog: Program, - names: ~[Option<~str>], + names: Vec>, original: ~str, } @@ -112,7 +112,7 @@ impl<'a> NfaGen<'a> { // expression returned. let num_cap_locs = 2 * self.prog.num_captures(); let num_insts = self.prog.insts.len(); - let cap_names = self.vec_expr(self.names, + let cap_names = self.vec_expr(self.names.as_slice(), |cx, name| match name { &Some(ref name) => { let name = name.as_slice(); @@ -309,7 +309,7 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str, ::regex::Regex { original: ~$regex, - names: ~$cap_names, + names: vec!$cap_names, p: ::regex::native::Native(exec), } }) From 33f98ada02cccb58f700b1b26059ae63d75917c4 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 30 Apr 2014 00:55:28 +1000 Subject: [PATCH 2/2] regex: General style tweaks. For loops are nicer than manual whiles, etc. --- src/libregex/re.rs | 9 ++++----- src/libregex/vm.rs | 24 ++++++------------------ src/libregex_macros/lib.rs | 6 ++---- 3 files changed, 12 insertions(+), 27 deletions(-) diff --git a/src/libregex/re.rs b/src/libregex/re.rs index 6569f047abae8..b40968283bd50 100644 --- a/src/libregex/re.rs +++ b/src/libregex/re.rs @@ -477,14 +477,13 @@ impl Regex { (&self, text: &str, limit: uint, mut rep: R) -> StrBuf { let mut new = StrBuf::with_capacity(text.len()); let mut last_match = 0u; - let mut i = 0; - for cap in self.captures_iter(text) { + + for (i, cap) in self.captures_iter(text).enumerate() { // It'd be nicer to use the 'take' iterator instead, but it seemed // awkward given that '0' => no limit. if limit > 0 && i >= limit { break } - i += 1; let (s, e) = cap.pos(0).unwrap(); // captures only reports matches new.push_str(text.slice(last_match, s)); @@ -800,7 +799,7 @@ impl<'r, 't> Iterator> for FindCaptures<'r, 't> { // Don't accept empty matches immediately following a match. // i.e., no infinite loops please. - if e - s == 0 && Some(self.last_end) == self.last_match { + if e == s && Some(self.last_end) == self.last_match { self.last_end += 1; return self.next() } @@ -842,7 +841,7 @@ impl<'r, 't> Iterator<(uint, uint)> for FindMatches<'r, 't> { // Don't accept empty matches immediately following a match. // i.e., no infinite loops please. - if e - s == 0 && Some(self.last_end) == self.last_match { + if e == s && Some(self.last_end) == self.last_match { self.last_end += 1; return self.next() } diff --git a/src/libregex/vm.rs b/src/libregex/vm.rs index 6058ba6bf9210..ea89c8986930e 100644 --- a/src/libregex/vm.rs +++ b/src/libregex/vm.rs @@ -169,17 +169,15 @@ impl<'r, 't> Nfa<'r, 't> { self.ic = next_ic; next_ic = self.chars.advance(); - let mut i = 0; - while i < clist.size { + for i in range(0, clist.size) { let pc = clist.pc(i); let step_state = self.step(groups.as_mut_slice(), nlist, clist.groups(i), pc); match step_state { StepMatchEarlyReturn => return vec![Some(0), Some(0)], - StepMatch => { matched = true; clist.empty() }, + StepMatch => { matched = true; break }, StepContinue => {}, } - i += 1; } mem::swap(&mut clist, &mut nlist); nlist.empty(); @@ -226,7 +224,7 @@ impl<'r, 't> Nfa<'r, 't> { let found = ranges.as_slice(); let found = found.bsearch(|&rc| class_cmp(casei, c, rc)); let found = found.is_some(); - if (found && !negate) || (!found && negate) { + if found ^ negate { self.add(nlist, pc+1, caps); } } @@ -568,20 +566,10 @@ pub fn find_prefix(needle: &[u8], haystack: &[u8]) -> Option { if nlen > hlen || nlen == 0 { return None } - let mut hayi = 0u; - 'HAYSTACK: loop { - if hayi > hlen - nlen { - break + for (offset, window) in haystack.windows(nlen).enumerate() { + if window == needle { + return Some(offset) } - let mut nedi = 0; - while nedi < nlen { - if haystack[hayi+nedi] != needle[nedi] { - hayi += 1; - continue 'HAYSTACK - } - nedi += 1; - } - return Some(hayi) } None } diff --git a/src/libregex_macros/lib.rs b/src/libregex_macros/lib.rs index 5445c5f676173..8b134d5af8bbc 100644 --- a/src/libregex_macros/lib.rs +++ b/src/libregex_macros/lib.rs @@ -187,18 +187,16 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str, self.ic = next_ic; next_ic = self.chars.advance(); - let mut i = 0; - while i < clist.size { + for i in range(0, clist.size) { let pc = clist.pc(i); let step_state = self.step(&mut groups, nlist, clist.groups(i), pc); match step_state { StepMatchEarlyReturn => return vec![Some(0u), Some(0u)], - StepMatch => { matched = true; clist.empty() }, + StepMatch => { matched = true; break }, StepContinue => {}, } - i += 1; } ::std::mem::swap(&mut clist, &mut nlist); nlist.empty();