Skip to content

Commit 1cadee3

Browse files
character sets support escaping of special chars
1 parent 7406d9c commit 1cadee3

File tree

5 files changed

+36
-6
lines changed

5 files changed

+36
-6
lines changed

__tests__/character-sets.js

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,17 @@ it("matches discrete characters", () => {
55
expectNotMatch("[abce]", ["", "f", "h"]);
66
});
77

8+
it("throws an error if no closing bracket is found", () => {
9+
expect(() => new RegExp("[abce")).toThrow();
10+
});
11+
12+
it("supports escaping of special characters", () => {
13+
expectMatch("[a\\^b]", ["a", "b", "^"]);
14+
expectMatch("[a\\-c]", ["a", "c", "-"]);
15+
expectMatch("[a\\]]", ["a", "]"]);
16+
expectMatch("[a\\\\b]", ["a", "\\"]);
17+
});
18+
819
it("matches character ranges", () => {
920
expectMatch("[a-c]", ["a", "b", "c"]);
1021
expectNotMatch("[a-c]", ["d", "e", ""]);

__tests__/quantifiers.js

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@ it("zero or more is greedy", () => {
3232

3333
it("one or more is greedy", () => {
3434
let match = matches("a+", "aaaaa");
35-
console.log(match);
3635
expect(match).not.toBeNull();
3736
expect(match.matches[0]).toEqual("aaaaa");
3837
});

assembly/parser/parser.ts

Lines changed: 22 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,16 @@ function isQuantifier(code: Char): bool {
1818
return code == Char.Question || code == Char.Plus || code == Char.Asterisk;
1919
}
2020

21+
// characters which have special meaning within character sets
22+
function isCharacterSetSpecialChar(code: Char): bool {
23+
return (
24+
code == Char.Caret ||
25+
code == Char.Minus ||
26+
code == Char.RightSquareBracket ||
27+
code == Char.Backslash
28+
);
29+
}
30+
2131
function isAssertion(code: u32): bool {
2232
return code == Char.Dollar || code == Char.Caret; // "$" or "^"
2333
}
@@ -228,16 +238,25 @@ export class Parser {
228238
while (this.currentToken != "]" || nodes.length == 0) {
229239
// lookahead for character range
230240
if (
231-
this.cursor + 1 < u32(this.input.length) &&
241+
this.cursor + 2 < u32(this.input.length) &&
242+
this.currentToken != "\\" &&
232243
this.input.charCodeAt(this.cursor + 1) == Char.Minus &&
233244
this.input.charCodeAt(this.cursor + 2) != Char.RightSquareBracket
234245
) {
235246
nodes.push(this.parseCharacterRange());
236247
} else {
237-
nodes.push(this.parseCharacter());
248+
if (
249+
this.currentToken == "\\" &&
250+
isCharacterSetSpecialChar(this.input.charCodeAt(this.cursor + 1))
251+
) {
252+
this.eatToken(Char.Backslash);
253+
}
254+
nodes.push(new CharacterNode(this.eatToken()));
238255
}
239256

240-
// TODO error if we run out of chars?
257+
if (this.cursor >= u32(this.input.length)) {
258+
throw new SyntaxError("Unterminated character class");
259+
}
241260
}
242261
this.eatToken(Char.RightSquareBracket);
243262
return new CharacterSetNode(nodes, negated);

package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
"scripts": {
77
"test": "npm run asbuild:untouched && npm run prettier:check && jest __tests__",
88
"test:suite": "npm run asbuild:untouched && jest __spec_tests__ --reporter=jest-summary-reporter",
9+
"jest": "jest __tests__",
910
"prettier:check": "prettier --check .",
1011
"prettier:write": "prettier --write .",
1112
"asbuild:untouched": "asc assembly/index.ts --target debug",

ts/index.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ globalAny.log = console.log;
55

66
import { RegExp } from "../assembly/regexp";
77

8-
const regexObj = new RegExp("[]a]");
9-
const match = regexObj.exec("]");
8+
const regexObj = new RegExp("[a\\\\c]");
9+
const match = regexObj.exec("\\");
1010

1111
console.log(match);

0 commit comments

Comments
 (0)