Skip to content

character set => move logic into parser #12

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jan 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions test/data.test.js → __spec_tests__/data.test.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
const { RegExp } = require("./util");
const { RegExp } = require("../__tests__/util");
const fs = require("fs");
const { fail } = require("assert");

const data = fs.readFileSync("./test/test.dat", "utf8");
const data = fs.readFileSync("./__spec_tests__/test.dat", "utf8");
const lines = data.split("\n");

const matches = (regex, value) => {
Expand Down
File renamed without changes.
15 changes: 15 additions & 0 deletions __tests__/alternations.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
const { RegExp, expectNotMatch, expectMatch, matches } = require("./util");

// Two-branch alternation: either branch on its own is a complete match.
it("or", () => {
expectMatch("a|b", ["b", "a"]);
expectNotMatch("a|b", ["c"]);
// The second branch is the whole sequence "br", so a lone "b" must not match.
expectMatch("a|br", ["br", "a"]);
expectNotMatch("a|br", ["b", "c"]);
});

// Same checks with three branches, including multi-character branches.
it("or multi-term", () => {
expectMatch("a|b|c", ["b", "a", "c"]);
expectNotMatch("a|b|c", ["d"]);
expectMatch("a|br|pc", ["br", "a", "pc"]);
expectNotMatch("a|br|pc", ["b", "pr"]);
});
19 changes: 19 additions & 0 deletions __tests__/boundary-assertions.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
const { RegExp, expectNotMatch, expectMatch, matches } = require("./util");

// `$` anchors to the end of the input: in "ba" the match is the trailing "a" at index 1.
it("matches end of string", () => {
const regex = new RegExp("a$");
const match = regex.exec("ba");
expect(match.index).toEqual(1);
expect(match.matches[0]).toEqual("a");
// An "a" that is not the last character must not satisfy the anchor.
expectNotMatch("a$", ["ab"]);
});

// `^` anchors to the start of the input.
it("matches start of string", () => {
expectMatch("^a", ["a"]);
expectNotMatch("^a", ["ba"]);
});

// A backslash-escaped `^` or `$` is matched as the literal character.
it("handles escaped boundaries", () => {
expectMatch("\\^a", ["^a"]);
expectMatch("a\\$", ["a$"]);
});
35 changes: 35 additions & 0 deletions __tests__/capture-groups.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
const { RegExp, expectNotMatch, expectMatch, matches } = require("./util");

// Capture groups: matches[0] is the overall match and matches[n] is the text
// captured by the n-th parenthesised group; `index` is where the overall
// match starts and `input` echoes the searched string.
it("supports capture groups", () => {
  let match = matches("a(\\d)a", "a3a");
  expect(match.index).toEqual(0);
  expect(match.input).toEqual("a3a");
  expect(match.matches[0]).toEqual("a3a");
  expect(match.matches[1]).toEqual("3");

  // Two leading spaces push the start of the match to index 2.
  match = matches("a(\\d)a", "  a3a");
  expect(match.index).toEqual(2);
  expect(match.input).toEqual("  a3a");
  expect(match.matches[0]).toEqual("a3a");
  expect(match.matches[1]).toEqual("3");

  // A quantified group captures the whole repeated run.
  match = matches("a(\\d*)a", "a3456a");
  expect(match.index).toEqual(0);
  expect(match.input).toEqual("a3456a");
  expect(match.matches[0]).toEqual("a3456a");
  expect(match.matches[1]).toEqual("3456");

  // Multiple groups are numbered left to right.
  match = matches("a*(\\d*)(a*)", "aaa456aaa");
  expect(match.index).toEqual(0);
  expect(match.input).toEqual("aaa456aaa");
  expect(match.matches[0]).toEqual("aaa456aaa");
  expect(match.matches[1]).toEqual("456");
  expect(match.matches[2]).toEqual("aaa");
});

// Skipped spec. The pattern has two alternatives, each with its own group; for "ab" only the
// second alternative matches, and the expectations say group 1 should then be "" and group 2 "b".
// NOTE(review): presumably skipped because the engine does not yet clear captures from an
// alternative that did not take part in the match — confirm before enabling.
it.skip("should not return captured values for non-matching alternations", () => {
const match = matches("(a|b)c|a(b|c)", "ab");
expect(match.matches[0]).toEqual("ab");
expect(match.matches[1]).toEqual("");
expect(match.matches[2]).toEqual("b");
});
50 changes: 50 additions & 0 deletions __tests__/character-classes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
const { RegExp, expectNotMatch, expectMatch, matches } = require("./util");

// `.` matches a single character but neither the empty string nor a newline.
it("dot", () => {
expectMatch(".", [" ", "B", "|", "9"]);
expectNotMatch(".", ["", "\n"]);
});

// `\d` matches a decimal digit.
it("digit", () => {
expectMatch("\\d", ["0", "9"]);
expectNotMatch("\\d", ["", "b"]);
});

// `\D` is the complement of `\d`; it still needs a character, so "" does not match.
it("non-digit", () => {
expectNotMatch("\\D", ["0", "9", ""]);
expectMatch("\\D", ["b", "|"]);
});

// `\w` matches letters, digits and underscore.
it("word", () => {
expectMatch("\\w", ["A", "a", "Z", "z", "0", "9", "_"]);
expectNotMatch("\\w", ["", "$"]);
});

// `\W` is the complement of `\w`.
it("not word", () => {
expectNotMatch("\\W", ["A", "a", "Z", "z", "0", "9", "_", ""]);
expectMatch("\\W", ["&", "$"]);
});

// `\s` matches form feed, line feed, carriage return, tab and vertical tab.
it("whitespace", () => {
expectMatch("\\s", ["\f", "\n", "\r", "\t", "\v"]);
expectNotMatch("\\s", ["", "a", "0"]);
});

// `\S` is the complement of `\s`.
it("not whitespace", () => {
expectNotMatch("\\S", ["", "\f", "\n", "\r", "\t", "\v"]);
expectMatch("\\S", ["a", "0"]);
});

// Each single-character control escape matches exactly its own control character.
it("tab, cr, lf, vt, ff", () => {
expectMatch("\\t", ["\t"]);
expectMatch("\\r", ["\r"]);
expectMatch("\\n", ["\n"]);
expectMatch("\\v", ["\v"]);
expectMatch("\\f", ["\f"]);
expectNotMatch("\\t", ["a", " ", ""]);
});

// An escaped dot is a literal "." rather than the any-character class.
it("escaped dot", () => {
expectMatch("\\.", ["."]);
expectNotMatch("\\.", ["", "a"]);
});
52 changes: 52 additions & 0 deletions __tests__/character-sets.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
const { RegExp, expectNotMatch, expectMatch, matches } = require("./util");

// A set matches exactly one of the characters listed between the brackets.
it("matches discrete characters", () => {
expectMatch("[abce]", ["a", "b", "c", "e"]);
expectNotMatch("[abce]", ["", "f", "h"]);
});

// An unterminated set is rejected when the RegExp is constructed, not when it is executed.
it("throws an error if no closing bracket is found", () => {
expect(() => new RegExp("[abce")).toThrow();
});

// Inside a set, a backslash makes `^`, `-`, `]` and `\` literal members.
it("supports escaping of special characters", () => {
expectMatch("[a\\^b]", ["a", "b", "^"]);
expectMatch("[a\\-c]", ["a", "c", "-"]);
expectMatch("[a\\]]", ["a", "]"]);
expectMatch("[a\\\\b]", ["a", "\\"]);
});

// `x-y` inside a set covers every character from x to y, both ends included.
it("matches character ranges", () => {
expectMatch("[a-c]", ["a", "b", "c"]);
expectNotMatch("[a-c]", ["d", "e", ""]);
expectMatch("[K-M]", ["K", "L", "M"]);
expectNotMatch("[K-M]", ["9", "J"]);
expectMatch("[0-9]", ["0", "9"]);
expectNotMatch("[0-9]", ["a", "A"]);
});

// Several ranges can share one set; a character between them ("d") is excluded.
it("matches multiple ranges", () => {
expectMatch("[a-ce-f]", ["a", "b", "c", "e", "f"]);
expectNotMatch("[a-ce-f]", ["d"]);
});

// A `]` placed directly after the opening bracket is a literal member, not the terminator.
it("supports closing brackets", () => {
expectMatch("[]a]", ["]", "a"]);
});

// A leading `^` inverts the set, including sets made of several ranges.
it("supports negated sets", () => {
expectNotMatch("[^a-c]", ["a", "b", "c"]);
expectMatch("[^a-c]", ["d", "e"]);
expectNotMatch("[^a-ce-f]", ["a", "b", "c", "e", "f"]);
expectMatch("[^a-ce-f]", ["d"]);
});

// A `-` at the very start or very end of a set cannot form a range, so it is a literal.
it("treats - as a literal", () => {
expectMatch("[-abc]", ["-", "a", "b", "c"]);
expectMatch("[abc-]", ["-", "a", "b", "c"]);
});

// The same holds when the `-` directly follows the negating `^`.
it("treats - as a literal in negated sets", () => {
expectNotMatch("[^-abc]", ["-", "a", "b", "c"]);
expectMatch("[^-abc]", ["1", "A"]);
});
11 changes: 11 additions & 0 deletions __tests__/characters.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
const { RegExp, expectNotMatch, expectMatch, matches } = require("./util");

// A literal character matches only when that character is present in the input.
it("single character", () => {
expectMatch("a", ["a"]);
expectNotMatch("a", ["fish", ""]);
});

// Adjacent literals must appear consecutively and in order.
it("concatenation", () => {
expectMatch("ab", ["ab"]);
expectNotMatch("ab", ["aac", "aa", ""]);
});
71 changes: 71 additions & 0 deletions __tests__/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
const { RegExp, expectNotMatch, expectMatch, matches } = require("./util");

// Behaviour of exec() itself: the shape of the result and how the "g" flag drives lastIndex.
describe("regexp", () => {
// The first digit of "asd123asd" is at index 3; `input` echoes the searched string.
it("match returns correct substring", () => {
const match = matches("\\d", "asd123asd");
expect(match.index).toEqual(3);
expect(match.input).toEqual("asd123asd");
expect(match.matches[0]).toEqual("1");
});

describe("global mode", () => {
// "23" occupies indices 4-5, so lastIndex lands on 6, just past the match.
it("increments lastIndex", () => {
const regex = new RegExp("\\d+", "g");
const match = regex.exec("dog 23 fish 45 cat");
expect(match.matches[0]).toEqual("23");
expect(regex.lastIndex).toEqual(6);
});

// Repeated exec() calls on the same instance resume from lastIndex; once nothing is left
// the call returns null and lastIndex goes back to 0.
it("uses lastIndex to support multiple matches", () => {
const regex = new RegExp("\\d+", "g");

let match = regex.exec("dog 23 fish 45 cat");
expect(match.matches[0]).toEqual("23");
expect(regex.lastIndex).toEqual(6);

match = regex.exec("dog 23 fish 45 cat");
expect(match.matches[0]).toEqual("45");
expect(regex.lastIndex).toEqual(14);

match = regex.exec("dog 23 fish 45 cat");
expect(match).toBeNull();
expect(regex.lastIndex).toEqual(0);
});
});

describe("non-global mode", () => {
// Without the "g" flag every exec() starts from the beginning and lastIndex stays at 0.
it("doesn't increment lastIndex", () => {
const regex = new RegExp("\\d+");

let match = regex.exec("dog 23 fish 45 cat");
expect(match.matches[0]).toEqual("23");
expect(regex.lastIndex).toEqual(0);

match = regex.exec("dog 23 fish 45 cat");
expect(match.matches[0]).toEqual("23");
expect(regex.lastIndex).toEqual(0);
});
});
});

// End-to-end patterns that combine several features in a single expression.
describe("use cases", () => {
  // A class escape followed by a quantified class must consume the whole input.
  it("matches combinations", () => {
    expectMatch("\\s\\w*", [" bar"]);
    expectMatch("\\S\\w*", ["foo"]);
  });

  it("email", () => {
    // Address whose local part, domain and TLD are the values asserted for
    // capture groups 1-3 below.
    const email = "colin@gmail.com";

    const regex = ".+@.+\\..+";
    expect(matches(regex, email)).toBeTruthy();
    // No "@" and no "." in the input, so the pattern cannot match.
    expect(matches(regex, "gmail")).toBeFalsy();

    const capturingRegex = "(.+)@(.+)\\.(.+)";
    expect(matches(capturingRegex, email)).toBeTruthy();

    // Declared with `const` so the spec does not create an implicit global.
    const match = matches(capturingRegex, email);
    expect(match.matches[0]).toEqual(email);
    expect(match.matches[1]).toEqual("colin");
    expect(match.matches[2]).toEqual("gmail");
    expect(match.matches[3]).toEqual("com");
  });
});
37 changes: 37 additions & 0 deletions __tests__/quantifiers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
const { RegExp, expectNotMatch, expectMatch, matches } = require("./util");

// Quantifiers that allow zero repetitions succeed on the empty string.
it("matches empty strings", () => {
expectMatch("a?", [""]);
expectMatch("a*", [""]);
});

// `?` accepts a single occurrence.
it("zero or one", () => {
expectMatch("a?", ["a"]);
// NOTE(review): the assertion below is disabled — presumably because "a?" can match the
// empty string at the start of "bc", so exec() would not return null. Confirm, then
// delete it or replace it with an assertion on the empty match.
// expectNotMatch("a?", ["bc"]);
});

// `+` needs at least one occurrence, so the empty string fails.
it("one or more", () => {
expectMatch("a+", ["a", "aa"]);
expectNotMatch("a+", [""]);
});

// `*` accepts any number of repetitions.
it("zero or more", () => {
expectMatch("a*", ["aa", "aaaa"]);
});

// A quantified term followed by a literal: the literal is still required.
it("multiple rules", () => {
expectMatch("a*b", ["b", "ab", "aaaab"]);
expectNotMatch("a*b", ["aaaad"]);
});

// Greedy: `*` consumes the whole run rather than stopping at the first valid match.
it("zero or more is greedy", () => {
let match = matches("a*", "aaaaa");
expect(match).not.toBeNull();
expect(match.matches[0]).toEqual("aaaaa");
});

// Greedy: `+` likewise consumes the whole run.
it("one or more is greedy", () => {
let match = matches("a+", "aaaaa");
expect(match).not.toBeNull();
expect(match.matches[0]).toEqual("aaaaa");
});
27 changes: 27 additions & 0 deletions __tests__/range-quantifiers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
const { RegExp, expectNotMatch, expectMatch, matches } = require("./util");

// `{n}` requires exactly n repetitions of the preceding term.
it("handles single quantifier", () => {
expectMatch("a{2}", ["aa"]);
expectMatch("ba{2}", ["baa"]);
expectMatch("ba{1}b", ["bab"]);
});

// `{n,}` requires at least n repetitions, with no upper limit.
it("handles open upper bound quantifiers", () => {
expectMatch("a{2,}", ["aa", "aaaaa"]);
expectMatch("ba{2,}", ["baa", "baaaaaaa"]);
expectMatch("ba{1,}b", ["bab", "baaaaaab"]);
});

// `{n,m}` is greedy but stops at m: only four of the ten "a"s are consumed.
it("handles explicit upper bound quantifiers", () => {
const match = matches("a{2,4}", "aaaaaaaaaa");
expect(match.matches[0]).toEqual("aaaa");
});

// `{0}` makes the preceding term match nothing at all.
it("handles zero value quantifier", () => {
expectMatch("ba{0}b", ["bb"]);
});

// A range quantifier binds to its own term inside each alternative.
it("handles quantifiers within alternates", () => {
expectMatch("a{2}|b{2}", ["bb", "aa"]);
expectNotMatch("a{2}|b{2}", ["cc"]);
});
31 changes: 29 additions & 2 deletions test/util.js → __tests__/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ global.TextDecoder = require("text-encoding").TextDecoder;
const fs = require("fs");
const loader = require("@assemblyscript/loader/umd/index");

class RegExpProxy {
class RegExp {
constructor(regex, flags = "") {
this.wasmModule = loader.instantiateSync(
fs.readFileSync("./build/untouched.wasm"),
Expand Down Expand Up @@ -80,4 +80,31 @@ class RegExpProxy {
}
}

module.exports.RegExp = RegExpProxy;
/**
 * Asserts that `regex` matches every string in `arr` and that the overall
 * match (`matches[0]`) is the entire string. A fresh RegExp is built for
 * each input.
 *
 * @param {string} regex - pattern source handed to the RegExp constructor
 * @param {string[]} arr - inputs that must each be matched in full
 */
const expectMatch = (regex, arr) => {
  arr.forEach((input) => {
    const result = new RegExp(regex).exec(input);
    expect(result).not.toBeNull();
    expect(result.matches[0]).toEqual(input);
  });
};

/**
 * Asserts that `regex` finds no match in any string of `arr`, i.e. that
 * exec() returns null for each one. A fresh RegExp is built for each input.
 *
 * @param {string} regex - pattern source handed to the RegExp constructor
 * @param {string[]} arr - inputs that must not be matched
 */
const expectNotMatch = (regex, arr) => {
  arr.forEach((input) => {
    const result = new RegExp(regex).exec(input);
    expect(result).toBeNull();
  });
};

/**
 * Compiles `regex` without flags and runs it once against `value`.
 *
 * @param {string} regex - pattern source handed to the RegExp constructor
 * @param {string} value - string to search
 * @returns whatever exec() returns: the match object, or null when nothing matches
 */
const matches = (regex, value) => new RegExp(regex).exec(value);

// Placeholder so this helper module registers at least one (todo) test.
// NOTE(review): presumably needed because Jest's default testMatch treats every file under
// __tests__ as a test suite and fails a suite that contains no tests — confirm against the
// project's Jest configuration.
test.todo("no tests in this file!");

module.exports.RegExp = RegExp;
module.exports.matches = matches;
module.exports.expectNotMatch = expectNotMatch;
module.exports.expectMatch = expectMatch;
3 changes: 2 additions & 1 deletion assembly/char.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ export const enum Char {
LeftParenthesis = 0x28,
RightParenthesis = 0x29,
Asterisk = 0x2a, // "*"
Comma = 0x2c, // "*"
Plus = 0x2b, // "+"
Comma = 0x2c, // ","
Minus = 0x2d, // "-"
Dot = 0x2e, // "."
Zero = 0x30,
Question = 0x3f, // "?"
Expand Down
Loading