Skip to content

Commit 43dd8f0

Browse files
Added string iterator concept to parser
1 parent 1cadee3 commit 43dd8f0

File tree

2 files changed

+74
-40
lines changed

2 files changed

+74
-40
lines changed

assembly/parser/parser.ts

Lines changed: 72 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -58,44 +58,78 @@ class Range {
5858
to: i32 = -1;
5959
}
6060

61-
export class Parser {
62-
currentToken: string = "";
61+
class StringIterator {
62+
current: u32;
6363
cursor: u32 = 0;
6464

65-
private constructor(public input: string) {}
65+
constructor(private sourceString: string) {
66+
this.current = this.sourceString.charCodeAt(0);
67+
}
6668

67-
static toAST(input: string): AST {
68-
return new Parser(input).toAST();
69+
lookahead(distance: u32): u32 {
70+
return this.sourceString.charCodeAt(this.cursor + distance);
6971
}
7072

71-
private eatToken(value: u32 = -1): u32 {
72-
const token = this.currentToken.charCodeAt(0) as u32;
73-
if (value != -1 && token != value) {
74-
throw new Error("invalid token");
73+
next(): bool {
74+
this.cursor++;
75+
if (this.cursor >= u32(this.sourceString.length)) {
76+
return false;
7577
}
76-
this.currentToken = this.input.charAt(++this.cursor);
77-
return token;
78+
this.current = this.sourceString.charCodeAt(this.cursor);
79+
return true;
80+
}
81+
82+
currentAsString(): string {
83+
return String.fromCharCode(this.current);
84+
}
85+
86+
more(): bool {
87+
return this.cursor < u32(this.sourceString.length);
88+
}
89+
90+
copy(): StringIterator {
91+
const iterator = new StringIterator(this.sourceString);
92+
iterator.cursor = this.cursor;
93+
iterator.current = this.current;
94+
return iterator;
7895
}
96+
}
97+
98+
export class Parser {
99+
// currentToken: string = "";
100+
// cursor: u32 = 0;
101+
iterator: StringIterator;
79102

80-
private more(): bool {
81-
return this.currentToken.length > 0;
103+
private constructor(input: string) {
104+
this.iterator = new StringIterator(input);
105+
}
106+
107+
static toAST(input: string): AST {
108+
return new Parser(input).toAST();
82109
}
83110

84-
private resetCursor(): void {
85-
this.cursor = 0;
86-
this.currentToken = this.input.charAt(0);
111+
private eatToken(value: u32 = -1): u32 {
112+
const currentToken = this.iterator.current;
113+
if (value != -1 && this.iterator.current != value) {
114+
throw new Error("invalid token");
115+
}
116+
this.iterator.next();
117+
return currentToken;
87118
}
88119

89120
private toAST(): AST {
90-
this.resetCursor();
91121
return new AST(this.parseSequence());
92122
}
93123

124+
private currentCharCode(): u32 {
125+
return this.iterator.current;
126+
}
127+
94128
private parseCharacter(): Node {
95-
let token = this.currentToken.charCodeAt(0);
129+
let token = this.iterator.current;
96130
if (token == Char.Backslash) {
97131
this.eatToken(Char.Backslash);
98-
token = this.currentToken.charCodeAt(0);
132+
token = this.iterator.current;
99133
if (isSpecialCharacter(token)) {
100134
this.eatToken();
101135
return new CharacterNode(token);
@@ -120,20 +154,20 @@ export class Parser {
120154

121155
private maybeParseRepetitionRange(): Range {
122156
// snapshot
123-
const previousCursor = this.cursor;
157+
const iteratorCopy = this.iterator.copy();
124158
this.eatToken(Char.LeftCurlyBrace);
125159

126160
let range = new Range();
127161

128162
let firstDigit = true;
129163
let digitStr = "";
130-
while (this.more()) {
131-
const token = this.currentToken.charCodeAt(0);
164+
while (this.iterator.more()) {
165+
const token = this.iterator.current;
132166
if (token == Char.RightParenthesis) break;
133167
if (firstDigit) {
134168
if (isDigit(token)) {
135169
// if it is a digit, keep eating
136-
digitStr += this.currentToken;
170+
digitStr += this.iterator.currentAsString();
137171
} else {
138172
range.from = digitStr.length ? <i32>parseInt(digitStr) : -1;
139173
range.to = range.from;
@@ -154,7 +188,7 @@ export class Parser {
154188
} else {
155189
if (isDigit(token)) {
156190
// if it is a digit, keep eating
157-
digitStr += this.currentToken;
191+
digitStr += this.iterator.currentAsString();
158192
} else {
159193
range.to = digitStr.length ? <i32>parseInt(digitStr) : -1;
160194
if (token == Char.RightCurlyBrace) {
@@ -171,17 +205,16 @@ export class Parser {
171205
}
172206

173207
// repetition not found - reset state
174-
this.cursor = previousCursor;
175-
this.currentToken = this.input.charAt(previousCursor);
208+
this.iterator = iteratorCopy;
176209

177210
return range;
178211
}
179212

180213
// parses a sequence of chars
181214
private parseSequence(): Node {
182215
let nodes = new Array<Node>();
183-
while (this.more()) {
184-
const token = this.currentToken.charCodeAt(0);
216+
while (this.iterator.more()) {
217+
const token = this.iterator.current;
185218
if (token == Char.RightParenthesis) break;
186219
// @ts-ignore
187220
if (token == Char.VerticalBar) {
@@ -227,34 +260,35 @@ export class Parser {
227260

228261
private parseCharacterSet(): CharacterSetNode {
229262
this.eatToken(Char.LeftSquareBracket);
230-
const token = this.currentToken.charCodeAt(0);
231263

232-
const negated = token == Char.Caret;
264+
const negated = this.iterator.current == Char.Caret;
233265
if (negated) {
234266
this.eatToken(Char.Caret);
235267
}
236268

237269
const nodes = new Array<Node>();
238-
while (this.currentToken != "]" || nodes.length == 0) {
270+
while (
271+
this.iterator.current != Char.RightSquareBracket ||
272+
nodes.length == 0
273+
) {
239274
// lookahead for character range
240275
if (
241-
this.cursor + 2 < u32(this.input.length) &&
242-
this.currentToken != "\\" &&
243-
this.input.charCodeAt(this.cursor + 1) == Char.Minus &&
244-
this.input.charCodeAt(this.cursor + 2) != Char.RightSquareBracket
276+
this.iterator.current != Char.Backslash &&
277+
this.iterator.lookahead(1) == Char.Minus &&
278+
this.iterator.lookahead(2) != Char.RightSquareBracket
245279
) {
246280
nodes.push(this.parseCharacterRange());
247281
} else {
248282
if (
249-
this.currentToken == "\\" &&
250-
isCharacterSetSpecialChar(this.input.charCodeAt(this.cursor + 1))
283+
this.iterator.current == Char.Backslash &&
284+
isCharacterSetSpecialChar(this.iterator.lookahead(1))
251285
) {
252286
this.eatToken(Char.Backslash);
253287
}
254288
nodes.push(new CharacterNode(this.eatToken()));
255289
}
256290

257-
if (this.cursor >= u32(this.input.length)) {
291+
if (!this.iterator.more()) {
258292
throw new SyntaxError("Unterminated character class");
259293
}
260294
}

ts/index.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ globalAny.log = console.log;
55

66
import { RegExp } from "../assembly/regexp";
77

8-
const regexObj = new RegExp("[a\\\\c]");
9-
const match = regexObj.exec("\\");
8+
const regexObj = new RegExp("[abce]");
9+
const match = regexObj.exec("a");
1010

1111
console.log(match);

0 commit comments

Comments
 (0)