@@ -58,44 +58,78 @@ class Range {
58
58
to : i32 = - 1 ;
59
59
}
60
60
61
- export class Parser {
62
- currentToken : string = "" ;
61
// Cursor over the UTF-16 code units of a string.
//
// `current` caches the code unit at `cursor`. Once the cursor moves past the
// last code unit (or when the source string is empty) `current` holds
// `u32.MAX_VALUE` (the u32 form of -1), so callers that read `current`
// without first checking `more()` never see a stale character.
class StringIterator {
  // Code unit at `cursor`, or u32.MAX_VALUE when there is none.
  current: u32;
  // Zero-based index of the code unit exposed through `current`.
  cursor: u32 = 0;

  // Positions the iterator on the first code unit of `sourceString`.
  constructor(private sourceString: string) {
    // Be explicit about empty input instead of relying on what charCodeAt
    // returns for an out-of-range index.
    this.current =
      this.sourceString.length > 0
        ? this.sourceString.charCodeAt(0)
        : u32.MAX_VALUE;
  }

  // Returns the code unit `distance` positions after the cursor without
  // moving the cursor.
  // NOTE(review): for positions past the end this returns whatever
  // String#charCodeAt yields out of range (presumably -1 in AssemblyScript,
  // i.e. u32.MAX_VALUE after conversion) - confirm against the stdlib.
  lookahead(distance: u32): u32 {
    return this.sourceString.charCodeAt(this.cursor + distance);
  }

  // Advances the cursor by one code unit.
  // Returns true when the cursor now points at a code unit, false when it
  // has moved past the end of the string.
  next(): bool {
    this.cursor++;
    if (this.cursor >= u32(this.sourceString.length)) {
      // Previously `current` kept the last character here, so a caller that
      // read it after the final eat saw that character a second time.
      this.current = u32.MAX_VALUE;
      return false;
    }
    this.current = this.sourceString.charCodeAt(this.cursor);
    return true;
  }

  // Returns `current` converted to a one-code-unit string.
  currentAsString(): string {
    return String.fromCharCode(this.current);
  }

  // True while the cursor points inside the string.
  more(): bool {
    return this.cursor < u32(this.sourceString.length);
  }

  // Returns an independent iterator over the same string at the same
  // position; used to snapshot and later restore parser state.
  copy(): StringIterator {
    const iterator = new StringIterator(this.sourceString);
    iterator.cursor = this.cursor;
    iterator.current = this.current;
    return iterator;
  }
}
97
+
98
+ export class Parser {
99
+ // currentToken: string = "";
100
+ // cursor: u32 = 0;
101
+ iterator : StringIterator ;
79
102
80
- private more ( ) : bool {
81
- return this . currentToken . length > 0 ;
103
// Builds a parser that reads `input` through a fresh StringIterator.
// Private: instances are created by the static toAST entry point.
private constructor(input: string) {
  const source = new StringIterator(input);
  this.iterator = source;
}
106
+
107
// Public entry point: parses `input` and returns the resulting AST.
static toAST(input: string): AST {
  const parser = new Parser(input);
  return parser.toAST();
}
83
110
84
- private resetCursor ( ) : void {
85
- this . cursor = 0 ;
86
- this . currentToken = this . input . charAt ( 0 ) ;
111
// Consumes the iterator's current code unit and returns it.
// When `value` is given (anything other than the -1 default), the current
// code unit must equal it, otherwise an Error("invalid token") is thrown
// and nothing is consumed.
private eatToken(value: u32 = -1): u32 {
  const consumed = this.iterator.current;
  const expectsSpecificToken = value != -1;
  if (expectsSpecificToken && consumed != value) {
    throw new Error("invalid token");
  }
  // The iterator's "reached the end" result is deliberately ignored here.
  this.iterator.next();
  return consumed;
}
88
119
89
120
// Parses the whole input as a sequence and wraps the root node in an AST.
private toAST(): AST {
  const root = this.parseSequence();
  return new AST(root);
}
93
123
124
// Returns the code unit the iterator is currently positioned on.
private currentCharCode(): u32 {
  const code: u32 = this.iterator.current;
  return code;
}
127
+
94
128
private parseCharacter ( ) : Node {
95
- let token = this . currentToken . charCodeAt ( 0 ) ;
129
+ let token = this . iterator . current ;
96
130
if ( token == Char . Backslash ) {
97
131
this . eatToken ( Char . Backslash ) ;
98
- token = this . currentToken . charCodeAt ( 0 ) ;
132
+ token = this . iterator . current ;
99
133
if ( isSpecialCharacter ( token ) ) {
100
134
this . eatToken ( ) ;
101
135
return new CharacterNode ( token ) ;
@@ -120,20 +154,20 @@ export class Parser {
120
154
121
155
private maybeParseRepetitionRange ( ) : Range {
122
156
// snapshot
123
- const previousCursor = this . cursor ;
157
+ const iteratorCopy = this . iterator . copy ( ) ;
124
158
this . eatToken ( Char . LeftCurlyBrace ) ;
125
159
126
160
let range = new Range ( ) ;
127
161
128
162
let firstDigit = true ;
129
163
let digitStr = "" ;
130
- while ( this . more ( ) ) {
131
- const token = this . currentToken . charCodeAt ( 0 ) ;
164
+ while ( this . iterator . more ( ) ) {
165
+ const token = this . iterator . current ;
132
166
if ( token == Char . RightParenthesis ) break ;
133
167
if ( firstDigit ) {
134
168
if ( isDigit ( token ) ) {
135
169
// if it is a digit, keep eating
136
- digitStr += this . currentToken ;
170
+ digitStr += this . iterator . currentAsString ( ) ;
137
171
} else {
138
172
range . from = digitStr . length ? < i32 > parseInt ( digitStr ) : - 1 ;
139
173
range . to = range . from ;
@@ -154,7 +188,7 @@ export class Parser {
154
188
} else {
155
189
if ( isDigit ( token ) ) {
156
190
// if it is a digit, keep eating
157
- digitStr += this . currentToken ;
191
+ digitStr += this . iterator . currentAsString ( ) ;
158
192
} else {
159
193
range . to = digitStr . length ? < i32 > parseInt ( digitStr ) : - 1 ;
160
194
if ( token == Char . RightCurlyBrace ) {
@@ -171,17 +205,16 @@ export class Parser {
171
205
}
172
206
173
207
// repetition not found - reset state
174
- this . cursor = previousCursor ;
175
- this . currentToken = this . input . charAt ( previousCursor ) ;
208
+ this . iterator = iteratorCopy ;
176
209
177
210
return range ;
178
211
}
179
212
180
213
// parses a sequence of chars
181
214
private parseSequence ( ) : Node {
182
215
let nodes = new Array < Node > ( ) ;
183
- while ( this . more ( ) ) {
184
- const token = this . currentToken . charCodeAt ( 0 ) ;
216
+ while ( this . iterator . more ( ) ) {
217
+ const token = this . iterator . current ;
185
218
if ( token == Char . RightParenthesis ) break ;
186
219
// @ts -ignore
187
220
if ( token == Char . VerticalBar ) {
@@ -227,34 +260,35 @@ export class Parser {
227
260
228
261
private parseCharacterSet ( ) : CharacterSetNode {
229
262
this . eatToken ( Char . LeftSquareBracket ) ;
230
- const token = this . currentToken . charCodeAt ( 0 ) ;
231
263
232
- const negated = token == Char . Caret ;
264
+ const negated = this . iterator . current == Char . Caret ;
233
265
if ( negated ) {
234
266
this . eatToken ( Char . Caret ) ;
235
267
}
236
268
237
269
const nodes = new Array < Node > ( ) ;
238
- while ( this . currentToken != "]" || nodes . length == 0 ) {
270
+ while (
271
+ this . iterator . current != Char . RightSquareBracket ||
272
+ nodes . length == 0
273
+ ) {
239
274
// lookahead for character range
240
275
if (
241
- this . cursor + 2 < u32 ( this . input . length ) &&
242
- this . currentToken != "\\" &&
243
- this . input . charCodeAt ( this . cursor + 1 ) == Char . Minus &&
244
- this . input . charCodeAt ( this . cursor + 2 ) != Char . RightSquareBracket
276
+ this . iterator . current != Char . Backslash &&
277
+ this . iterator . lookahead ( 1 ) == Char . Minus &&
278
+ this . iterator . lookahead ( 2 ) != Char . RightSquareBracket
245
279
) {
246
280
nodes . push ( this . parseCharacterRange ( ) ) ;
247
281
} else {
248
282
if (
249
- this . currentToken == "\\" &&
250
- isCharacterSetSpecialChar ( this . input . charCodeAt ( this . cursor + 1 ) )
283
+ this . iterator . current == Char . Backslash &&
284
+ isCharacterSetSpecialChar ( this . iterator . lookahead ( 1 ) )
251
285
) {
252
286
this . eatToken ( Char . Backslash ) ;
253
287
}
254
288
nodes . push ( new CharacterNode ( this . eatToken ( ) ) ) ;
255
289
}
256
290
257
- if ( this . cursor >= u32 ( this . input . length ) ) {
291
+ if ( ! this . iterator . more ( ) ) {
258
292
throw new SyntaxError ( "Unterminated character class" ) ;
259
293
}
260
294
}
0 commit comments