Skip to content

Commit f0ef9e1

Browse files
authored
Implement RegExp 'v' flag, part 1 (#229)
This commit implements the flag itself and teaches the regex engine to reject previously accepted patterns when in unicodeSets mode. Refs: #228
1 parent d1852b5 commit f0ef9e1

File tree

5 files changed

+199
-2
lines changed

5 files changed

+199
-2
lines changed

libregexp.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,15 @@ typedef enum {
6363

6464
#define TMP_BUF_SIZE 128
6565

66+
// invariant: is_unicode and unicode_sets are mutually exclusive (one or neither may be set, never both)
6667
typedef struct {
6768
DynBuf byte_code;
6869
const uint8_t *buf_ptr;
6970
const uint8_t *buf_end;
7071
const uint8_t *buf_start;
7172
int re_flags;
7273
BOOL is_unicode;
74+
BOOL unicode_sets;
7375
BOOL ignore_case;
7476
BOOL dotall;
7577
int capture_count;
@@ -853,6 +855,8 @@ static int re_emit_range(REParseState *s, const CharRange *cr)
853855
return 0;
854856
}
855857

858+
// s->is_unicode turns patterns like []] into syntax errors
859+
// s->unicode_sets turns more patterns into errors, like [a-] or [[]
856860
static int re_parse_char_class(REParseState *s, const uint8_t **pp)
857861
{
858862
const uint8_t *p;
@@ -864,17 +868,43 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
864868
cr_init(cr, s->opaque, lre_realloc);
865869
p = *pp;
866870
p++; /* skip '[' */
871+
872+
if (s->unicode_sets) {
873+
static const char verboten[] =
874+
"()[{}/-|" "\0"
875+
"&&!!##$$%%**++,,..::;;<<==>>??@@``~~" "\0"
876+
"^^^_^^";
877+
const char *s = verboten;
878+
int n = 1;
879+
do {
880+
if (!memcmp(s, p, n))
881+
if (p[n] == ']')
882+
goto invalid_class_range;
883+
s += n;
884+
if (!*s) {
885+
s++;
886+
n++;
887+
}
888+
} while (n < 4);
889+
}
890+
867891
invert = FALSE;
868892
if (*p == '^') {
869893
p++;
870894
invert = TRUE;
871895
}
896+
872897
for(;;) {
873898
if (*p == ']')
874899
break;
875900
c1 = get_class_atom(s, cr1, &p, TRUE);
876901
if ((int)c1 < 0)
877902
goto fail;
903+
if (*p == '-' && p[1] == ']' && s->unicode_sets) {
904+
if (c1 >= CLASS_RANGE_BASE)
905+
cr_free(cr1);
906+
goto invalid_class_range;
907+
}
878908
if (*p == '-' && p[1] != ']') {
879909
const uint8_t *p0 = p + 1;
880910
if (c1 >= CLASS_RANGE_BASE) {
@@ -1843,6 +1873,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
18431873
is_sticky = ((re_flags & LRE_FLAG_STICKY) != 0);
18441874
s->ignore_case = ((re_flags & LRE_FLAG_IGNORECASE) != 0);
18451875
s->dotall = ((re_flags & LRE_FLAG_DOTALL) != 0);
1876+
s->unicode_sets = ((re_flags & LRE_FLAG_UNICODE_SETS) != 0);
18461877
s->capture_count = 1;
18471878
s->total_capture_count = -1;
18481879
s->has_named_captures = -1;

libregexp.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@
3737
#define LRE_FLAG_UNICODE (1 << 4)
3838
#define LRE_FLAG_STICKY (1 << 5)
3939
#define LRE_FLAG_INDICES (1 << 6) /* Unused by libregexp, just recorded. */
40-
4140
#define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */
41+
#define LRE_FLAG_UNICODE_SETS (1 << 8)
4242

4343
uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
4444
const char *buf, size_t buf_len, int re_flags,

quickjs.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40730,6 +40730,9 @@ static JSValue js_compile_regexp(JSContext *ctx, JSValue pattern,
4073040730
case 'u':
4073140731
mask = LRE_FLAG_UNICODE;
4073240732
break;
40733+
case 'v':
40734+
mask = LRE_FLAG_UNICODE_SETS;
40735+
break;
4073340736
case 'y':
4073440737
mask = LRE_FLAG_STICKY;
4073540738
break;
@@ -40746,6 +40749,10 @@ static JSValue js_compile_regexp(JSContext *ctx, JSValue pattern,
4074640749
JS_FreeCString(ctx, str);
4074740750
}
4074840751

40752+
if (re_flags & LRE_FLAG_UNICODE)
40753+
if (re_flags & LRE_FLAG_UNICODE_SETS)
40754+
return JS_ThrowSyntaxError(ctx, "invalid regular expression flags");
40755+
4074940756
str = JS_ToCStringLen2(ctx, &len, pattern, !(re_flags & LRE_FLAG_UNICODE));
4075040757
if (!str)
4075140758
return JS_EXCEPTION;
@@ -41067,6 +41074,11 @@ static JSValue js_regexp_get_flags(JSContext *ctx, JSValue this_val)
4106741074
goto exception;
4106841075
if (res)
4106941076
*p++ = 'u';
41077+
res = JS_ToBoolFree(ctx, JS_GetPropertyStr(ctx, this_val, "unicodeSets"));
41078+
if (res < 0)
41079+
goto exception;
41080+
if (res)
41081+
*p++ = 'v';
4107041082
res = JS_ToBoolFree(ctx, JS_GetPropertyStr(ctx, this_val, "sticky"));
4107141083
if (res < 0)
4107241084
goto exception;
@@ -42152,6 +42164,7 @@ static const JSCFunctionListEntry js_regexp_proto_funcs[] = {
4215242164
JS_CGETSET_MAGIC_DEF("multiline", js_regexp_get_flag, NULL, LRE_FLAG_MULTILINE ),
4215342165
JS_CGETSET_MAGIC_DEF("dotAll", js_regexp_get_flag, NULL, LRE_FLAG_DOTALL ),
4215442166
JS_CGETSET_MAGIC_DEF("unicode", js_regexp_get_flag, NULL, LRE_FLAG_UNICODE ),
42167+
JS_CGETSET_MAGIC_DEF("unicodeSets", js_regexp_get_flag, NULL, LRE_FLAG_UNICODE_SETS ),
4215542168
JS_CGETSET_MAGIC_DEF("sticky", js_regexp_get_flag, NULL, LRE_FLAG_STICKY ),
4215642169
JS_CGETSET_MAGIC_DEF("hasIndices", js_regexp_get_flag, NULL, LRE_FLAG_INDICES ),
4215742170
JS_CFUNC_DEF("exec", 1, js_regexp_exec ),

test262.conf

Lines changed: 143 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ regexp-lookbehind
157157
regexp-match-indices
158158
regexp-named-groups
159159
regexp-unicode-property-escapes
160-
regexp-v-flag=skip
160+
regexp-v-flag
161161
resizable-arraybuffer=skip
162162
rest-parameters
163163
Set
@@ -223,5 +223,147 @@ test262/test/built-ins/ThrowTypeError/unique-per-realm-function-proto.js
223223
#test262/test/built-ins/RegExp/CharacterClassEscapes/
224224
#test262/test/built-ins/RegExp/property-escapes/
225225

226+
# in-progress regexp-v-flag support; see https://github.com/quickjs-ng/quickjs/issues/228
227+
test262/test/built-ins/RegExp/property-escapes/generated/strings/Basic_Emoji-negative-CharacterClass.js
228+
test262/test/built-ins/RegExp/property-escapes/generated/strings/Basic_Emoji-negative-P.js
229+
test262/test/built-ins/RegExp/property-escapes/generated/strings/Basic_Emoji-negative-u.js
230+
test262/test/built-ins/RegExp/property-escapes/generated/strings/Basic_Emoji.js
231+
test262/test/built-ins/RegExp/property-escapes/generated/strings/Emoji_Keycap_Sequence-negative-CharacterClass.js
232+
test262/test/built-ins/RegExp/property-escapes/generated/strings/Emoji_Keycap_Sequence-negative-P.js
233+
test262/test/built-ins/RegExp/property-escapes/generated/strings/Emoji_Keycap_Sequence-negative-u.js
234+
test262/test/built-ins/RegExp/property-escapes/generated/strings/Emoji_Keycap_Sequence.js
235+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji-negative-CharacterClass.js
236+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji-negative-P.js
237+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji-negative-u.js
238+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji.js
239+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Flag_Sequence-negative-CharacterClass.js
240+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Flag_Sequence-negative-P.js
241+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Flag_Sequence-negative-u.js
242+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Flag_Sequence.js
243+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Modifier_Sequence-negative-CharacterClass.js
244+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Modifier_Sequence-negative-P.js
245+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Modifier_Sequence-negative-u.js
246+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Modifier_Sequence.js
247+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Tag_Sequence-negative-CharacterClass.js
248+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Tag_Sequence-negative-P.js
249+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Tag_Sequence-negative-u.js
250+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Tag_Sequence.js
251+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-CharacterClass.js
252+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-P.js
253+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-u.js
254+
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence.js
255+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-character-class-escape.js
256+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-character-class.js
257+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-character-property-escape.js
258+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-character.js
259+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-property-of-strings-escape.js
260+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-string-literal.js
261+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-character-class-escape.js
262+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-character-class.js
263+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-character-property-escape.js
264+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-character.js
265+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-property-of-strings-escape.js
266+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-string-literal.js
267+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-character-class-escape.js
268+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-character-class.js
269+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-character-property-escape.js
270+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-character.js
271+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-property-of-strings-escape.js
272+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-string-literal.js
273+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-character-class-escape.js
274+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-character-class.js
275+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-character-property-escape.js
276+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-character.js
277+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-property-of-strings-escape.js
278+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-string-literal.js
279+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-character-class-escape.js
280+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-character-class.js
281+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-character-property-escape.js
282+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-character.js
283+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-property-of-strings-escape.js
284+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-string-literal.js
285+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-character-class-escape.js
286+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-character-class.js
287+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-character-property-escape.js
288+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-character.js
289+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-property-of-strings-escape.js
290+
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-string-literal.js
291+
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-character-class-escape.js
292+
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-character-class.js
293+
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-character-property-escape.js
294+
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-character.js
295+
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-property-of-strings-escape.js
296+
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-string-literal.js
297+
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-character-class-escape.js
298+
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-character-class.js
299+
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-character-property-escape.js
300+
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-character.js
301+
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-property-of-strings-escape.js
302+
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-string-literal.js
303+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-class-escape.js
304+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-class.js
305+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-property-escape.js
306+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character.js
307+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-property-of-strings-escape.js
308+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-string-literal.js
309+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-class-escape.js
310+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-class.js
311+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-property-escape.js
312+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character.js
313+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-property-of-strings-escape.js
314+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-string-literal.js
315+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-class-escape.js
316+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-class.js
317+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-property-escape.js
318+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character.js
319+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-property-of-strings-escape.js
320+
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-string-literal.js
321+
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-character-class-escape.js
322+
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-character-class.js
323+
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-character-property-escape.js
324+
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-character.js
325+
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-property-of-strings-escape.js
326+
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-string-literal.js
327+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character-class-escape.js
328+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character-class.js
329+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character-property-escape.js
330+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character.js
331+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-property-of-strings-escape.js
332+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-string-literal.js
333+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-character-class-escape.js
334+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-character-class.js
335+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-character-property-escape.js
336+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-character.js
337+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-property-of-strings-escape.js
338+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-string-literal.js
339+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-character-class-escape.js
340+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-character-class.js
341+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-character-property-escape.js
342+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-character.js
343+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-property-of-strings-escape.js
344+
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-string-literal.js
345+
test262/test/built-ins/RegExp/unicodeSets/generated/rgi-emoji-13.1.js
346+
test262/test/built-ins/RegExp/unicodeSets/generated/rgi-emoji-14.0.js
347+
test262/test/built-ins/RegExp/unicodeSets/generated/rgi-emoji-15.0.js
348+
test262/test/built-ins/RegExp/unicodeSets/generated/rgi-emoji-15.1.js
349+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-character-class-escape.js
350+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-character-class.js
351+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-character-property-escape.js
352+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-character.js
353+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-property-of-strings-escape.js
354+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-string-literal.js
355+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character-class-escape.js
356+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character-class.js
357+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character-property-escape.js
358+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character.js
359+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-property-of-strings-escape.js
360+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-string-literal.js
361+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-character-class-escape.js
362+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-character-class.js
363+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-character-property-escape.js
364+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-character.js
365+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-property-of-strings-escape.js
366+
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-string-literal.js
367+
226368
[tests]
227369
# list test files or use config.testdir

tests/test_builtin.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,17 @@ function test_regexp()
646646
assert(/{1a}/.toString(), "/{1a}/");
647647
a = /a{1+/.exec("a{11");
648648
assert(a, ["a{11"] );
649+
650+
eval("/[a-]/"); // accepted with no flag
651+
eval("/[a-]/u"); // accepted with 'u' flag
652+
653+
let ex;
654+
try {
655+
eval("/[a-]/v"); // rejected with 'v' flag
656+
} catch (_ex) {
657+
ex = _ex;
658+
}
659+
assert(ex?.message, "invalid class range");
649660
}
650661

651662
function test_symbol()

0 commit comments

Comments
 (0)