Skip to content

Commit 2c72bb1

Browse files
committed
RegExp refactoring and improvements
Move RegExp bytecode functions to a separate file. Optimize bytecode length on character matching. Implement a basic RegExp cache to optimize memory usage on duplicated RegExp in JS files. Also fix minor style issues and add missing comments. Improve existing comments. JerryScript-DCO-1.0-Signed-off-by: László Langó [email protected]
1 parent 3f37769 commit 2c72bb1

16 files changed

+843
-600
lines changed

jerry-core/ecma/base/ecma-gc.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "jrt.h"
3333
#include "jrt-libc-includes.h"
3434
#include "jrt-bit-fields.h"
35+
#include "re-compiler.h"
3536
#include "vm-defines.h"
3637
#include "vm-stack.h"
3738

@@ -549,6 +550,11 @@ ecma_gc_run (void)
549550
ecma_gc_objects_lists[ECMA_GC_COLOR_BLACK] = NULL;
550551

551552
ecma_gc_visited_flip_flag = !ecma_gc_visited_flip_flag;
553+
554+
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
555+
/* Free RegExp bytecodes stored in cache */
556+
re_cache_gc_run ();
557+
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
552558
} /* ecma_gc_run */
553559

554560
/**

jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,9 @@
5353
* See also:
5454
* ECMA-262 v5, B.2.5.1
5555
*
56-
* @return ecma value
56+
* @return undefined - if compiled successfully
57+
* error ecma value - otherwise
58+
*
5759
* Returned value must be freed with ecma_free_value.
5860
*/
5961
static ecma_value_t
@@ -240,8 +242,10 @@ ecma_builtin_regexp_prototype_compile (ecma_value_t this_arg, /**< this argument
240242
* See also:
241243
* ECMA-262 v5, 15.10.6.2
242244
*
243-
* @return ecma value
244-
* Returned value must be freed with ecma_free_value.
245+
* @return array object containing the results - if matched
246+
* null - otherwise
247+
*
248+
* May raise error, so returned value must be freed with ecma_free_value.
245249
*/
246250
static ecma_value_t
247251
ecma_builtin_regexp_prototype_exec (ecma_value_t this_arg, /**< this argument */
@@ -314,8 +318,10 @@ ecma_builtin_regexp_prototype_exec (ecma_value_t this_arg, /**< this argument */
314318
* See also:
315319
* ECMA-262 v5, 15.10.6.3
316320
*
317-
* @return ecma value
318-
* Returned value must be freed with ecma_free_value.
321+
* @return true - if match is not null
322+
* false - otherwise
323+
*
324+
* May raise error, so returned value must be freed with ecma_free_value.
319325
*/
320326
static ecma_value_t
321327
ecma_builtin_regexp_prototype_test (ecma_value_t this_arg, /**< this argument */
@@ -439,4 +445,4 @@ ecma_builtin_regexp_prototype_to_string (ecma_value_t this_arg) /**< this argume
439445
* @}
440446
*/
441447

442-
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
448+
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */

jerry-core/ecma/builtin-objects/ecma-builtin-regexp.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
* Handle calling [[Call]] of built-in RegExp object
4747
*
4848
* @return ecma value
49+
* Returned value must be freed with ecma_free_value.
4950
*/
5051
ecma_value_t
5152
ecma_builtin_regexp_dispatch_call (const ecma_value_t *arguments_list_p, /**< arguments list */
@@ -58,6 +59,7 @@ ecma_builtin_regexp_dispatch_call (const ecma_value_t *arguments_list_p, /**< ar
5859
* Handle calling [[Construct]] of built-in RegExp object
5960
*
6061
* @return ecma value
62+
* Returned value must be freed with ecma_free_value.
6163
*/
6264
ecma_value_t
6365
ecma_builtin_regexp_dispatch_construct (const ecma_value_t *arguments_list_p, /**< arguments list */
@@ -152,4 +154,4 @@ ecma_builtin_regexp_dispatch_construct (const ecma_value_t *arguments_list_p, /*
152154
* @}
153155
*/
154156

155-
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
157+
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */

jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.inc.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ ROUTINE (LIT_MAGIC_STRING_MATCH, ecma_builtin_string_prototype_object_match, 1,
7373
ROUTINE (LIT_MAGIC_STRING_REPLACE, ecma_builtin_string_prototype_object_replace, 2, 2)
7474
ROUTINE (LIT_MAGIC_STRING_SEARCH, ecma_builtin_string_prototype_object_search, 1, 1)
7575
ROUTINE (LIT_MAGIC_STRING_SPLIT, ecma_builtin_string_prototype_object_split, 2, 2)
76-
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
76+
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
7777

7878
ROUTINE (LIT_MAGIC_STRING_SUBSTRING, ecma_builtin_string_prototype_object_substring, 2, 2)
7979
ROUTINE (LIT_MAGIC_STRING_TO_LOWER_CASE_UL, ecma_builtin_string_prototype_object_to_lower_case, 0, 0)
@@ -84,7 +84,7 @@ ROUTINE (LIT_MAGIC_STRING_TRIM, ecma_builtin_string_prototype_object_trim, 0, 0)
8484

8585
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ANNEXB_BUILTIN
8686
ROUTINE (LIT_MAGIC_STRING_SUBSTR, ecma_builtin_string_prototype_object_substr, 2, 2)
87-
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ANNEXB_BUILTIN */
87+
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ANNEXB_BUILTIN */
8888

8989
#undef OBJECT_ID
9090
#undef SIMPLE_VALUE

jerry-core/ecma/operations/ecma-objects.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -958,13 +958,13 @@ ecma_object_get_class_name (ecma_object_t *obj_p) /**< object */
958958
{
959959
return LIT_MAGIC_STRING_DATE_UL;
960960
}
961-
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_DATE_BUILTIN */
961+
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_DATE_BUILTIN */
962962
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
963963
case ECMA_BUILTIN_ID_REGEXP_PROTOTYPE:
964964
{
965965
return LIT_MAGIC_STRING_REGEXP_UL;
966966
}
967-
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
967+
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
968968
default:
969969
{
970970
JERRY_ASSERT (ecma_builtin_is (obj_p, ECMA_BUILTIN_ID_GLOBAL));

jerry-core/ecma/operations/ecma-regexp-object.c

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,19 @@
5555
#define RE_GLOBAL_START_IDX 0
5656
#define RE_GLOBAL_END_IDX 1
5757

58+
/**
59+
* Check if a RegExp opcode is a capture group or not
60+
*/
61+
#define RE_IS_CAPTURE_GROUP(x) (((x) < RE_OP_NON_CAPTURE_GROUP_START) ? 1 : 0)
62+
5863
/**
5964
* Parse RegExp flags (global, ignoreCase, multiline)
6065
*
6166
* See also: ECMA-262 v5, 15.10.4.1
6267
*
63-
* @return ecma value
68+
* @return empty ecma value - if parsed successfully
69+
* error ecma value - otherwise
70+
*
6471
* Returned value must be freed with ecma_free_value
6572
*/
6673
ecma_value_t
@@ -123,7 +130,7 @@ re_parse_regexp_flags (ecma_string_t *flags_str_p, /**< Input string with flags
123130
return ret_value;
124131
} /* re_parse_regexp_flags */
125132

126-
/*
133+
/**
127134
* Initializes the source, global, ignoreCase, multiline, and lastIndex properties of RegExp instance.
128135
*/
129136
void
@@ -223,11 +230,11 @@ re_initialize_props (ecma_object_t *re_obj_p, /**< RegExp object */
223230
*
224231
* See also: ECMA-262 v5, 15.10.4.1
225232
*
226-
* @return ecma value
233+
* @return constructed RegExp object
227234
* Returned value must be freed with ecma_free_value
228235
*/
229236
ecma_value_t
230-
ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *bytecode_p) /**< input pattern */
237+
ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *bytecode_p) /**< RegExp bytecode */
231238
{
232239
JERRY_ASSERT (bytecode_p != NULL);
233240

@@ -259,7 +266,9 @@ ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *bytecode_p) /**<
259266
*
260267
* See also: ECMA-262 v5, 15.10.4.1
261268
*
262-
* @return ecma value
269+
* @return constructed RegExp object - if pattern and flags were parsed successfully
270+
* error ecma value - otherwise
271+
*
263272
* Returned value must be freed with ecma_free_value
264273
*/
265274
ecma_value_t
@@ -367,8 +376,10 @@ re_canonicalize (ecma_char_t ch, /**< character */
367376
* See also:
368377
* ECMA-262 v5, 15.10.2.1
369378
*
370-
* @return ecma value
371-
* Returned value must be freed with ecma_free_value
379+
* @return true - if matched
380+
* false - otherwise
381+
*
382+
* May raise error, so returned value must be freed with ecma_free_value
372383
*/
373384
static ecma_value_t
374385
re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
@@ -400,7 +411,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
400411
}
401412

402413
bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
403-
ecma_char_t ch1 = (ecma_char_t) re_get_value (&bc_p); /* Already canonicalized. */
414+
ecma_char_t ch1 = (ecma_char_t) re_get_char (&bc_p); /* Already canonicalized. */
404415
ecma_char_t ch2 = re_canonicalize (lit_utf8_read_next (&str_curr_p), is_ignorecase);
405416
JERRY_DDLOG ("Character matching %d to %d: ", ch1, ch2);
406417

@@ -613,8 +624,8 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
613624

614625
while (num_of_ranges)
615626
{
616-
ecma_char_t ch1 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
617-
ecma_char_t ch2 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
627+
ecma_char_t ch1 = re_canonicalize (re_get_char (&bc_p), is_ignorecase);
628+
ecma_char_t ch2 = re_canonicalize (re_get_char (&bc_p), is_ignorecase);
618629
JERRY_DDLOG ("num_of_ranges=%d, ch1=%d, ch2=%d, curr_ch=%d; ",
619630
num_of_ranges, ch1, ch2, curr_ch);
620631

@@ -698,6 +709,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
698709
uint32_t offset = re_get_value (&bc_p);
699710
lit_utf8_byte_t *sub_str_p = NULL;
700711
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);
712+
701713
if (ecma_is_value_true (match_value))
702714
{
703715
*out_str_p = sub_str_p;
@@ -707,6 +719,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
707719
{
708720
return match_value;
709721
}
722+
710723
bc_p += offset;
711724
old_bc_p = bc_p;
712725
}
@@ -839,6 +852,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
839852
{
840853
offset = re_get_value (&bc_p);
841854
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);
855+
842856
if (ecma_is_value_true (match_value))
843857
{
844858
*out_str_p = sub_str_p;
@@ -848,6 +862,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
848862
{
849863
return match_value;
850864
}
865+
851866
bc_p += offset;
852867
old_bc_p = bc_p;
853868
}
@@ -915,6 +930,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
915930

916931
lit_utf8_byte_t *sub_str_p = NULL;
917932
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);
933+
918934
if (ecma_is_value_true (match_value))
919935
{
920936
*out_str_p = sub_str_p;
@@ -1225,7 +1241,13 @@ re_set_result_array_properties (ecma_object_t *array_obj_p, /**< result array */
12251241
* RegExp helper function to start the recursive matching algorithm
12261242
* and create the result Array object
12271243
*
1228-
* @return ecma value
1244+
* See also:
1245+
* ECMA-262 v5, 15.10.6.2
1246+
*
1247+
* @return array object - if matched
1248+
* null - otherwise
1249+
*
1250+
* May raise error.
12291251
* Returned value must be freed with ecma_free_value
12301252
*/
12311253
ecma_value_t
@@ -1475,4 +1497,4 @@ ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
14751497
* @}
14761498
*/
14771499

1478-
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
1500+
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */

jerry-core/ecma/operations/ecma-regexp-object.h

Lines changed: 20 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -32,48 +32,39 @@
3232
/**
3333
* RegExp flags
3434
*/
35-
#define RE_FLAG_GLOBAL (1u << 1) /* ECMA-262 v5, 15.10.7.2 */
36-
#define RE_FLAG_IGNORE_CASE (1u << 2) /* ECMA-262 v5, 15.10.7.3 */
37-
#define RE_FLAG_MULTILINE (1u << 3) /* ECMA-262 v5, 15.10.7.4 */
35+
typedef enum
36+
{
37+
RE_FLAG_GLOBAL = (1u << 1), /**< ECMA-262 v5, 15.10.7.2 */
38+
RE_FLAG_IGNORE_CASE = (1u << 2), /**< ECMA-262 v5, 15.10.7.3 */
39+
RE_FLAG_MULTILINE = (1u << 3) /**< ECMA-262 v5, 15.10.7.4 */
40+
} re_flags_t;
3841

3942
/**
4043
* RegExp executor context
4144
*/
4245
typedef struct
4346
{
44-
lit_utf8_byte_t **saved_p; /**< saved result string pointers, ECMA 262 v5, 15.10.2.1, State */
47+
lit_utf8_byte_t **saved_p; /**< saved result string pointers, ECMA 262 v5, 15.10.2.1, State */
4548
const lit_utf8_byte_t *input_start_p; /**< start of input pattern string */
46-
const lit_utf8_byte_t *input_end_p; /**< end of input pattern string */
47-
uint32_t num_of_captures; /**< number of capture groups */
48-
uint32_t num_of_non_captures; /**< number of non-capture groups */
49-
uint32_t *num_of_iterations_p; /**< number of iterations */
50-
uint16_t flags; /**< RegExp flags */
49+
const lit_utf8_byte_t *input_end_p; /**< end of input pattern string */
50+
uint32_t num_of_captures; /**< number of capture groups */
51+
uint32_t num_of_non_captures; /**< number of non-capture groups */
52+
uint32_t *num_of_iterations_p; /**< number of iterations */
53+
uint16_t flags; /**< RegExp flags */
5154
} re_matcher_ctx_t;
5255

53-
extern ecma_value_t
54-
ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *);
55-
56-
extern ecma_value_t
57-
ecma_op_create_regexp_object (ecma_string_t *, ecma_string_t *);
58-
59-
extern ecma_value_t
60-
ecma_regexp_exec_helper (ecma_value_t, ecma_value_t, bool);
61-
62-
extern ecma_char_t
63-
re_canonicalize (ecma_char_t, bool);
64-
extern void
65-
re_set_result_array_properties (ecma_object_t *, ecma_string_t *, uint32_t, int32_t);
66-
67-
extern ecma_value_t
68-
re_parse_regexp_flags (ecma_string_t *, uint16_t *);
69-
70-
extern void
71-
re_initialize_props (ecma_object_t *, ecma_string_t *, uint16_t);
56+
ecma_value_t ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *);
57+
ecma_value_t ecma_op_create_regexp_object (ecma_string_t *, ecma_string_t *);
58+
ecma_value_t ecma_regexp_exec_helper (ecma_value_t, ecma_value_t, bool);
59+
ecma_char_t re_canonicalize (ecma_char_t, bool);
60+
void re_set_result_array_properties (ecma_object_t *, ecma_string_t *, uint32_t, int32_t);
61+
ecma_value_t re_parse_regexp_flags (ecma_string_t *, uint16_t *);
62+
void re_initialize_props (ecma_object_t *, ecma_string_t *, uint16_t);
7263

7364
/**
7465
* @}
7566
* @}
7667
*/
7768

78-
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
69+
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
7970
#endif /* !ECMA_REGEXP_OBJECT_H */

jerry-core/jerry.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1655,9 +1655,9 @@ jerry_cleanup (void)
16551655

16561656
bool is_show_mem_stats = ((jerry_flags & JERRY_FLAG_MEM_STATS) != 0);
16571657

1658+
vm_finalize ();
16581659
ecma_finalize ();
16591660
lit_finalize ();
1660-
vm_finalize ();
16611661
mem_finalize (is_show_mem_stats);
16621662
} /* jerry_cleanup */
16631663

jerry-core/parser/js/js-lexer.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1812,7 +1812,6 @@ lexer_construct_regexp_object (parser_context_t *context_p, /**< context */
18121812
ecma_value_t completion_value;
18131813

18141814
ecma_string_t *pattern_str_p = ecma_new_ecma_string_from_utf8 (regex_start_p, length);
1815-
// FIXME: check return value of 're_compile_bytecode' and throw an error
18161815
completion_value = re_compile_bytecode (&re_bytecode_p,
18171816
pattern_str_p,
18181817
current_flags);

0 commit comments

Comments
 (0)