Skip to content

Commit d0b7bdf

Browse files
committed
Add RegExp cache, optimize CHAR_CLASS
JerryScript-DCO-1.0-Signed-off-by: László Langó [email protected]
1 parent ec0c9b1 commit d0b7bdf

File tree

4 files changed

+127
-63
lines changed

4 files changed

+127
-63
lines changed

jerry-core/ecma/operations/ecma-regexp-object.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -618,8 +618,8 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
618618

619619
while (num_of_ranges)
620620
{
621-
ecma_char_t ch1 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
622-
ecma_char_t ch2 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
621+
ecma_char_t ch1 = re_canonicalize (re_get_char (&bc_p), is_ignorecase);
622+
ecma_char_t ch2 = re_canonicalize (re_get_char (&bc_p), is_ignorecase);
623623
JERRY_DDLOG ("num_of_ranges=%d, ch1=%d, ch2=%d, curr_ch=%d; ",
624624
num_of_ranges, ch1, ch2, curr_ch);
625625

jerry-core/parser/regexp/re-bytecode.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -420,8 +420,8 @@ re_dump_bytecode (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
420420
JERRY_DLOG ("%d", num_of_class);
421421
while (num_of_class)
422422
{
423-
JERRY_DLOG (" %d", re_get_value (&bytecode_p));
424-
JERRY_DLOG ("-%d", re_get_value (&bytecode_p));
423+
JERRY_DLOG (" %d", re_get_char (&bytecode_p));
424+
JERRY_DLOG ("-%d", re_get_char (&bytecode_p));
425425
num_of_class--;
426426
}
427427
JERRY_DLOG (", ");

jerry-core/parser/regexp/re-bytecode.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@
3131
* @{
3232
*/
3333

34+
/**
35+
* Size of the RegExp bytecode cache
36+
*/
37+
#define RE_CHACHE_SIZE 10u
38+
3439
/**
3540
* RegExp opcodes
3641
*/
@@ -116,4 +121,4 @@ void re_dump_bytecode (re_bytecode_ctx_t *bc_ctx);
116121
*/
117122

118123
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
119-
#endif RE_BYTECODE_H /* !RE_BYTECODE_H */
124+
#endif /* !RE_BYTECODE_H */

jerry-core/parser/regexp/re-compiler.c

Lines changed: 117 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,12 @@
4141
*/
4242
static void
4343
re_append_char_class (void *re_ctx_p, /**< RegExp compiler context */
44-
uint32_t start, /**< character class range from */
45-
uint32_t end) /**< character class range to */
44+
ecma_char_t start, /**< character class range from */
45+
ecma_char_t end) /**< character class range to */
4646
{
4747
re_compiler_ctx_t *ctx_p = (re_compiler_ctx_t *) re_ctx_p;
48-
re_append_u32 (ctx_p->bytecode_ctx_p, start);
49-
re_append_u32 (ctx_p->bytecode_ctx_p, end);
48+
re_append_char (ctx_p->bytecode_ctx_p, start);
49+
re_append_char (ctx_p->bytecode_ctx_p, end);
5050
ctx_p->parser_ctx_p->num_of_classes++;
5151
} /* re_append_char_class */
5252

@@ -202,7 +202,7 @@ re_insert_into_group_with_jump (re_compiler_ctx_t *re_ctx_p, /**< RegExp compile
202202
/**
203203
* Parse alternatives
204204
*
205-
* @return completion value
205+
* @return ecma value
206206
* Returned value must be freed with ecma_free_value
207207
*/
208208
static ecma_value_t
@@ -438,14 +438,54 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
438438
return ret_value;
439439
} /* re_parse_alternative */
440440

441+
static const re_compiled_code_t *re_cache[RE_CHACHE_SIZE];
442+
443+
/**
444+
* Search for the given pattern in the RegExp cache (the reference count of a found entry is not changed here)
445+
*
446+
* @return compiled bytecode - if found, *idx holds the index of the matching slot
447+
* NULL - otherwise, *idx holds a reusable slot index or RE_CHACHE_SIZE if every slot is in use
448+
*/
449+
static re_compiled_code_t *
450+
re_find_bytecode_in_cache (ecma_string_t *pattern_str_p, /**< pattern string */
451+
uint32_t *idx) /**< [out] index of the hit, or of a reusable slot on a miss */
452+
{
453+
uint32_t free_idx = RE_CHACHE_SIZE;
454+
/* Visit every slot: never-used (NULL) slots must be reported as reusable, otherwise an empty cache could never be filled. */
455+
for (*idx = 0u; *idx < RE_CHACHE_SIZE; (*idx)++)
456+
{
457+
re_compiled_code_t *cached_bytecode_p = re_cache[*idx];
458+
/* NOTE(review): a non-NULL entry whose reference count is zero is treated as reusable; confirm its memory is still readable here. */
459+
if (cached_bytecode_p != NULL && (cached_bytecode_p->flags >> ECMA_BYTECODE_REF_SHIFT) > 0)
460+
{
461+
ecma_string_t *cached_pattern_str_p;
462+
cached_pattern_str_p = ECMA_GET_NON_NULL_POINTER (ecma_string_t, cached_bytecode_p->pattern_cp);
463+
/* Entries are matched on the pattern string alone; the RegExp flags are not compared. */
464+
if (ecma_compare_ecma_strings (cached_pattern_str_p, pattern_str_p))
465+
{
466+
return re_cache[*idx];
467+
}
468+
}
469+
else
470+
{
471+
/* empty or no longer referenced slot: remember it so the caller can store new bytecode there */
472+
free_idx = *idx;
473+
}
474+
}
475+
/* Miss: hand back the reusable slot, or RE_CHACHE_SIZE when none was seen. */
476+
*idx = free_idx;
477+
478+
return NULL;
479+
} /* re_find_bytecode_in_cache */
480+
441481
/**
442482
* Compilation of RegExp bytecode
443483
*
444-
* @return completion value
484+
* @return ecma value
445485
* Returned value must be freed with ecma_free_value
446486
*/
447487
ecma_value_t
448-
re_compile_bytecode (re_compiled_code_t **out_bytecode_p, /**< out:pointer to bytecode */
488+
re_compile_bytecode (re_compiled_code_t **out_bytecode_p, /**< [out] pointer to bytecode */
449489
ecma_string_t *pattern_str_p, /**< pattern */
450490
uint16_t flags) /**< flags */
451491
{
@@ -462,64 +502,83 @@ re_compile_bytecode (re_compiled_code_t **out_bytecode_p, /**< out:pointer to by
462502

463503
re_ctx.bytecode_ctx_p = &bc_ctx;
464504

465-
lit_utf8_size_t pattern_str_size = ecma_string_get_size (pattern_str_p);
466-
MEM_DEFINE_LOCAL_ARRAY (pattern_start_p, pattern_str_size, lit_utf8_byte_t);
505+
uint32_t cache_idx;
506+
*out_bytecode_p = re_find_bytecode_in_cache (pattern_str_p, &cache_idx);
467507

468-
ssize_t sz = ecma_string_to_utf8_string (pattern_str_p, pattern_start_p, (ssize_t) pattern_str_size);
469-
JERRY_ASSERT (sz >= 0);
508+
if (*out_bytecode_p != NULL)
509+
{
510+
ecma_bytecode_ref ((ecma_compiled_code_t *) *out_bytecode_p);
511+
}
512+
else
513+
{ /* not in the RegExp cache, so compile it */
514+
lit_utf8_size_t pattern_str_size = ecma_string_get_size (pattern_str_p);
515+
MEM_DEFINE_LOCAL_ARRAY (pattern_start_p, pattern_str_size, lit_utf8_byte_t);
470516

471-
re_parser_ctx_t parser_ctx;
472-
parser_ctx.input_start_p = pattern_start_p;
473-
parser_ctx.input_curr_p = pattern_start_p;
474-
parser_ctx.input_end_p = pattern_start_p + pattern_str_size;
475-
parser_ctx.num_of_groups = -1;
476-
re_ctx.parser_ctx_p = &parser_ctx;
517+
ssize_t sz = ecma_string_to_utf8_string (pattern_str_p, pattern_start_p, (ssize_t) pattern_str_size);
518+
JERRY_ASSERT (sz >= 0);
477519

478-
/* 1. Parse RegExp pattern */
479-
re_ctx.num_of_captures = 1;
480-
re_append_opcode (&bc_ctx, RE_OP_SAVE_AT_START);
520+
re_parser_ctx_t parser_ctx;
521+
parser_ctx.input_start_p = pattern_start_p;
522+
parser_ctx.input_curr_p = pattern_start_p;
523+
parser_ctx.input_end_p = pattern_start_p + pattern_str_size;
524+
parser_ctx.num_of_groups = -1;
525+
re_ctx.parser_ctx_p = &parser_ctx;
481526

482-
ECMA_TRY_CATCH (empty, re_parse_alternative (&re_ctx, true), ret_value);
527+
/* 1. Parse RegExp pattern */
528+
re_ctx.num_of_captures = 1;
529+
re_append_opcode (&bc_ctx, RE_OP_SAVE_AT_START);
483530

484-
/* 2. Check for invalid backreference */
485-
if (re_ctx.highest_backref >= re_ctx.num_of_captures)
486-
{
487-
ret_value = ecma_raise_syntax_error ("Invalid backreference.\n");
488-
}
489-
else
490-
{
491-
re_append_opcode (&bc_ctx, RE_OP_SAVE_AND_MATCH);
492-
re_append_opcode (&bc_ctx, RE_OP_EOF);
493-
494-
/* 3. Insert extra informations for bytecode header */
495-
re_compiled_code_t re_compiled_code;
496-
497-
re_compiled_code.flags = re_ctx.flags | (1u << ECMA_BYTECODE_REF_SHIFT);
498-
ECMA_SET_NON_NULL_POINTER (re_compiled_code.pattern_cp,
499-
ecma_copy_or_ref_ecma_string (pattern_str_p));
500-
re_compiled_code.num_of_captures = re_ctx.num_of_captures * 2;
501-
re_compiled_code.num_of_non_captures = re_ctx.num_of_non_captures;
502-
503-
re_bytecode_list_insert (&bc_ctx,
504-
0,
505-
(uint8_t *) &re_compiled_code,
506-
sizeof (re_compiled_code_t));
507-
}
508-
ECMA_FINALIZE (empty);
531+
ECMA_TRY_CATCH (empty, re_parse_alternative (&re_ctx, true), ret_value);
509532

510-
MEM_FINALIZE_LOCAL_ARRAY (pattern_start_p);
533+
/* 2. Check for invalid backreference */
534+
if (re_ctx.highest_backref >= re_ctx.num_of_captures)
535+
{
536+
ret_value = ecma_raise_syntax_error ("Invalid backreference.\n");
537+
}
538+
else
539+
{
540+
re_append_opcode (&bc_ctx, RE_OP_SAVE_AND_MATCH);
541+
re_append_opcode (&bc_ctx, RE_OP_EOF);
542+
543+
/* 3. Insert extra information for bytecode header */
544+
re_compiled_code_t re_compiled_code;
545+
546+
re_compiled_code.flags = re_ctx.flags | (1u << ECMA_BYTECODE_REF_SHIFT);
547+
ECMA_SET_NON_NULL_POINTER (re_compiled_code.pattern_cp,
548+
ecma_copy_or_ref_ecma_string (pattern_str_p));
549+
re_compiled_code.num_of_captures = re_ctx.num_of_captures * 2;
550+
re_compiled_code.num_of_non_captures = re_ctx.num_of_non_captures;
551+
552+
re_bytecode_list_insert (&bc_ctx,
553+
0,
554+
(uint8_t *) &re_compiled_code,
555+
sizeof (re_compiled_code_t));
556+
}
557+
ECMA_FINALIZE (empty);
511558

512-
if (!ecma_is_value_empty (ret_value))
513-
{
514-
/* Compilation failed, free bytecode. */
515-
mem_heap_free_block (bc_ctx.block_start_p);
516-
*out_bytecode_p = NULL;
517-
}
518-
else
519-
{
520-
/* The RegExp bytecode contains at least a RE_OP_SAVE_AT_START opdoce, so it cannot be NULL. */
521-
JERRY_ASSERT (bc_ctx.block_start_p != NULL);
522-
*out_bytecode_p = (re_compiled_code_t *) bc_ctx.block_start_p;
559+
MEM_FINALIZE_LOCAL_ARRAY (pattern_start_p);
560+
561+
if (!ecma_is_value_empty (ret_value))
562+
{
563+
/* Compilation failed, free bytecode. */
564+
mem_heap_free_block (bc_ctx.block_start_p);
565+
*out_bytecode_p = NULL;
566+
}
567+
else
568+
{
569+
/* The RegExp bytecode contains at least a RE_OP_SAVE_AT_START opcode, so it cannot be NULL. */
570+
JERRY_ASSERT (bc_ctx.block_start_p != NULL);
571+
*out_bytecode_p = (re_compiled_code_t *) bc_ctx.block_start_p;
572+
573+
if (cache_idx < RE_CHACHE_SIZE)
574+
{
575+
re_cache[cache_idx] = *out_bytecode_p;
576+
}
577+
else
578+
{
579+
JERRY_DDLOG ("RegExp cache is full! Cannot add new bytecode to it.");
580+
}
581+
}
523582
}
524583

525584
#ifdef JERRY_ENABLE_LOG

0 commit comments

Comments
 (0)