Skip to content

Commit 7891675

Browse files
committed
A few improvements for RegExp
Added an eviction mechanism to the RegExp cache and did some small refactoring. Fixed a bug that occurs when logging is enabled. Related issue: #927. JerryScript-DCO-1.0-Signed-off-by: László Langó [email protected]
1 parent 81d4c97 commit 7891675

File tree

2 files changed

+86
-74
lines changed

2 files changed

+86
-74
lines changed

jerry-core/parser/regexp/re-compiler.c

Lines changed: 86 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -445,19 +445,20 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
445445
} /* re_parse_alternative */
446446

447447
static const re_compiled_code_t *re_cache[RE_CACHE_SIZE];
448+
static uint16_t re_cache_idx = RE_CACHE_SIZE;
448449

449450
/**
450451
* Search for the given pattern in the RegExp cache
451452
*
452453
* @return compiled bytecode - if found
453454
* NULL - otherwise
454455
*/
455-
const re_compiled_code_t *
456+
static const re_compiled_code_t *
456457
re_find_bytecode_in_cache (ecma_string_t *pattern_str_p, /**< pattern string */
457458
uint16_t flags, /**< flags */
458-
uint32_t *idx) /**< [out] index */
459+
uint16_t *idx) /**< [out] index */
459460
{
460-
uint32_t free_idx = RE_CACHE_SIZE;
461+
uint16_t free_idx = RE_CACHE_SIZE;
461462

462463
for (*idx = 0u; *idx < RE_CACHE_SIZE; (*idx)++)
463464
{
@@ -521,6 +522,17 @@ re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] point
521522
uint16_t flags) /**< flags */
522523
{
523524
ecma_value_t ret_value = ecma_make_simple_value (ECMA_SIMPLE_VALUE_EMPTY);
525+
uint16_t cache_idx;
526+
527+
*out_bytecode_p = re_find_bytecode_in_cache (pattern_str_p, flags, &cache_idx);
528+
529+
if (*out_bytecode_p != NULL)
530+
{
531+
ecma_bytecode_ref ((ecma_compiled_code_t *) *out_bytecode_p);
532+
return ret_value;
533+
}
534+
535+
/* not in the RegExp cache, so compile it */
524536
re_compiler_ctx_t re_ctx;
525537
re_ctx.flags = flags;
526538
re_ctx.highest_backref = 0;
@@ -533,90 +545,93 @@ re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] point
533545

534546
re_ctx.bytecode_ctx_p = &bc_ctx;
535547

536-
uint32_t cache_idx;
537-
*out_bytecode_p = re_find_bytecode_in_cache (pattern_str_p, flags, &cache_idx);
548+
lit_utf8_size_t pattern_str_size = ecma_string_get_size (pattern_str_p);
549+
MEM_DEFINE_LOCAL_ARRAY (pattern_start_p, pattern_str_size, lit_utf8_byte_t);
538550

539-
if (*out_bytecode_p != NULL)
540-
{
541-
ecma_bytecode_ref ((ecma_compiled_code_t *) *out_bytecode_p);
542-
}
543-
else
544-
{ /* not in the RegExp cache, so compile it */
545-
lit_utf8_size_t pattern_str_size = ecma_string_get_size (pattern_str_p);
546-
MEM_DEFINE_LOCAL_ARRAY (pattern_start_p, pattern_str_size, lit_utf8_byte_t);
551+
lit_utf8_size_t sz = ecma_string_to_utf8_string (pattern_str_p, pattern_start_p, pattern_str_size);
552+
JERRY_ASSERT (sz == pattern_str_size);
547553

548-
lit_utf8_size_t sz = ecma_string_to_utf8_string (pattern_str_p, pattern_start_p, pattern_str_size);
549-
JERRY_ASSERT (sz == pattern_str_size);
554+
re_parser_ctx_t parser_ctx;
555+
parser_ctx.input_start_p = pattern_start_p;
556+
parser_ctx.input_curr_p = pattern_start_p;
557+
parser_ctx.input_end_p = pattern_start_p + pattern_str_size;
558+
parser_ctx.num_of_groups = -1;
559+
re_ctx.parser_ctx_p = &parser_ctx;
550560

551-
re_parser_ctx_t parser_ctx;
552-
parser_ctx.input_start_p = pattern_start_p;
553-
parser_ctx.input_curr_p = pattern_start_p;
554-
parser_ctx.input_end_p = pattern_start_p + pattern_str_size;
555-
parser_ctx.num_of_groups = -1;
556-
re_ctx.parser_ctx_p = &parser_ctx;
561+
/* 1. Parse RegExp pattern */
562+
re_ctx.num_of_captures = 1;
563+
re_append_opcode (&bc_ctx, RE_OP_SAVE_AT_START);
557564

558-
/* 1. Parse RegExp pattern */
559-
re_ctx.num_of_captures = 1;
560-
re_append_opcode (&bc_ctx, RE_OP_SAVE_AT_START);
565+
ECMA_TRY_CATCH (empty, re_parse_alternative (&re_ctx, true), ret_value);
561566

562-
ECMA_TRY_CATCH (empty, re_parse_alternative (&re_ctx, true), ret_value);
567+
/* 2. Check for invalid backreference */
568+
if (re_ctx.highest_backref >= re_ctx.num_of_captures)
569+
{
570+
ret_value = ecma_raise_syntax_error ("Invalid backreference.\n");
571+
}
572+
else
573+
{
574+
re_append_opcode (&bc_ctx, RE_OP_SAVE_AND_MATCH);
575+
re_append_opcode (&bc_ctx, RE_OP_EOF);
576+
577+
/* 3. Insert extra information into the bytecode header */
578+
re_compiled_code_t re_compiled_code;
579+
580+
re_compiled_code.flags = re_ctx.flags | (1u << ECMA_BYTECODE_REF_SHIFT);
581+
ECMA_SET_NON_NULL_POINTER (re_compiled_code.pattern_cp,
582+
ecma_copy_or_ref_ecma_string (pattern_str_p));
583+
re_compiled_code.num_of_captures = re_ctx.num_of_captures * 2;
584+
re_compiled_code.num_of_non_captures = re_ctx.num_of_non_captures;
585+
586+
re_bytecode_list_insert (&bc_ctx,
587+
0,
588+
(uint8_t *) &re_compiled_code,
589+
sizeof (re_compiled_code_t));
590+
}
563591

564-
/* 2. Check for invalid backreference */
565-
if (re_ctx.highest_backref >= re_ctx.num_of_captures)
566-
{
567-
ret_value = ecma_raise_syntax_error ("Invalid backreference.\n");
568-
}
569-
else
570-
{
571-
re_append_opcode (&bc_ctx, RE_OP_SAVE_AND_MATCH);
572-
re_append_opcode (&bc_ctx, RE_OP_EOF);
573-
574-
/* 3. Insert extra informations for bytecode header */
575-
re_compiled_code_t re_compiled_code;
576-
577-
re_compiled_code.flags = re_ctx.flags | (1u << ECMA_BYTECODE_REF_SHIFT);
578-
ECMA_SET_NON_NULL_POINTER (re_compiled_code.pattern_cp,
579-
ecma_copy_or_ref_ecma_string (pattern_str_p));
580-
re_compiled_code.num_of_captures = re_ctx.num_of_captures * 2;
581-
re_compiled_code.num_of_non_captures = re_ctx.num_of_non_captures;
582-
583-
re_bytecode_list_insert (&bc_ctx,
584-
0,
585-
(uint8_t *) &re_compiled_code,
586-
sizeof (re_compiled_code_t));
587-
}
592+
ECMA_FINALIZE (empty);
588593

589-
ECMA_FINALIZE (empty);
594+
MEM_FINALIZE_LOCAL_ARRAY (pattern_start_p);
590595

591-
MEM_FINALIZE_LOCAL_ARRAY (pattern_start_p);
596+
if (!ecma_is_value_empty (ret_value))
597+
{
598+
/* Compilation failed, free bytecode. */
599+
JERRY_DDLOG ("RegExp compilation failed!\n");
600+
mem_heap_free_block_size_stored (bc_ctx.block_start_p);
601+
*out_bytecode_p = NULL;
602+
}
603+
else
604+
{
605+
#ifdef JERRY_ENABLE_LOG
606+
re_dump_bytecode (&bc_ctx);
607+
#endif
592608

593-
if (!ecma_is_value_empty (ret_value))
594-
{
595-
/* Compilation failed, free bytecode. */
596-
mem_heap_free_block_size_stored (bc_ctx.block_start_p);
597-
*out_bytecode_p = NULL;
598-
}
599-
else
600-
{
601-
/* The RegExp bytecode contains at least a RE_OP_SAVE_AT_START opdoce, so it cannot be NULL. */
602-
JERRY_ASSERT (bc_ctx.block_start_p != NULL);
603-
*out_bytecode_p = (re_compiled_code_t *) bc_ctx.block_start_p;
609+
/* The RegExp bytecode contains at least a RE_OP_SAVE_AT_START opcode, so it cannot be NULL. */
610+
JERRY_ASSERT (bc_ctx.block_start_p != NULL);
611+
*out_bytecode_p = (re_compiled_code_t *) bc_ctx.block_start_p;
604612

605-
if (cache_idx < RE_CACHE_SIZE)
613+
if (cache_idx >= RE_CACHE_SIZE)
614+
{
615+
if (re_cache_idx < 1u)
606616
{
607-
ecma_bytecode_ref ((ecma_compiled_code_t *) *out_bytecode_p);
608-
re_cache[cache_idx] = *out_bytecode_p;
617+
re_cache_idx = RE_CACHE_SIZE;
609618
}
610-
else
619+
620+
const re_compiled_code_t *cached_bytecode_p = re_cache[--re_cache_idx];
621+
JERRY_DDLOG ("RegExp cache is full! Remove the element on idx: %d\n", re_cache_idx);
622+
623+
if (cached_bytecode_p != NULL)
611624
{
612-
JERRY_DDLOG ("RegExp cache is full! Cannot add new bytecode to it.");
625+
ecma_bytecode_deref ((re_compiled_code_t *) cached_bytecode_p);
613626
}
627+
628+
cache_idx = re_cache_idx;
614629
}
615-
}
616630

617-
#ifdef JERRY_ENABLE_LOG
618-
re_dump_bytecode (&bc_ctx);
619-
#endif
631+
JERRY_DDLOG ("Insert bytecode into RegExp cache (idx: %d).\n", cache_idx);
632+
ecma_bytecode_ref ((ecma_compiled_code_t *) *out_bytecode_p);
633+
re_cache[cache_idx] = *out_bytecode_p;
634+
}
620635

621636
return ret_value;
622637
} /* re_compile_bytecode */

jerry-core/parser/regexp/re-compiler.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,6 @@ typedef struct
5050
ecma_value_t
5151
re_compile_bytecode (const re_compiled_code_t **, ecma_string_t *, uint16_t);
5252

53-
const re_compiled_code_t *
54-
re_find_bytecode_in_cache (ecma_string_t *pattern_str_p, uint16_t flags, uint32_t *idx);
55-
5653
void re_cache_gc_run ();
5754

5855
/**

0 commit comments

Comments
 (0)