Skip to content

Commit f58c816

Browse files
committed
Few improvements for RegExp
Added an eviction mechanism to the RegExp cache and did some small refactoring. Fixed a bug that occurs when logging is enabled. Related issue: #927. JerryScript-DCO-1.0-Signed-off-by: László Langó [email protected]
1 parent ab26d57 commit f58c816

File tree

2 files changed

+96
-83
lines changed

2 files changed

+96
-83
lines changed

jerry-core/parser/regexp/re-compiler.c

Lines changed: 96 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -445,23 +445,23 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
445445
} /* re_parse_alternative */
446446

447447
static const re_compiled_code_t *re_cache[RE_CACHE_SIZE];
448+
static uint16_t re_cache_idx = RE_CACHE_SIZE;
448449

449450
/**
450451
* Search for the given pattern in the RegExp cache
451452
*
452-
* @return compiled bytecode - if found
453-
* NULL - otherwise
453+
* @return index of bytecode in cache - if found
454+
* RE_CACHE_SIZE - otherwise
454455
*/
455-
const re_compiled_code_t *
456+
static uint16_t
456457
re_find_bytecode_in_cache (ecma_string_t *pattern_str_p, /**< pattern string */
457-
uint16_t flags, /**< flags */
458-
uint32_t *idx) /**< [out] index */
458+
uint16_t flags) /**< flags */
459459
{
460-
uint32_t free_idx = RE_CACHE_SIZE;
460+
uint16_t free_idx = RE_CACHE_SIZE;
461461

462-
for (*idx = 0u; *idx < RE_CACHE_SIZE; (*idx)++)
462+
for (uint16_t idx = 0u; idx < RE_CACHE_SIZE; idx++)
463463
{
464-
const re_compiled_code_t *cached_bytecode_p = re_cache[*idx];
464+
const re_compiled_code_t *cached_bytecode_p = re_cache[idx];
465465

466466
if (cached_bytecode_p != NULL)
467467
{
@@ -472,19 +472,18 @@ re_find_bytecode_in_cache (ecma_string_t *pattern_str_p, /**< pattern string */
472472
&& ecma_compare_ecma_strings (cached_pattern_str_p, pattern_str_p))
473473
{
474474
JERRY_DDLOG ("RegExp is found in cache\n");
475-
return re_cache[*idx];
475+
return idx;
476476
}
477477
}
478478
else
479479
{
480480
/* mark as free, so it can be overridden if the cache is full */
481-
free_idx = *idx;
481+
free_idx = idx;
482482
}
483483
}
484484

485485
JERRY_DDLOG ("RegExp is NOT found in cache\n");
486-
*idx = free_idx;
487-
return NULL;
486+
return free_idx;
488487
} /* re_find_bytecode_in_cache */
489488

490489
/**
@@ -521,6 +520,20 @@ re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] point
521520
uint16_t flags) /**< flags */
522521
{
523522
ecma_value_t ret_value = ecma_make_simple_value (ECMA_SIMPLE_VALUE_EMPTY);
523+
uint16_t cache_idx = re_find_bytecode_in_cache (pattern_str_p, flags);
524+
525+
if (cache_idx < RE_CACHE_SIZE)
526+
{
527+
*out_bytecode_p = re_cache[cache_idx];
528+
529+
if (*out_bytecode_p != NULL)
530+
{
531+
ecma_bytecode_ref ((ecma_compiled_code_t *) *out_bytecode_p);
532+
return ret_value;
533+
}
534+
}
535+
536+
/* not in the RegExp cache, so compile it */
524537
re_compiler_ctx_t re_ctx;
525538
re_ctx.flags = flags;
526539
re_ctx.highest_backref = 0;
@@ -533,90 +546,93 @@ re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] point
533546

534547
re_ctx.bytecode_ctx_p = &bc_ctx;
535548

536-
uint32_t cache_idx;
537-
*out_bytecode_p = re_find_bytecode_in_cache (pattern_str_p, flags, &cache_idx);
549+
lit_utf8_size_t pattern_str_size = ecma_string_get_size (pattern_str_p);
550+
MEM_DEFINE_LOCAL_ARRAY (pattern_start_p, pattern_str_size, lit_utf8_byte_t);
538551

539-
if (*out_bytecode_p != NULL)
540-
{
541-
ecma_bytecode_ref ((ecma_compiled_code_t *) *out_bytecode_p);
542-
}
543-
else
544-
{ /* not in the RegExp cache, so compile it */
545-
lit_utf8_size_t pattern_str_size = ecma_string_get_size (pattern_str_p);
546-
MEM_DEFINE_LOCAL_ARRAY (pattern_start_p, pattern_str_size, lit_utf8_byte_t);
552+
lit_utf8_size_t sz = ecma_string_to_utf8_string (pattern_str_p, pattern_start_p, pattern_str_size);
553+
JERRY_ASSERT (sz == pattern_str_size);
547554

548-
lit_utf8_size_t sz = ecma_string_to_utf8_string (pattern_str_p, pattern_start_p, pattern_str_size);
549-
JERRY_ASSERT (sz == pattern_str_size);
555+
re_parser_ctx_t parser_ctx;
556+
parser_ctx.input_start_p = pattern_start_p;
557+
parser_ctx.input_curr_p = pattern_start_p;
558+
parser_ctx.input_end_p = pattern_start_p + pattern_str_size;
559+
parser_ctx.num_of_groups = -1;
560+
re_ctx.parser_ctx_p = &parser_ctx;
550561

551-
re_parser_ctx_t parser_ctx;
552-
parser_ctx.input_start_p = pattern_start_p;
553-
parser_ctx.input_curr_p = pattern_start_p;
554-
parser_ctx.input_end_p = pattern_start_p + pattern_str_size;
555-
parser_ctx.num_of_groups = -1;
556-
re_ctx.parser_ctx_p = &parser_ctx;
562+
/* 1. Parse RegExp pattern */
563+
re_ctx.num_of_captures = 1;
564+
re_append_opcode (&bc_ctx, RE_OP_SAVE_AT_START);
557565

558-
/* 1. Parse RegExp pattern */
559-
re_ctx.num_of_captures = 1;
560-
re_append_opcode (&bc_ctx, RE_OP_SAVE_AT_START);
566+
ECMA_TRY_CATCH (empty, re_parse_alternative (&re_ctx, true), ret_value);
561567

562-
ECMA_TRY_CATCH (empty, re_parse_alternative (&re_ctx, true), ret_value);
568+
/* 2. Check for invalid backreference */
569+
if (re_ctx.highest_backref >= re_ctx.num_of_captures)
570+
{
571+
ret_value = ecma_raise_syntax_error ("Invalid backreference.\n");
572+
}
573+
else
574+
{
575+
re_append_opcode (&bc_ctx, RE_OP_SAVE_AND_MATCH);
576+
re_append_opcode (&bc_ctx, RE_OP_EOF);
577+
578+
/* 3. Insert extra information for the bytecode header */
579+
re_compiled_code_t re_compiled_code;
580+
581+
re_compiled_code.flags = re_ctx.flags | (1u << ECMA_BYTECODE_REF_SHIFT);
582+
ECMA_SET_NON_NULL_POINTER (re_compiled_code.pattern_cp,
583+
ecma_copy_or_ref_ecma_string (pattern_str_p));
584+
re_compiled_code.num_of_captures = re_ctx.num_of_captures * 2;
585+
re_compiled_code.num_of_non_captures = re_ctx.num_of_non_captures;
586+
587+
re_bytecode_list_insert (&bc_ctx,
588+
0,
589+
(uint8_t *) &re_compiled_code,
590+
sizeof (re_compiled_code_t));
591+
}
563592

564-
/* 2. Check for invalid backreference */
565-
if (re_ctx.highest_backref >= re_ctx.num_of_captures)
566-
{
567-
ret_value = ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid backreference.\n"));
568-
}
569-
else
570-
{
571-
re_append_opcode (&bc_ctx, RE_OP_SAVE_AND_MATCH);
572-
re_append_opcode (&bc_ctx, RE_OP_EOF);
573-
574-
/* 3. Insert extra information for the bytecode header */
575-
re_compiled_code_t re_compiled_code;
576-
577-
re_compiled_code.flags = re_ctx.flags | (1u << ECMA_BYTECODE_REF_SHIFT);
578-
ECMA_SET_NON_NULL_POINTER (re_compiled_code.pattern_cp,
579-
ecma_copy_or_ref_ecma_string (pattern_str_p));
580-
re_compiled_code.num_of_captures = re_ctx.num_of_captures * 2;
581-
re_compiled_code.num_of_non_captures = re_ctx.num_of_non_captures;
582-
583-
re_bytecode_list_insert (&bc_ctx,
584-
0,
585-
(uint8_t *) &re_compiled_code,
586-
sizeof (re_compiled_code_t));
587-
}
593+
ECMA_FINALIZE (empty);
588594

589-
ECMA_FINALIZE (empty);
595+
MEM_FINALIZE_LOCAL_ARRAY (pattern_start_p);
590596

591-
MEM_FINALIZE_LOCAL_ARRAY (pattern_start_p);
597+
if (!ecma_is_value_empty (ret_value))
598+
{
599+
/* Compilation failed, free bytecode. */
600+
JERRY_DDLOG ("RegExp compilation failed!\n");
601+
mem_heap_free_block_size_stored (bc_ctx.block_start_p);
602+
*out_bytecode_p = NULL;
603+
}
604+
else
605+
{
606+
#ifdef JERRY_ENABLE_LOG
607+
re_dump_bytecode (&bc_ctx);
608+
#endif
592609

593-
if (!ecma_is_value_empty (ret_value))
594-
{
595-
/* Compilation failed, free bytecode. */
596-
mem_heap_free_block_size_stored (bc_ctx.block_start_p);
597-
*out_bytecode_p = NULL;
598-
}
599-
else
600-
{
601-
/* The RegExp bytecode contains at least a RE_OP_SAVE_AT_START opcode, so it cannot be NULL. */
602-
JERRY_ASSERT (bc_ctx.block_start_p != NULL);
603-
*out_bytecode_p = (re_compiled_code_t *) bc_ctx.block_start_p;
610+
/* The RegExp bytecode contains at least a RE_OP_SAVE_AT_START opcode, so it cannot be NULL. */
611+
JERRY_ASSERT (bc_ctx.block_start_p != NULL);
612+
*out_bytecode_p = (re_compiled_code_t *) bc_ctx.block_start_p;
604613

605-
if (cache_idx < RE_CACHE_SIZE)
614+
if (cache_idx == RE_CACHE_SIZE)
615+
{
616+
if (re_cache_idx < 1u)
606617
{
607-
ecma_bytecode_ref ((ecma_compiled_code_t *) *out_bytecode_p);
608-
re_cache[cache_idx] = *out_bytecode_p;
618+
re_cache_idx = RE_CACHE_SIZE;
609619
}
610-
else
620+
621+
const re_compiled_code_t *cached_bytecode_p = re_cache[--re_cache_idx];
622+
JERRY_DDLOG ("RegExp cache is full! Remove the element on idx: %d\n", re_cache_idx);
623+
624+
if (cached_bytecode_p != NULL)
611625
{
612-
JERRY_DDLOG ("RegExp cache is full! Cannot add new bytecode to it.");
626+
ecma_bytecode_deref ((ecma_compiled_code_t *) cached_bytecode_p);
613627
}
628+
629+
cache_idx = re_cache_idx;
614630
}
615-
}
616631

617-
#ifdef JERRY_ENABLE_LOG
618-
re_dump_bytecode (&bc_ctx);
619-
#endif
632+
JERRY_DDLOG ("Insert bytecode into RegExp cache (idx: %d).\n", cache_idx);
633+
ecma_bytecode_ref ((ecma_compiled_code_t *) *out_bytecode_p);
634+
re_cache[cache_idx] = *out_bytecode_p;
635+
}
620636

621637
return ret_value;
622638
} /* re_compile_bytecode */

jerry-core/parser/regexp/re-compiler.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,6 @@ typedef struct
5050
ecma_value_t
5151
re_compile_bytecode (const re_compiled_code_t **, ecma_string_t *, uint16_t);
5252

53-
const re_compiled_code_t *
54-
re_find_bytecode_in_cache (ecma_string_t *pattern_str_p, uint16_t flags, uint32_t *idx);
55-
5653
void re_cache_gc_run ();
5754

5855
/**

0 commit comments

Comments
 (0)