Skip to content

Add RegExp recursion depth limit #2543

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions jerry-core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ set(FEATURE_SYSTEM_ALLOCATOR OFF CACHE BOOL "Enable system allocator?")
set(FEATURE_VALGRIND OFF CACHE BOOL "Enable Valgrind support?")
set(FEATURE_VM_EXEC_STOP OFF CACHE BOOL "Enable VM execution stopping?")
set(MEM_HEAP_SIZE_KB "512" CACHE STRING "Size of memory heap, in kilobytes")
# Maximum RegExp recursion depth; with the default "0" no REGEXP_RECURSION_LIMIT compile definition is added.
set(REGEXP_RECURSION_LIMIT "0" CACHE STRING "Limit of regexp recursion depth")

# Option overrides
if(USING_MSVC)
Expand Down Expand Up @@ -94,6 +95,7 @@ message(STATUS "FEATURE_SYSTEM_ALLOCATOR " ${FEATURE_SYSTEM_ALLOCATOR})
message(STATUS "FEATURE_VALGRIND " ${FEATURE_VALGRIND})
message(STATUS "FEATURE_VM_EXEC_STOP " ${FEATURE_VM_EXEC_STOP})
message(STATUS "MEM_HEAP_SIZE_KB " ${MEM_HEAP_SIZE_KB})
message(STATUS "REGEXP_RECURSION_LIMIT " ${REGEXP_RECURSION_LIMIT})

# Include directories
set(INCLUDE_CORE_PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
Expand Down Expand Up @@ -228,6 +230,11 @@ if(FEATURE_REGEXP_STRICT_MODE)
set(DEFINES_JERRY ${DEFINES_JERRY} ENABLE_REGEXP_STRICT_MODE)
endif()

# RegExp recursion depth limit
# REGEXP_RECURSION_LIMIT must be a non-negative decimal integer:
#   0 (the default) adds no define, so the recursion counter macros expand to nothing;
#   N > 0 passes REGEXP_RECURSION_LIMIT=N to the compiler.
# Any other value is rejected here, at configure time. Without this check a
# non-numeric string is silently treated as "disabled" by if(), while values
# such as ON or -5 only fail later during C compilation. Leading zeros are
# refused because the C compiler would read them as an octal literal.
if(NOT "${REGEXP_RECURSION_LIMIT}" MATCHES "^(0|[1-9][0-9]*)$")
  message(FATAL_ERROR "REGEXP_RECURSION_LIMIT must be a non-negative integer (0 disables the limit), got: '${REGEXP_RECURSION_LIMIT}'")
endif()
if(REGEXP_RECURSION_LIMIT GREATER 0)
  set(DEFINES_JERRY ${DEFINES_JERRY} REGEXP_RECURSION_LIMIT=${REGEXP_RECURSION_LIMIT})
endif()

# RegExp byte-code dumps
if(FEATURE_REGEXP_DUMP)
set(DEFINES_JERRY ${DEFINES_JERRY} REGEXP_DUMP_BYTE_CODE)
Expand Down
43 changes: 43 additions & 0 deletions jerry-core/ecma/operations/ecma-regexp-object.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@
*/
#define RE_IS_CAPTURE_GROUP(x) (((x) < RE_OP_NON_CAPTURE_GROUP_START) ? 1 : 0)

/**
* Compile-time sanity check of the RegExp recursion depth limit:
* when REGEXP_RECURSION_LIMIT is defined, its value must be greater than zero.
*/
#ifdef REGEXP_RECURSION_LIMIT
JERRY_STATIC_ASSERT (REGEXP_RECURSION_LIMIT > 0, regexp_recursion_limit_must_be_greater_than_zero);
#endif /* REGEXP_RECURSION_LIMIT */

/**
* Parse RegExp flags (global, ignoreCase, multiline)
*
Expand Down Expand Up @@ -344,6 +351,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
const lit_utf8_byte_t *str_p, /**< input string pointer */
const lit_utf8_byte_t **out_str_p) /**< [out] matching substring iterator */
{
REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST ();
const lit_utf8_byte_t *str_curr_p = str_p;

while (true)
Expand All @@ -356,12 +364,14 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
{
JERRY_TRACE_MSG ("Execute RE_OP_MATCH: match\n");
*out_str_p = str_curr_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_TRUE; /* match */
}
case RE_OP_CHAR:
{
if (str_curr_p >= re_ctx_p->input_end_p)
{
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}

Expand All @@ -373,6 +383,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ch1 != ch2)
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}

Expand All @@ -384,6 +395,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
{
if (str_curr_p >= re_ctx_p->input_end_p)
{
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}

Expand All @@ -393,6 +405,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (lit_char_is_line_terminator (ch))
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}

Expand All @@ -412,6 +425,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}

Expand All @@ -422,6 +436,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
}

JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
case RE_OP_ASSERT_END:
Expand All @@ -437,6 +452,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}

Expand All @@ -447,6 +463,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
}

JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
case RE_OP_ASSERT_WORD_BOUNDARY:
Expand Down Expand Up @@ -478,6 +495,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (is_wordchar_left == is_wordchar_right)
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
}
Expand All @@ -489,6 +507,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (is_wordchar_left != is_wordchar_right)
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
}
Expand Down Expand Up @@ -556,6 +575,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
}

JMEM_FINALIZE_LOCAL_ARRAY (saved_bck_p);
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value;
}
case RE_OP_CHAR_CLASS:
Expand All @@ -568,6 +588,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (str_curr_p >= re_ctx_p->input_end_p)
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}

Expand Down Expand Up @@ -598,6 +619,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (!is_match)
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
}
Expand All @@ -607,6 +629,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (is_match)
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
}
Expand Down Expand Up @@ -637,6 +660,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (str_curr_p >= re_ctx_p->input_end_p)
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}

Expand All @@ -646,6 +670,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ch1 != ch2)
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
}
Expand All @@ -669,6 +694,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
Expand All @@ -683,13 +709,15 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
bc_p = old_bc_p;

re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = old_start_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
case RE_OP_SAVE_AND_MATCH:
{
JERRY_TRACE_MSG ("End of pattern is reached: match\n");
re_ctx_p->saved_p[RE_GLOBAL_END_IDX] = str_curr_p;
*out_str_p = str_curr_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_TRUE; /* match */
}
case RE_OP_ALTERNATIVE:
Expand Down Expand Up @@ -754,6 +782,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
Expand Down Expand Up @@ -812,6 +841,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
Expand All @@ -836,6 +866,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
Expand All @@ -845,6 +876,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
}

re_ctx_p->saved_p[start_idx] = old_start_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
case RE_OP_CAPTURE_NON_GREEDY_GROUP_END:
Expand Down Expand Up @@ -890,6 +922,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
Expand Down Expand Up @@ -938,6 +971,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
&& str_curr_p== re_ctx_p->saved_p[start_idx])
{
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}

Expand All @@ -959,6 +993,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
Expand All @@ -983,6 +1018,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
Expand All @@ -1004,6 +1040,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
Expand All @@ -1015,6 +1052,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
/* restore if fails */
re_ctx_p->saved_p[end_idx] = old_end_p;
re_ctx_p->num_of_iterations_p[iter_idx]--;
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
case RE_OP_NON_GREEDY_ITERATOR:
Expand All @@ -1039,6 +1077,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
Expand All @@ -1062,6 +1101,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
str_curr_p = sub_str_p;
num_of_iter++;
}
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
default:
Expand Down Expand Up @@ -1105,6 +1145,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
Expand All @@ -1120,6 +1161,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
lit_utf8_read_prev (&str_curr_p);
num_of_iter--;
}
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
}
Expand Down Expand Up @@ -1208,6 +1250,7 @@ ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
re_ctx.input_start_p = input_curr_p;
const lit_utf8_byte_t *input_end_p = re_ctx.input_start_p + input_buffer_size;
re_ctx.input_end_p = input_end_p;
REGEXP_RECURSION_COUNTER_INIT ();

/* 1. Read bytecode header and init regexp matcher context. */
re_ctx.flags = bc_p->header.status_flags;
Expand Down
43 changes: 43 additions & 0 deletions jerry-core/ecma/operations/ecma-regexp-object.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,46 @@
* @{
*/

/*
* RegExp recursion depth accounting.
*
* When REGEXP_RECURSION_LIMIT is defined, the macros below maintain the
* 'recursion_counter' field of the matcher context; otherwise they expand
* to nothing.
*/
#ifdef REGEXP_RECURSION_LIMIT
/**
* Decrease the recursion counter and test it.
*
* The counter is pre-decremented; if it becomes zero, the macro executes a
* 'return' of the value produced by ecma_raise_range_error, i.e. it leaves
* the function it is expanded in.
*
* Note:
*      Requires a pointer named 're_ctx_p' to be in scope at the point of use.
*/
#define REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST() \
do \
{ \
if (--re_ctx_p->recursion_counter == 0) \
{ \
return ecma_raise_range_error (ECMA_ERR_MSG ("RegExp recursion limit is exceeded.")); \
} \
} \
while (0)
/**
* Increase the recursion counter by one.
*
* Note:
*      Requires a pointer named 're_ctx_p' to be in scope at the point of use.
*/
#define REGEXP_RECURSION_COUNTER_INCREASE() (++re_ctx_p->recursion_counter)
/**
* Set the recursion counter to REGEXP_RECURSION_LIMIT.
*
* Note:
*      Operates on a context object (not a pointer) named 're_ctx',
*      which must be in scope at the point of use.
*/
#define REGEXP_RECURSION_COUNTER_INIT() (re_ctx.recursion_counter = REGEXP_RECURSION_LIMIT)
#else /* !REGEXP_RECURSION_LIMIT */
/**
* No recursion limit is configured: expands to nothing,
* so no counter is decreased and no range error is raised.
*/
#define REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST()
/**
* No recursion limit is configured: expands to nothing.
*/
#define REGEXP_RECURSION_COUNTER_INCREASE()
/**
* No recursion limit is configured: expands to nothing.
*/
#define REGEXP_RECURSION_COUNTER_INIT()
#endif /* REGEXP_RECURSION_LIMIT */

/**
* RegExp flags
* Note:
Expand All @@ -48,6 +88,9 @@ typedef struct
const lit_utf8_byte_t **saved_p; /**< saved result string pointers, ECMA 262 v5, 15.10.2.1, State */
const lit_utf8_byte_t *input_start_p; /**< start of input pattern string */
const lit_utf8_byte_t *input_end_p; /**< end of input pattern string */
#ifdef REGEXP_RECURSION_LIMIT
uint32_t recursion_counter; /**< RegExp recursion counter */
#endif /* REGEXP_RECURSION_LIMIT */
uint32_t num_of_captures; /**< number of capture groups */
uint32_t num_of_non_captures; /**< number of non-capture groups */
uint32_t *num_of_iterations_p; /**< number of iterations */
Expand Down
Loading