Skip to content

Commit d271448

Browse files
author
Istvan Miklos
committed
Add RegExp recursion depth limit
The RegExp engine does not have any recursion depth check, which can cause problems with various regexps. This commit adds a new build option, `--regexp-recursion-limit N`, whose default value is 1000; use 0 for unlimited recursion depth. It also adds a build-option test for the unlimited recursion depth. Fixes #2448. JerryScript-DCO-1.0-Signed-off-by: Istvan Miklos [email protected]
1 parent 9ab33e8 commit d271448

File tree

8 files changed

+79
-1
lines changed

8 files changed

+79
-1
lines changed

jerry-core/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ message(STATUS "FEATURE_SYSTEM_ALLOCATOR " ${FEATURE_SYSTEM_ALLOCATOR})
9494
message(STATUS "FEATURE_VALGRIND " ${FEATURE_VALGRIND})
9595
message(STATUS "FEATURE_VM_EXEC_STOP " ${FEATURE_VM_EXEC_STOP})
9696
message(STATUS "MEM_HEAP_SIZE_KB " ${MEM_HEAP_SIZE_KB})
97+
message(STATUS "REGEXP_RECURSION_LIMIT " ${REGEXP_RECURSION_LIMIT})
9798

9899
# Include directories
99100
set(INCLUDE_CORE_PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
@@ -242,6 +243,11 @@ if(FEATURE_REGEXP_STRICT_MODE)
242243
set(DEFINES_JERRY ${DEFINES_JERRY} ENABLE_REGEXP_STRICT_MODE)
243244
endif()
244245

246+
# RegExp recursion depth limit: the compile definition is only added when the value is true for CMake (e.g. non-zero), so 0 or an unset option leaves REGEXP_RECURSION_LIMIT undefined and the depth check compiled out
247+
if(REGEXP_RECURSION_LIMIT)
248+
set(DEFINES_JERRY ${DEFINES_JERRY} REGEXP_RECURSION_LIMIT=${REGEXP_RECURSION_LIMIT})
249+
endif()
250+
245251
# RegExp byte-code dumps
246252
if(FEATURE_REGEXP_DUMP)
247253
set(DEFINES_JERRY ${DEFINES_JERRY} REGEXP_DUMP_BYTE_CODE)

jerry-core/ecma/operations/ecma-regexp-object.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,13 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
364364
const lit_utf8_byte_t *str_p, /**< input string pointer */
365365
const lit_utf8_byte_t **out_str_p) /**< [out] matching substring iterator */
366366
{
367+
#ifdef REGEXP_RECURSION_LIMIT
368+
JERRY_STATIC_ASSERT (REGEXP_RECURSION_LIMIT > 0, regexp_recursion_limit_must_be_greater_than_zero);
369+
if (--re_ctx_p->recursion_depth == 0)
370+
{
371+
return ecma_raise_range_error ("RegExp executor recursion limit is exceeded.");
372+
}
373+
#endif /* REGEXP_RECURSION_LIMIT */
367374
const lit_utf8_byte_t *str_curr_p = str_p;
368375

369376
while (true)
@@ -376,12 +383,14 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
376383
{
377384
JERRY_TRACE_MSG ("Execute RE_OP_MATCH: match\n");
378385
*out_str_p = str_curr_p;
386+
INCREASE_RECURSION_DEPTH_COUNTER;
379387
return ECMA_VALUE_TRUE; /* match */
380388
}
381389
case RE_OP_CHAR:
382390
{
383391
if (str_curr_p >= re_ctx_p->input_end_p)
384392
{
393+
INCREASE_RECURSION_DEPTH_COUNTER;
385394
return ECMA_VALUE_FALSE; /* fail */
386395
}
387396

@@ -393,6 +402,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
393402
if (ch1 != ch2)
394403
{
395404
JERRY_TRACE_MSG ("fail\n");
405+
INCREASE_RECURSION_DEPTH_COUNTER;
396406
return ECMA_VALUE_FALSE; /* fail */
397407
}
398408

@@ -404,6 +414,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
404414
{
405415
if (str_curr_p >= re_ctx_p->input_end_p)
406416
{
417+
INCREASE_RECURSION_DEPTH_COUNTER;
407418
return ECMA_VALUE_FALSE; /* fail */
408419
}
409420

@@ -413,6 +424,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
413424
if (lit_char_is_line_terminator (ch))
414425
{
415426
JERRY_TRACE_MSG ("fail\n");
427+
INCREASE_RECURSION_DEPTH_COUNTER;
416428
return ECMA_VALUE_FALSE; /* fail */
417429
}
418430

@@ -432,6 +444,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
432444
if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
433445
{
434446
JERRY_TRACE_MSG ("fail\n");
447+
INCREASE_RECURSION_DEPTH_COUNTER;
435448
return ECMA_VALUE_FALSE; /* fail */
436449
}
437450

@@ -442,6 +455,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
442455
}
443456

444457
JERRY_TRACE_MSG ("fail\n");
458+
INCREASE_RECURSION_DEPTH_COUNTER;
445459
return ECMA_VALUE_FALSE; /* fail */
446460
}
447461
case RE_OP_ASSERT_END:
@@ -457,6 +471,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
457471
if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
458472
{
459473
JERRY_TRACE_MSG ("fail\n");
474+
INCREASE_RECURSION_DEPTH_COUNTER;
460475
return ECMA_VALUE_FALSE; /* fail */
461476
}
462477

@@ -467,6 +482,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
467482
}
468483

469484
JERRY_TRACE_MSG ("fail\n");
485+
INCREASE_RECURSION_DEPTH_COUNTER;
470486
return ECMA_VALUE_FALSE; /* fail */
471487
}
472488
case RE_OP_ASSERT_WORD_BOUNDARY:
@@ -498,6 +514,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
498514
if (is_wordchar_left == is_wordchar_right)
499515
{
500516
JERRY_TRACE_MSG ("fail\n");
517+
INCREASE_RECURSION_DEPTH_COUNTER;
501518
return ECMA_VALUE_FALSE; /* fail */
502519
}
503520
}
@@ -509,6 +526,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
509526
if (is_wordchar_left != is_wordchar_right)
510527
{
511528
JERRY_TRACE_MSG ("fail\n");
529+
INCREASE_RECURSION_DEPTH_COUNTER;
512530
return ECMA_VALUE_FALSE; /* fail */
513531
}
514532
}
@@ -563,6 +581,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
563581

564582
if (!ECMA_IS_VALUE_ERROR (match_value))
565583
{
584+
INCREASE_RECURSION_DEPTH_COUNTER;
566585
if (ecma_is_value_true (match_value))
567586
{
568587
*out_str_p = sub_str_p;
@@ -588,6 +607,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
588607
if (str_curr_p >= re_ctx_p->input_end_p)
589608
{
590609
JERRY_TRACE_MSG ("fail\n");
610+
INCREASE_RECURSION_DEPTH_COUNTER;
591611
return ECMA_VALUE_FALSE; /* fail */
592612
}
593613

@@ -618,6 +638,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
618638
if (!is_match)
619639
{
620640
JERRY_TRACE_MSG ("fail\n");
641+
INCREASE_RECURSION_DEPTH_COUNTER;
621642
return ECMA_VALUE_FALSE; /* fail */
622643
}
623644
}
@@ -627,6 +648,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
627648
if (is_match)
628649
{
629650
JERRY_TRACE_MSG ("fail\n");
651+
INCREASE_RECURSION_DEPTH_COUNTER;
630652
return ECMA_VALUE_FALSE; /* fail */
631653
}
632654
}
@@ -657,6 +679,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
657679
if (str_curr_p >= re_ctx_p->input_end_p)
658680
{
659681
JERRY_TRACE_MSG ("fail\n");
682+
INCREASE_RECURSION_DEPTH_COUNTER;
660683
return ECMA_VALUE_FALSE; /* fail */
661684
}
662685

@@ -666,6 +689,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
666689
if (ch1 != ch2)
667690
{
668691
JERRY_TRACE_MSG ("fail\n");
692+
INCREASE_RECURSION_DEPTH_COUNTER;
669693
return ECMA_VALUE_FALSE; /* fail */
670694
}
671695
}
@@ -689,6 +713,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
689713
if (ecma_is_value_true (match_value))
690714
{
691715
*out_str_p = sub_str_p;
716+
INCREASE_RECURSION_DEPTH_COUNTER;
692717
return match_value; /* match */
693718
}
694719
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -703,13 +728,15 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
703728
bc_p = old_bc_p;
704729

705730
re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = old_start_p;
731+
INCREASE_RECURSION_DEPTH_COUNTER;
706732
return ECMA_VALUE_FALSE; /* fail */
707733
}
708734
case RE_OP_SAVE_AND_MATCH:
709735
{
710736
JERRY_TRACE_MSG ("End of pattern is reached: match\n");
711737
re_ctx_p->saved_p[RE_GLOBAL_END_IDX] = str_curr_p;
712738
*out_str_p = str_curr_p;
739+
INCREASE_RECURSION_DEPTH_COUNTER;
713740
return ECMA_VALUE_TRUE; /* match */
714741
}
715742
case RE_OP_ALTERNATIVE:
@@ -774,6 +801,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
774801
if (ecma_is_value_true (match_value))
775802
{
776803
*out_str_p = sub_str_p;
804+
INCREASE_RECURSION_DEPTH_COUNTER;
777805
return match_value; /* match */
778806
}
779807
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -832,6 +860,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
832860
if (ecma_is_value_true (match_value))
833861
{
834862
*out_str_p = sub_str_p;
863+
INCREASE_RECURSION_DEPTH_COUNTER;
835864
return match_value; /* match */
836865
}
837866
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -856,6 +885,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
856885
if (ecma_is_value_true (match_value))
857886
{
858887
*out_str_p = sub_str_p;
888+
INCREASE_RECURSION_DEPTH_COUNTER;
859889
return match_value; /* match */
860890
}
861891
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -865,6 +895,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
865895
}
866896

867897
re_ctx_p->saved_p[start_idx] = old_start_p;
898+
INCREASE_RECURSION_DEPTH_COUNTER;
868899
return ECMA_VALUE_FALSE; /* fail */
869900
}
870901
case RE_OP_CAPTURE_NON_GREEDY_GROUP_END:
@@ -910,6 +941,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
910941
if (ecma_is_value_true (match_value))
911942
{
912943
*out_str_p = sub_str_p;
944+
INCREASE_RECURSION_DEPTH_COUNTER;
913945
return match_value; /* match */
914946
}
915947
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -958,6 +990,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
958990
if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
959991
&& str_curr_p== re_ctx_p->saved_p[start_idx])
960992
{
993+
INCREASE_RECURSION_DEPTH_COUNTER;
961994
return ECMA_VALUE_FALSE; /* fail */
962995
}
963996

@@ -979,6 +1012,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
9791012
if (ecma_is_value_true (match_value))
9801013
{
9811014
*out_str_p = sub_str_p;
1015+
INCREASE_RECURSION_DEPTH_COUNTER;
9821016
return match_value; /* match */
9831017
}
9841018
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1003,6 +1037,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10031037
if (ecma_is_value_true (match_value))
10041038
{
10051039
*out_str_p = sub_str_p;
1040+
INCREASE_RECURSION_DEPTH_COUNTER;
10061041
return match_value; /* match */
10071042
}
10081043
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1024,6 +1059,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10241059
if (ecma_is_value_true (match_value))
10251060
{
10261061
*out_str_p = sub_str_p;
1062+
INCREASE_RECURSION_DEPTH_COUNTER;
10271063
return match_value; /* match */
10281064
}
10291065
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1035,6 +1071,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10351071
/* restore if fails */
10361072
re_ctx_p->saved_p[end_idx] = old_end_p;
10371073
re_ctx_p->num_of_iterations_p[iter_idx]--;
1074+
INCREASE_RECURSION_DEPTH_COUNTER;
10381075
return ECMA_VALUE_FALSE; /* fail */
10391076
}
10401077
case RE_OP_NON_GREEDY_ITERATOR:
@@ -1059,6 +1096,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10591096
if (ecma_is_value_true (match_value))
10601097
{
10611098
*out_str_p = sub_str_p;
1099+
INCREASE_RECURSION_DEPTH_COUNTER;
10621100
return match_value; /* match */
10631101
}
10641102
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1082,6 +1120,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10821120
str_curr_p = sub_str_p;
10831121
num_of_iter++;
10841122
}
1123+
INCREASE_RECURSION_DEPTH_COUNTER;
10851124
return ECMA_VALUE_FALSE; /* fail */
10861125
}
10871126
default:
@@ -1125,6 +1164,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
11251164
if (ecma_is_value_true (match_value))
11261165
{
11271166
*out_str_p = sub_str_p;
1167+
INCREASE_RECURSION_DEPTH_COUNTER;
11281168
return match_value; /* match */
11291169
}
11301170
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1140,6 +1180,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
11401180
lit_utf8_read_prev (&str_curr_p);
11411181
num_of_iter--;
11421182
}
1183+
INCREASE_RECURSION_DEPTH_COUNTER;
11431184
return ECMA_VALUE_FALSE; /* fail */
11441185
}
11451186
}
@@ -1232,6 +1273,9 @@ ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
12321273
re_ctx.input_start_p = input_curr_p;
12331274
const lit_utf8_byte_t *input_end_p = re_ctx.input_start_p + input_buffer_size;
12341275
re_ctx.input_end_p = input_end_p;
1276+
#ifdef REGEXP_RECURSION_LIMIT
1277+
re_ctx.recursion_depth = REGEXP_RECURSION_LIMIT;
1278+
#endif /* REGEXP_RECURSION_LIMIT */
12351279

12361280
/* 1. Read bytecode header and init regexp matcher context. */
12371281
re_ctx.flags = bc_p->header.status_flags;

jerry-core/ecma/operations/ecma-regexp-object.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@
1818

1919
#ifndef CONFIG_DISABLE_REGEXP_BUILTIN
2020

21+
/*
 * Gives back one unit of the recursion budget by pre-incrementing `recursion_depth`.
 * The macro refers to a variable named `re_ctx_p`, so one must be in scope at every call site.
 * When REGEXP_RECURSION_LIMIT is not defined the macro expands to nothing.
 */
#ifdef REGEXP_RECURSION_LIMIT
22+
#define INCREASE_RECURSION_DEPTH_COUNTER (++re_ctx_p->recursion_depth)
23+
#else
24+
#define INCREASE_RECURSION_DEPTH_COUNTER
25+
#endif /* REGEXP_RECURSION_LIMIT */
26+
2127
#include "ecma-globals.h"
2228
#include "re-compiler.h"
2329

@@ -46,6 +52,9 @@ typedef struct
4652
const lit_utf8_byte_t **saved_p; /**< saved result string pointers, ECMA 262 v5, 15.10.2.1, State */
4753
const lit_utf8_byte_t *input_start_p; /**< start of input pattern string */
4854
const lit_utf8_byte_t *input_end_p; /**< end of input pattern string */
55+
#ifdef REGEXP_RECURSION_LIMIT
56+
uint32_t recursion_depth; /**< recursion depth limit */
57+
#endif /* REGEXP_RECURSION_LIMIT */
4958
uint32_t num_of_captures; /**< number of capture groups */
5059
uint32_t num_of_non_captures; /**< number of non-capture groups */
5160
uint32_t *num_of_iterations_p; /**< number of iterations */

jerry-core/parser/regexp/re-compiler.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,12 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
249249
uint32_t idx;
250250
re_bytecode_ctx_t *bc_ctx_p = re_ctx_p->bytecode_ctx_p;
251251
ecma_value_t ret_value = ECMA_VALUE_EMPTY;
252+
/* When REGEXP_RECURSION_LIMIT is defined, consume one unit of the recursion budget on entry
 * and raise a range error once the budget is used up. The message names the compiler so this
 * failure can be told apart from the matcher's own recursion limit error. */
#ifdef REGEXP_RECURSION_LIMIT
253+
if (--re_ctx_p->recursion_depth == 0)
254+
{
255+
return ecma_raise_range_error ("RegExp compiler recursion limit is exceeded.");
256+
}
257+
#endif /* REGEXP_RECURSION_LIMIT */
252258

253259
uint32_t alterantive_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
254260
bool should_loop = true;
@@ -440,6 +446,7 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
440446
else
441447
{
442448
re_insert_u32 (bc_ctx_p, alterantive_offset, re_get_bytecode_length (bc_ctx_p) - alterantive_offset);
449+
INCREASE_RECURSION_DEPTH_COUNTER;
443450
should_loop = false;
444451
}
445452
break;
@@ -453,6 +460,7 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
453460
else
454461
{
455462
re_insert_u32 (bc_ctx_p, alterantive_offset, re_get_bytecode_length (bc_ctx_p) - alterantive_offset);
463+
INCREASE_RECURSION_DEPTH_COUNTER;
456464
should_loop = false;
457465
}
458466

@@ -559,7 +567,9 @@ re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] point
559567
re_ctx.flags = flags;
560568
re_ctx.highest_backref = 0;
561569
re_ctx.num_of_non_captures = 0;
562-
570+
#ifdef REGEXP_RECURSION_LIMIT
571+
re_ctx.recursion_depth = REGEXP_RECURSION_LIMIT;
572+
#endif /* REGEXP_RECURSION_LIMIT */
563573
re_bytecode_ctx_t bc_ctx;
564574
bc_ctx.block_start_p = NULL;
565575
bc_ctx.block_end_p = NULL;

jerry-core/parser/regexp/re-compiler.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ typedef struct
4141
uint32_t num_of_captures; /**< number of capture groups */
4242
uint32_t num_of_non_captures; /**< number of non-capture groups */
4343
uint32_t highest_backref; /**< highest backreference */
44+
#ifdef REGEXP_RECURSION_LIMIT
45+
uint32_t recursion_depth; /**< recursion depth limit */
46+
#endif /* REGEXP_RECURSION_LIMIT */
4447
re_bytecode_ctx_t *bytecode_ctx_p; /**< pointer of RegExp bytecode context */
4548
re_token_t current_token; /**< current token */
4649
re_parser_ctx_t *parser_ctx_p; /**< pointer of RegExp parser context */

0 commit comments

Comments
 (0)