Skip to content

Commit 559750e

Browse files
author
Istvan Miklos
committed
Add RegExp recursion depth limit
The regexp engine has no recursion depth check, so certain regexps can cause problems through unbounded recursion. This commit adds a new build option, `--regexp-recursion-limit N`, whose default value is 1000; use 0 for unlimited recursion depth. It also adds a build-option test for the unlimited recursion depth. Fixes #2448 JerryScript-DCO-1.0-Signed-off-by: Istvan Miklos [email protected]
1 parent 9ab33e8 commit 559750e

File tree

8 files changed

+85
-1
lines changed

8 files changed

+85
-1
lines changed

jerry-core/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ message(STATUS "FEATURE_SYSTEM_ALLOCATOR " ${FEATURE_SYSTEM_ALLOCATOR})
9494
message(STATUS "FEATURE_VALGRIND " ${FEATURE_VALGRIND})
9595
message(STATUS "FEATURE_VM_EXEC_STOP " ${FEATURE_VM_EXEC_STOP})
9696
message(STATUS "MEM_HEAP_SIZE_KB " ${MEM_HEAP_SIZE_KB})
97+
message(STATUS "REGEXP_RECURSION_LIMIT " ${REGEXP_RECURSION_LIMIT})
9798

9899
# Include directories
99100
set(INCLUDE_CORE_PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
@@ -242,6 +243,11 @@ if(FEATURE_REGEXP_STRICT_MODE)
242243
set(DEFINES_JERRY ${DEFINES_JERRY} ENABLE_REGEXP_STRICT_MODE)
243244
endif()
244245

246+
# RegExp recursion depth limit
# Forwards the configured value to the C sources as the compile definition
# REGEXP_RECURSION_LIMIT=<N>. if() treats 0 (and an unset or empty variable)
# as false, so in that case no definition is added and the "#ifdef
# REGEXP_RECURSION_LIMIT" code paths in the C sources are compiled out.
247+
if(REGEXP_RECURSION_LIMIT)
248+
set(DEFINES_JERRY ${DEFINES_JERRY} REGEXP_RECURSION_LIMIT=${REGEXP_RECURSION_LIMIT})
249+
endif()
250+
245251
# RegExp byte-code dumps
246252
if(FEATURE_REGEXP_DUMP)
247253
set(DEFINES_JERRY ${DEFINES_JERRY} REGEXP_DUMP_BYTE_CODE)

jerry-core/ecma/operations/ecma-regexp-object.c

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,13 @@
6363
*/
6464
#define RE_IS_CAPTURE_GROUP(x) (((x) < RE_OP_NON_CAPTURE_GROUP_START) ? 1 : 0)
6565

66+
/**
67+
* Check RegExp recursion depth limit
68+
*/
69+
#ifdef REGEXP_RECURSION_LIMIT
70+
JERRY_STATIC_ASSERT (REGEXP_RECURSION_LIMIT > 0, regexp_recursion_limit_must_be_greater_than_zero);
71+
#endif /* REGEXP_RECURSION_LIMIT */
72+
6673
/**
6774
* Parse RegExp flags (global, ignoreCase, multiline)
6875
*
@@ -364,6 +371,12 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
364371
const lit_utf8_byte_t *str_p, /**< input string pointer */
365372
const lit_utf8_byte_t **out_str_p) /**< [out] matching substring iterator */
366373
{
374+
#ifdef REGEXP_RECURSION_LIMIT
375+
if (--re_ctx_p->recursion_counter == 0)
376+
{
377+
return ecma_raise_range_error ("RegExp executor recursion limit is exceeded.");
378+
}
379+
#endif /* REGEXP_RECURSION_LIMIT */
367380
const lit_utf8_byte_t *str_curr_p = str_p;
368381

369382
while (true)
@@ -376,12 +389,14 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
376389
{
377390
JERRY_TRACE_MSG ("Execute RE_OP_MATCH: match\n");
378391
*out_str_p = str_curr_p;
392+
REGEXP_INCREASE_RECURSION_COUNTER ();
379393
return ECMA_VALUE_TRUE; /* match */
380394
}
381395
case RE_OP_CHAR:
382396
{
383397
if (str_curr_p >= re_ctx_p->input_end_p)
384398
{
399+
REGEXP_INCREASE_RECURSION_COUNTER ();
385400
return ECMA_VALUE_FALSE; /* fail */
386401
}
387402

@@ -393,6 +408,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
393408
if (ch1 != ch2)
394409
{
395410
JERRY_TRACE_MSG ("fail\n");
411+
REGEXP_INCREASE_RECURSION_COUNTER ();
396412
return ECMA_VALUE_FALSE; /* fail */
397413
}
398414

@@ -404,6 +420,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
404420
{
405421
if (str_curr_p >= re_ctx_p->input_end_p)
406422
{
423+
REGEXP_INCREASE_RECURSION_COUNTER ();
407424
return ECMA_VALUE_FALSE; /* fail */
408425
}
409426

@@ -413,6 +430,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
413430
if (lit_char_is_line_terminator (ch))
414431
{
415432
JERRY_TRACE_MSG ("fail\n");
433+
REGEXP_INCREASE_RECURSION_COUNTER ();
416434
return ECMA_VALUE_FALSE; /* fail */
417435
}
418436

@@ -432,6 +450,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
432450
if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
433451
{
434452
JERRY_TRACE_MSG ("fail\n");
453+
REGEXP_INCREASE_RECURSION_COUNTER ();
435454
return ECMA_VALUE_FALSE; /* fail */
436455
}
437456

@@ -442,6 +461,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
442461
}
443462

444463
JERRY_TRACE_MSG ("fail\n");
464+
REGEXP_INCREASE_RECURSION_COUNTER ();
445465
return ECMA_VALUE_FALSE; /* fail */
446466
}
447467
case RE_OP_ASSERT_END:
@@ -457,6 +477,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
457477
if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
458478
{
459479
JERRY_TRACE_MSG ("fail\n");
480+
REGEXP_INCREASE_RECURSION_COUNTER ();
460481
return ECMA_VALUE_FALSE; /* fail */
461482
}
462483

@@ -467,6 +488,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
467488
}
468489

469490
JERRY_TRACE_MSG ("fail\n");
491+
REGEXP_INCREASE_RECURSION_COUNTER ();
470492
return ECMA_VALUE_FALSE; /* fail */
471493
}
472494
case RE_OP_ASSERT_WORD_BOUNDARY:
@@ -498,6 +520,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
498520
if (is_wordchar_left == is_wordchar_right)
499521
{
500522
JERRY_TRACE_MSG ("fail\n");
523+
REGEXP_INCREASE_RECURSION_COUNTER ();
501524
return ECMA_VALUE_FALSE; /* fail */
502525
}
503526
}
@@ -509,6 +532,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
509532
if (is_wordchar_left != is_wordchar_right)
510533
{
511534
JERRY_TRACE_MSG ("fail\n");
535+
REGEXP_INCREASE_RECURSION_COUNTER ();
512536
return ECMA_VALUE_FALSE; /* fail */
513537
}
514538
}
@@ -563,6 +587,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
563587

564588
if (!ECMA_IS_VALUE_ERROR (match_value))
565589
{
590+
REGEXP_INCREASE_RECURSION_COUNTER ();
566591
if (ecma_is_value_true (match_value))
567592
{
568593
*out_str_p = sub_str_p;
@@ -588,6 +613,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
588613
if (str_curr_p >= re_ctx_p->input_end_p)
589614
{
590615
JERRY_TRACE_MSG ("fail\n");
616+
REGEXP_INCREASE_RECURSION_COUNTER ();
591617
return ECMA_VALUE_FALSE; /* fail */
592618
}
593619

@@ -618,6 +644,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
618644
if (!is_match)
619645
{
620646
JERRY_TRACE_MSG ("fail\n");
647+
REGEXP_INCREASE_RECURSION_COUNTER ();
621648
return ECMA_VALUE_FALSE; /* fail */
622649
}
623650
}
@@ -627,6 +654,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
627654
if (is_match)
628655
{
629656
JERRY_TRACE_MSG ("fail\n");
657+
REGEXP_INCREASE_RECURSION_COUNTER ();
630658
return ECMA_VALUE_FALSE; /* fail */
631659
}
632660
}
@@ -657,6 +685,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
657685
if (str_curr_p >= re_ctx_p->input_end_p)
658686
{
659687
JERRY_TRACE_MSG ("fail\n");
688+
REGEXP_INCREASE_RECURSION_COUNTER ();
660689
return ECMA_VALUE_FALSE; /* fail */
661690
}
662691

@@ -666,6 +695,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
666695
if (ch1 != ch2)
667696
{
668697
JERRY_TRACE_MSG ("fail\n");
698+
REGEXP_INCREASE_RECURSION_COUNTER ();
669699
return ECMA_VALUE_FALSE; /* fail */
670700
}
671701
}
@@ -689,6 +719,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
689719
if (ecma_is_value_true (match_value))
690720
{
691721
*out_str_p = sub_str_p;
722+
REGEXP_INCREASE_RECURSION_COUNTER ();
692723
return match_value; /* match */
693724
}
694725
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -703,13 +734,15 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
703734
bc_p = old_bc_p;
704735

705736
re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = old_start_p;
737+
REGEXP_INCREASE_RECURSION_COUNTER ();
706738
return ECMA_VALUE_FALSE; /* fail */
707739
}
708740
case RE_OP_SAVE_AND_MATCH:
709741
{
710742
JERRY_TRACE_MSG ("End of pattern is reached: match\n");
711743
re_ctx_p->saved_p[RE_GLOBAL_END_IDX] = str_curr_p;
712744
*out_str_p = str_curr_p;
745+
REGEXP_INCREASE_RECURSION_COUNTER ();
713746
return ECMA_VALUE_TRUE; /* match */
714747
}
715748
case RE_OP_ALTERNATIVE:
@@ -774,6 +807,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
774807
if (ecma_is_value_true (match_value))
775808
{
776809
*out_str_p = sub_str_p;
810+
REGEXP_INCREASE_RECURSION_COUNTER ();
777811
return match_value; /* match */
778812
}
779813
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -832,6 +866,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
832866
if (ecma_is_value_true (match_value))
833867
{
834868
*out_str_p = sub_str_p;
869+
REGEXP_INCREASE_RECURSION_COUNTER ();
835870
return match_value; /* match */
836871
}
837872
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -856,6 +891,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
856891
if (ecma_is_value_true (match_value))
857892
{
858893
*out_str_p = sub_str_p;
894+
REGEXP_INCREASE_RECURSION_COUNTER ();
859895
return match_value; /* match */
860896
}
861897
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -865,6 +901,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
865901
}
866902

867903
re_ctx_p->saved_p[start_idx] = old_start_p;
904+
REGEXP_INCREASE_RECURSION_COUNTER ();
868905
return ECMA_VALUE_FALSE; /* fail */
869906
}
870907
case RE_OP_CAPTURE_NON_GREEDY_GROUP_END:
@@ -910,6 +947,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
910947
if (ecma_is_value_true (match_value))
911948
{
912949
*out_str_p = sub_str_p;
950+
REGEXP_INCREASE_RECURSION_COUNTER ();
913951
return match_value; /* match */
914952
}
915953
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -958,6 +996,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
958996
if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
959997
&& str_curr_p== re_ctx_p->saved_p[start_idx])
960998
{
999+
REGEXP_INCREASE_RECURSION_COUNTER ();
9611000
return ECMA_VALUE_FALSE; /* fail */
9621001
}
9631002

@@ -979,6 +1018,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
9791018
if (ecma_is_value_true (match_value))
9801019
{
9811020
*out_str_p = sub_str_p;
1021+
REGEXP_INCREASE_RECURSION_COUNTER ();
9821022
return match_value; /* match */
9831023
}
9841024
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1003,6 +1043,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10031043
if (ecma_is_value_true (match_value))
10041044
{
10051045
*out_str_p = sub_str_p;
1046+
REGEXP_INCREASE_RECURSION_COUNTER ();
10061047
return match_value; /* match */
10071048
}
10081049
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1024,6 +1065,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10241065
if (ecma_is_value_true (match_value))
10251066
{
10261067
*out_str_p = sub_str_p;
1068+
REGEXP_INCREASE_RECURSION_COUNTER ();
10271069
return match_value; /* match */
10281070
}
10291071
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1035,6 +1077,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10351077
/* restore if fails */
10361078
re_ctx_p->saved_p[end_idx] = old_end_p;
10371079
re_ctx_p->num_of_iterations_p[iter_idx]--;
1080+
REGEXP_INCREASE_RECURSION_COUNTER ();
10381081
return ECMA_VALUE_FALSE; /* fail */
10391082
}
10401083
case RE_OP_NON_GREEDY_ITERATOR:
@@ -1059,6 +1102,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10591102
if (ecma_is_value_true (match_value))
10601103
{
10611104
*out_str_p = sub_str_p;
1105+
REGEXP_INCREASE_RECURSION_COUNTER ();
10621106
return match_value; /* match */
10631107
}
10641108
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1082,6 +1126,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10821126
str_curr_p = sub_str_p;
10831127
num_of_iter++;
10841128
}
1129+
REGEXP_INCREASE_RECURSION_COUNTER ();
10851130
return ECMA_VALUE_FALSE; /* fail */
10861131
}
10871132
default:
@@ -1125,6 +1170,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
11251170
if (ecma_is_value_true (match_value))
11261171
{
11271172
*out_str_p = sub_str_p;
1173+
REGEXP_INCREASE_RECURSION_COUNTER ();
11281174
return match_value; /* match */
11291175
}
11301176
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1140,6 +1186,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
11401186
lit_utf8_read_prev (&str_curr_p);
11411187
num_of_iter--;
11421188
}
1189+
REGEXP_INCREASE_RECURSION_COUNTER ();
11431190
return ECMA_VALUE_FALSE; /* fail */
11441191
}
11451192
}
@@ -1232,6 +1279,9 @@ ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
12321279
re_ctx.input_start_p = input_curr_p;
12331280
const lit_utf8_byte_t *input_end_p = re_ctx.input_start_p + input_buffer_size;
12341281
re_ctx.input_end_p = input_end_p;
1282+
#ifdef REGEXP_RECURSION_LIMIT
1283+
re_ctx.recursion_counter = REGEXP_RECURSION_LIMIT;
1284+
#endif /* REGEXP_RECURSION_LIMIT */
12351285

12361286
/* 1. Read bytecode header and init regexp matcher context. */
12371287
re_ctx.flags = bc_p->header.status_flags;

jerry-core/ecma/operations/ecma-regexp-object.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@
1818

1919
#ifndef CONFIG_DISABLE_REGEXP_BUILTIN
2020

/**
 * Give back one level of the RegExp recursion budget.
 *
 * Expects a local pointer named re_ctx_p that has a recursion_counter member
 * to be in scope at the point of use. Expands to nothing when
 * REGEXP_RECURSION_LIMIT is not defined.
 *
 * Note: there must be no whitespace between the macro name and the opening
 * parenthesis in the definition, otherwise the preprocessor defines an
 * object-like macro whose replacement text starts with "()".
 */
21+
#ifdef REGEXP_RECURSION_LIMIT
22+
#define REGEXP_INCREASE_RECURSION_COUNTER() (++re_ctx_p->recursion_counter)
23+
#else
24+
#define REGEXP_INCREASE_RECURSION_COUNTER()
25+
#endif /* REGEXP_RECURSION_LIMIT */
26+
2127
#include "ecma-globals.h"
2228
#include "re-compiler.h"
2329

@@ -46,6 +52,9 @@ typedef struct
4652
const lit_utf8_byte_t **saved_p; /**< saved result string pointers, ECMA 262 v5, 15.10.2.1, State */
4753
const lit_utf8_byte_t *input_start_p; /**< start of input pattern string */
4854
const lit_utf8_byte_t *input_end_p; /**< end of input pattern string */
55+
#ifdef REGEXP_RECURSION_LIMIT
56+
uint32_t recursion_counter; /**< RegExp recursion counter */
57+
#endif /* REGEXP_RECURSION_LIMIT */
4958
uint32_t num_of_captures; /**< number of capture groups */
5059
uint32_t num_of_non_captures; /**< number of non-capture groups */
5160
uint32_t *num_of_iterations_p; /**< number of iterations */

jerry-core/parser/regexp/re-compiler.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,12 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
249249
uint32_t idx;
250250
re_bytecode_ctx_t *bc_ctx_p = re_ctx_p->bytecode_ctx_p;
251251
ecma_value_t ret_value = ECMA_VALUE_EMPTY;
252+
#ifdef REGEXP_RECURSION_LIMIT
253+
/* Consume one level of the recursion budget; raise a RangeError once it is used up. */
if (--re_ctx_p->recursion_counter == 0)
254+
{
255+
/* This is the compiler path, so the message names the compiler rather than the executor. */
return ecma_raise_range_error ("RegExp compiler recursion limit is exceeded.");
256+
}
257+
#endif /* REGEXP_RECURSION_LIMIT */
252258

253259
uint32_t alterantive_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
254260
bool should_loop = true;
@@ -440,6 +446,7 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
440446
else
441447
{
442448
re_insert_u32 (bc_ctx_p, alterantive_offset, re_get_bytecode_length (bc_ctx_p) - alterantive_offset);
449+
REGEXP_INCREASE_RECURSION_COUNTER ();
443450
should_loop = false;
444451
}
445452
break;
@@ -453,6 +460,7 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
453460
else
454461
{
455462
re_insert_u32 (bc_ctx_p, alterantive_offset, re_get_bytecode_length (bc_ctx_p) - alterantive_offset);
463+
REGEXP_INCREASE_RECURSION_COUNTER ();
456464
should_loop = false;
457465
}
458466

@@ -559,7 +567,9 @@ re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] point
559567
re_ctx.flags = flags;
560568
re_ctx.highest_backref = 0;
561569
re_ctx.num_of_non_captures = 0;
562-
570+
#ifdef REGEXP_RECURSION_LIMIT
571+
re_ctx.recursion_counter = REGEXP_RECURSION_LIMIT;
572+
#endif /* REGEXP_RECURSION_LIMIT */
563573
re_bytecode_ctx_t bc_ctx;
564574
bc_ctx.block_start_p = NULL;
565575
bc_ctx.block_end_p = NULL;

0 commit comments

Comments
 (0)