Skip to content

Commit c23cf41

Browse files
Istvan Miklos authored and akosthekiss committed
Add RegExp recursion depth limit (#2543)
The regexp engine does not have any recursion depth check, so it can cause problems with various regexps. Added a new build option `--regexp-recursion-limit N`, whose default value is 0, which means unlimited recursion depth. Also added a build-option-test. Fixes #2448 Fixes #2190 JerryScript-DCO-1.0-Signed-off-by: Istvan Miklos [email protected]
1 parent 162e2dd commit c23cf41

File tree

7 files changed

+105
-0
lines changed

7 files changed

+105
-0
lines changed

jerry-core/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ set(FEATURE_SYSTEM_ALLOCATOR OFF CACHE BOOL "Enable system allocator?")
3939
set(FEATURE_VALGRIND OFF CACHE BOOL "Enable Valgrind support?")
4040
set(FEATURE_VM_EXEC_STOP OFF CACHE BOOL "Enable VM execution stopping?")
4141
set(MEM_HEAP_SIZE_KB "512" CACHE STRING "Size of memory heap, in kilobytes")
42+
set(REGEXP_RECURSION_LIMIT "0" CACHE STRING "Limit of regexp recursion depth")
4243

4344
# Option overrides
4445
if(USING_MSVC)
@@ -94,6 +95,7 @@ message(STATUS "FEATURE_SYSTEM_ALLOCATOR " ${FEATURE_SYSTEM_ALLOCATOR})
9495
message(STATUS "FEATURE_VALGRIND " ${FEATURE_VALGRIND})
9596
message(STATUS "FEATURE_VM_EXEC_STOP " ${FEATURE_VM_EXEC_STOP})
9697
message(STATUS "MEM_HEAP_SIZE_KB " ${MEM_HEAP_SIZE_KB})
98+
message(STATUS "REGEXP_RECURSION_LIMIT " ${REGEXP_RECURSION_LIMIT})
9799

98100
# Include directories
99101
set(INCLUDE_CORE_PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
@@ -228,6 +230,11 @@ if(FEATURE_REGEXP_STRICT_MODE)
228230
set(DEFINES_JERRY ${DEFINES_JERRY} ENABLE_REGEXP_STRICT_MODE)
229231
endif()
230232

233+
# RegExp recursion depth limit (the define is only added for a non-zero value; 0 means unlimited)
234+
if(REGEXP_RECURSION_LIMIT)
235+
set(DEFINES_JERRY ${DEFINES_JERRY} REGEXP_RECURSION_LIMIT=${REGEXP_RECURSION_LIMIT})
236+
endif()
237+
231238
# RegExp byte-code dumps
232239
if(FEATURE_REGEXP_DUMP)
233240
set(DEFINES_JERRY ${DEFINES_JERRY} REGEXP_DUMP_BYTE_CODE)

jerry-core/ecma/operations/ecma-regexp-object.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,13 @@
6363
*/
6464
#define RE_IS_CAPTURE_GROUP(x) (((x) < RE_OP_NON_CAPTURE_GROUP_START) ? 1 : 0)
6565

66+
/*
67+
* Compile-time check: if REGEXP_RECURSION_LIMIT is defined, it must be greater than zero
68+
*/
69+
#ifdef REGEXP_RECURSION_LIMIT
70+
JERRY_STATIC_ASSERT (REGEXP_RECURSION_LIMIT > 0, regexp_recursion_limit_must_be_greater_than_zero);
71+
#endif /* REGEXP_RECURSION_LIMIT */
72+
6673
/**
6774
* Parse RegExp flags (global, ignoreCase, multiline)
6875
*
@@ -344,6 +351,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
344351
const lit_utf8_byte_t *str_p, /**< input string pointer */
345352
const lit_utf8_byte_t **out_str_p) /**< [out] matching substring iterator */
346353
{
354+
REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST ();
347355
const lit_utf8_byte_t *str_curr_p = str_p;
348356

349357
while (true)
@@ -356,12 +364,14 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
356364
{
357365
JERRY_TRACE_MSG ("Execute RE_OP_MATCH: match\n");
358366
*out_str_p = str_curr_p;
367+
REGEXP_RECURSION_COUNTER_INCREASE ();
359368
return ECMA_VALUE_TRUE; /* match */
360369
}
361370
case RE_OP_CHAR:
362371
{
363372
if (str_curr_p >= re_ctx_p->input_end_p)
364373
{
374+
REGEXP_RECURSION_COUNTER_INCREASE ();
365375
return ECMA_VALUE_FALSE; /* fail */
366376
}
367377

@@ -373,6 +383,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
373383
if (ch1 != ch2)
374384
{
375385
JERRY_TRACE_MSG ("fail\n");
386+
REGEXP_RECURSION_COUNTER_INCREASE ();
376387
return ECMA_VALUE_FALSE; /* fail */
377388
}
378389

@@ -384,6 +395,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
384395
{
385396
if (str_curr_p >= re_ctx_p->input_end_p)
386397
{
398+
REGEXP_RECURSION_COUNTER_INCREASE ();
387399
return ECMA_VALUE_FALSE; /* fail */
388400
}
389401

@@ -393,6 +405,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
393405
if (lit_char_is_line_terminator (ch))
394406
{
395407
JERRY_TRACE_MSG ("fail\n");
408+
REGEXP_RECURSION_COUNTER_INCREASE ();
396409
return ECMA_VALUE_FALSE; /* fail */
397410
}
398411

@@ -412,6 +425,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
412425
if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
413426
{
414427
JERRY_TRACE_MSG ("fail\n");
428+
REGEXP_RECURSION_COUNTER_INCREASE ();
415429
return ECMA_VALUE_FALSE; /* fail */
416430
}
417431

@@ -422,6 +436,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
422436
}
423437

424438
JERRY_TRACE_MSG ("fail\n");
439+
REGEXP_RECURSION_COUNTER_INCREASE ();
425440
return ECMA_VALUE_FALSE; /* fail */
426441
}
427442
case RE_OP_ASSERT_END:
@@ -437,6 +452,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
437452
if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
438453
{
439454
JERRY_TRACE_MSG ("fail\n");
455+
REGEXP_RECURSION_COUNTER_INCREASE ();
440456
return ECMA_VALUE_FALSE; /* fail */
441457
}
442458

@@ -447,6 +463,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
447463
}
448464

449465
JERRY_TRACE_MSG ("fail\n");
466+
REGEXP_RECURSION_COUNTER_INCREASE ();
450467
return ECMA_VALUE_FALSE; /* fail */
451468
}
452469
case RE_OP_ASSERT_WORD_BOUNDARY:
@@ -478,6 +495,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
478495
if (is_wordchar_left == is_wordchar_right)
479496
{
480497
JERRY_TRACE_MSG ("fail\n");
498+
REGEXP_RECURSION_COUNTER_INCREASE ();
481499
return ECMA_VALUE_FALSE; /* fail */
482500
}
483501
}
@@ -489,6 +507,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
489507
if (is_wordchar_left != is_wordchar_right)
490508
{
491509
JERRY_TRACE_MSG ("fail\n");
510+
REGEXP_RECURSION_COUNTER_INCREASE ();
492511
return ECMA_VALUE_FALSE; /* fail */
493512
}
494513
}
@@ -556,6 +575,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
556575
}
557576

558577
JMEM_FINALIZE_LOCAL_ARRAY (saved_bck_p);
578+
REGEXP_RECURSION_COUNTER_INCREASE ();
559579
return match_value;
560580
}
561581
case RE_OP_CHAR_CLASS:
@@ -568,6 +588,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
568588
if (str_curr_p >= re_ctx_p->input_end_p)
569589
{
570590
JERRY_TRACE_MSG ("fail\n");
591+
REGEXP_RECURSION_COUNTER_INCREASE ();
571592
return ECMA_VALUE_FALSE; /* fail */
572593
}
573594

@@ -598,6 +619,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
598619
if (!is_match)
599620
{
600621
JERRY_TRACE_MSG ("fail\n");
622+
REGEXP_RECURSION_COUNTER_INCREASE ();
601623
return ECMA_VALUE_FALSE; /* fail */
602624
}
603625
}
@@ -607,6 +629,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
607629
if (is_match)
608630
{
609631
JERRY_TRACE_MSG ("fail\n");
632+
REGEXP_RECURSION_COUNTER_INCREASE ();
610633
return ECMA_VALUE_FALSE; /* fail */
611634
}
612635
}
@@ -637,6 +660,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
637660
if (str_curr_p >= re_ctx_p->input_end_p)
638661
{
639662
JERRY_TRACE_MSG ("fail\n");
663+
REGEXP_RECURSION_COUNTER_INCREASE ();
640664
return ECMA_VALUE_FALSE; /* fail */
641665
}
642666

@@ -646,6 +670,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
646670
if (ch1 != ch2)
647671
{
648672
JERRY_TRACE_MSG ("fail\n");
673+
REGEXP_RECURSION_COUNTER_INCREASE ();
649674
return ECMA_VALUE_FALSE; /* fail */
650675
}
651676
}
@@ -669,6 +694,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
669694
if (ecma_is_value_true (match_value))
670695
{
671696
*out_str_p = sub_str_p;
697+
REGEXP_RECURSION_COUNTER_INCREASE ();
672698
return match_value; /* match */
673699
}
674700
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -683,13 +709,15 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
683709
bc_p = old_bc_p;
684710

685711
re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = old_start_p;
712+
REGEXP_RECURSION_COUNTER_INCREASE ();
686713
return ECMA_VALUE_FALSE; /* fail */
687714
}
688715
case RE_OP_SAVE_AND_MATCH:
689716
{
690717
JERRY_TRACE_MSG ("End of pattern is reached: match\n");
691718
re_ctx_p->saved_p[RE_GLOBAL_END_IDX] = str_curr_p;
692719
*out_str_p = str_curr_p;
720+
REGEXP_RECURSION_COUNTER_INCREASE ();
693721
return ECMA_VALUE_TRUE; /* match */
694722
}
695723
case RE_OP_ALTERNATIVE:
@@ -754,6 +782,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
754782
if (ecma_is_value_true (match_value))
755783
{
756784
*out_str_p = sub_str_p;
785+
REGEXP_RECURSION_COUNTER_INCREASE ();
757786
return match_value; /* match */
758787
}
759788
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -812,6 +841,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
812841
if (ecma_is_value_true (match_value))
813842
{
814843
*out_str_p = sub_str_p;
844+
REGEXP_RECURSION_COUNTER_INCREASE ();
815845
return match_value; /* match */
816846
}
817847
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -836,6 +866,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
836866
if (ecma_is_value_true (match_value))
837867
{
838868
*out_str_p = sub_str_p;
869+
REGEXP_RECURSION_COUNTER_INCREASE ();
839870
return match_value; /* match */
840871
}
841872
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -845,6 +876,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
845876
}
846877

847878
re_ctx_p->saved_p[start_idx] = old_start_p;
879+
REGEXP_RECURSION_COUNTER_INCREASE ();
848880
return ECMA_VALUE_FALSE; /* fail */
849881
}
850882
case RE_OP_CAPTURE_NON_GREEDY_GROUP_END:
@@ -890,6 +922,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
890922
if (ecma_is_value_true (match_value))
891923
{
892924
*out_str_p = sub_str_p;
925+
REGEXP_RECURSION_COUNTER_INCREASE ();
893926
return match_value; /* match */
894927
}
895928
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -938,6 +971,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
938971
if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
939972
&& str_curr_p== re_ctx_p->saved_p[start_idx])
940973
{
974+
REGEXP_RECURSION_COUNTER_INCREASE ();
941975
return ECMA_VALUE_FALSE; /* fail */
942976
}
943977

@@ -959,6 +993,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
959993
if (ecma_is_value_true (match_value))
960994
{
961995
*out_str_p = sub_str_p;
996+
REGEXP_RECURSION_COUNTER_INCREASE ();
962997
return match_value; /* match */
963998
}
964999
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -983,6 +1018,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
9831018
if (ecma_is_value_true (match_value))
9841019
{
9851020
*out_str_p = sub_str_p;
1021+
REGEXP_RECURSION_COUNTER_INCREASE ();
9861022
return match_value; /* match */
9871023
}
9881024
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1004,6 +1040,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10041040
if (ecma_is_value_true (match_value))
10051041
{
10061042
*out_str_p = sub_str_p;
1043+
REGEXP_RECURSION_COUNTER_INCREASE ();
10071044
return match_value; /* match */
10081045
}
10091046
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1015,6 +1052,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10151052
/* restore if fails */
10161053
re_ctx_p->saved_p[end_idx] = old_end_p;
10171054
re_ctx_p->num_of_iterations_p[iter_idx]--;
1055+
REGEXP_RECURSION_COUNTER_INCREASE ();
10181056
return ECMA_VALUE_FALSE; /* fail */
10191057
}
10201058
case RE_OP_NON_GREEDY_ITERATOR:
@@ -1039,6 +1077,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10391077
if (ecma_is_value_true (match_value))
10401078
{
10411079
*out_str_p = sub_str_p;
1080+
REGEXP_RECURSION_COUNTER_INCREASE ();
10421081
return match_value; /* match */
10431082
}
10441083
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1062,6 +1101,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10621101
str_curr_p = sub_str_p;
10631102
num_of_iter++;
10641103
}
1104+
REGEXP_RECURSION_COUNTER_INCREASE ();
10651105
return ECMA_VALUE_FALSE; /* fail */
10661106
}
10671107
default:
@@ -1105,6 +1145,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
11051145
if (ecma_is_value_true (match_value))
11061146
{
11071147
*out_str_p = sub_str_p;
1148+
REGEXP_RECURSION_COUNTER_INCREASE ();
11081149
return match_value; /* match */
11091150
}
11101151
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1120,6 +1161,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
11201161
lit_utf8_read_prev (&str_curr_p);
11211162
num_of_iter--;
11221163
}
1164+
REGEXP_RECURSION_COUNTER_INCREASE ();
11231165
return ECMA_VALUE_FALSE; /* fail */
11241166
}
11251167
}
@@ -1208,6 +1250,7 @@ ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
12081250
re_ctx.input_start_p = input_curr_p;
12091251
const lit_utf8_byte_t *input_end_p = re_ctx.input_start_p + input_buffer_size;
12101252
re_ctx.input_end_p = input_end_p;
1253+
REGEXP_RECURSION_COUNTER_INIT ();
12111254

12121255
/* 1. Read bytecode header and init regexp matcher context. */
12131256
re_ctx.flags = bc_p->header.status_flags;

jerry-core/ecma/operations/ecma-regexp-object.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,46 @@
2828
* @{
2929
*/
3030

31+
#ifdef REGEXP_RECURSION_LIMIT
32+
/**
33+
* Decrease the recursion counter and test it.
34+
* If the counter reaches the limit of the recursion depth
35+
* it will return with a range error.
36+
*/
37+
#define REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST() \
38+
do \
39+
{ \
40+
if (--re_ctx_p->recursion_counter == 0) \
41+
{ \
42+
return ecma_raise_range_error (ECMA_ERR_MSG ("RegExp recursion limit is exceeded.")); \
43+
} \
44+
} \
45+
while (0)
46+
/**
47+
* Increase the recursion counter.
48+
*/
49+
#define REGEXP_RECURSION_COUNTER_INCREASE() (++re_ctx_p->recursion_counter)
50+
/**
51+
* Set the recursion counter to the max depth of the recursion.
52+
*/
53+
#define REGEXP_RECURSION_COUNTER_INIT() (re_ctx.recursion_counter = REGEXP_RECURSION_LIMIT)
54+
#else /* !REGEXP_RECURSION_LIMIT */
55+
/**
56+
* No-op variant: REGEXP_RECURSION_LIMIT is not defined, so there is
57+
* no recursion counter to decrease or test and no range error
58+
* is raised.
59+
*/
60+
#define REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST()
61+
/**
62+
* No-op variant: there is no recursion counter to increase when REGEXP_RECURSION_LIMIT is not defined.
63+
*/
64+
#define REGEXP_RECURSION_COUNTER_INCREASE()
65+
/**
66+
* No-op variant: there is no recursion counter to initialize when REGEXP_RECURSION_LIMIT is not defined.
67+
*/
68+
#define REGEXP_RECURSION_COUNTER_INIT()
69+
#endif /* REGEXP_RECURSION_LIMIT */
70+
3171
/**
3272
* RegExp flags
3373
* Note:
@@ -48,6 +88,9 @@ typedef struct
4888
const lit_utf8_byte_t **saved_p; /**< saved result string pointers, ECMA 262 v5, 15.10.2.1, State */
4989
const lit_utf8_byte_t *input_start_p; /**< start of input pattern string */
5090
const lit_utf8_byte_t *input_end_p; /**< end of input pattern string */
91+
#ifdef REGEXP_RECURSION_LIMIT
92+
uint32_t recursion_counter; /**< RegExp recursion counter */
93+
#endif /* REGEXP_RECURSION_LIMIT */
5194
uint32_t num_of_captures; /**< number of capture groups */
5295
uint32_t num_of_non_captures; /**< number of non-capture groups */
5396
uint32_t *num_of_iterations_p; /**< number of iterations */

0 commit comments

Comments
 (0)