Skip to content

Commit e027b4d

Browse files
committed
Enable regular expressions.
- add regular expressions support to JS parser and interpreter; - add tests for regular expressions. JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan [email protected] JerryScript-DCO-1.0-Signed-off-by: László Langó [email protected]
1 parent f992f5d commit e027b4d

19 files changed

+1087
-6
lines changed

jerry-core/parser/js/lexer.cpp

Lines changed: 95 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,14 @@
1515
*/
1616

1717
#include "ecma-helpers.h"
18+
#include "ecma-exceptions.h"
1819
#include "jrt-libc-includes.h"
1920
#include "jsp-mm.h"
2021
#include "lexer.h"
22+
#include "mem-allocator.h"
23+
#include "opcodes.h"
24+
#include "parser.h"
25+
#include "stack.h"
2126
#include "syntax-errors.h"
2227

2328
static token saved_token, prev_token, sent_token, empty_token;
@@ -961,6 +966,76 @@ parse_string (void)
961966
return ret;
962967
} /* parse_string */
963968

969+
/**
970+
* Parse regular expression literal (ECMA-262 v5, 7.8.5)
971+
*/
972+
static token
973+
parse_regexp (void)
974+
{
975+
token result;
976+
bool is_char_class = false;
977+
978+
/* Eat up '/' */
979+
JERRY_ASSERT ((ecma_char_t) LA (0) == '/');
980+
consume_char ();
981+
new_token ();
982+
983+
while (true)
984+
{
985+
ecma_char_t c = (ecma_char_t) LA (0);
986+
987+
if (c == '\0')
988+
{
989+
PARSE_ERROR ("Unclosed string", token_start - buffer_start);
990+
}
991+
else if (c == '\n')
992+
{
993+
PARSE_ERROR ("RegExp literal shall not contain newline character", token_start - buffer_start);
994+
}
995+
else if (c == '\\')
996+
{
997+
consume_char ();
998+
}
999+
else if (c == '[')
1000+
{
1001+
is_char_class = true;
1002+
}
1003+
else if (c == ']')
1004+
{
1005+
is_char_class = false;
1006+
}
1007+
else if (c == '/' && !is_char_class)
1008+
{
1009+
/* Eat up '/' */
1010+
consume_char ();
1011+
break;
1012+
}
1013+
1014+
consume_char ();
1015+
}
1016+
1017+
/* Try to parse RegExp flags */
1018+
while (true)
1019+
{
1020+
ecma_char_t c = (ecma_char_t) LA (0);
1021+
1022+
if (c == '\0'
1023+
|| !ecma_char_is_word_char (c)
1024+
|| ecma_char_is_line_terminator (c))
1025+
{
1026+
break;
1027+
}
1028+
consume_char ();
1029+
}
1030+
1031+
result = convert_string_to_token (TOK_REGEXP,
1032+
(const ecma_char_t*) token_start,
1033+
static_cast<ecma_length_t> (buffer - token_start));
1034+
1035+
token_start = NULL;
1036+
return result;
1037+
} /* parse_regexp */
1038+
9641039
static void
9651040
grobble_whitespaces (void)
9661041
{
@@ -1084,10 +1159,27 @@ lexer_next_token_private (void)
10841159
}
10851160
}
10861161

1087-
if (c == '/' && LA (1) == '/')
1162+
1163+
if (c == '/')
10881164
{
1089-
replace_comment_by_newline ();
1090-
return lexer_next_token_private ();
1165+
if (LA (1) == '/')
1166+
{
1167+
replace_comment_by_newline ();
1168+
return lexer_next_token_private ();
1169+
}
1170+
else if (!(sent_token.type == TOK_NAME
1171+
|| sent_token.type == TOK_NULL
1172+
|| sent_token.type == TOK_BOOL
1173+
|| sent_token.type == TOK_CLOSE_BRACE
1174+
|| sent_token.type == TOK_CLOSE_SQUARE
1175+
|| sent_token.type == TOK_CLOSE_PAREN
1176+
|| sent_token.type == TOK_SMALL_INT
1177+
|| sent_token.type == TOK_NUMBER
1178+
|| sent_token.type == TOK_STRING
1179+
|| sent_token.type == TOK_REGEXP))
1180+
{
1181+
return parse_regexp ();
1182+
}
10911183
}
10921184

10931185
switch (c)
@@ -1203,7 +1295,6 @@ lexer_next_token (void)
12031295

12041296
prev_token = sent_token;
12051297
sent_token = lexer_next_token_private ();
1206-
12071298
if (sent_token.type == TOK_NEWLINE)
12081299
{
12091300
dump_current_line ();

jerry-core/parser/js/lexer.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ typedef enum __attr_packed___
9999
TOK_OPEN_PAREN, // (
100100
TOK_CLOSE_PAREN, //)
101101
TOK_OPEN_SQUARE, // [
102-
TOK_CLOSE_SQUARE, // [
102+
TOK_CLOSE_SQUARE, // ]
103103

104104
TOK_DOT, // .
105105
TOK_SEMICOLON, // ;
@@ -152,6 +152,7 @@ typedef enum __attr_packed___
152152
TOK_DIV, // /
153153
TOK_DIV_EQ, // /=
154154
TOK_EMPTY,
155+
TOK_REGEXP, // RegularExpressionLiteral (/.../gim)
155156
} token_type;
156157

157158
typedef size_t locus;
@@ -170,6 +171,9 @@ typedef struct
170171
#define TOKEN_EMPTY_INITIALIZER {0, TOK_EMPTY, 0}
171172

172173
void lexer_init (const char *, size_t, bool);
174+
void lexer_init_source (const char *, size_t);
175+
176+
void lexer_free (void);
173177

174178
token lexer_next_token (void);
175179
void lexer_save_token (token);

jerry-core/parser/js/opcodes-dumper.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -843,6 +843,34 @@ dump_number_assignment_res (lit_cpointer_t lit_id)
843843
return op;
844844
}
845845

846+
void
847+
dump_regexp_assignment (operand op, lit_cpointer_t lit_id)
848+
{
849+
switch (op.type)
850+
{
851+
case OPERAND_LITERAL:
852+
{
853+
const opcode_t opcode = getop_assignment (LITERAL_TO_REWRITE, OPCODE_ARG_TYPE_REGEXP, LITERAL_TO_REWRITE);
854+
serializer_dump_op_meta (create_op_meta_101 (opcode, op.data.lit_id, lit_id));
855+
break;
856+
}
857+
case OPERAND_TMP:
858+
{
859+
const opcode_t opcode = getop_assignment (op.data.uid, OPCODE_ARG_TYPE_REGEXP, LITERAL_TO_REWRITE);
860+
serializer_dump_op_meta (create_op_meta_001 (opcode, lit_id));
861+
break;
862+
}
863+
}
864+
}
865+
866+
operand
867+
dump_regexp_assignment_res (lit_cpointer_t lit_id)
868+
{
869+
operand op = tmp_operand ();
870+
dump_regexp_assignment (op, lit_id);
871+
return op;
872+
}
873+
846874
void
847875
dump_smallint_assignment (operand op, idx_t uid)
848876
{

jerry-core/parser/js/opcodes-dumper.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ void dump_string_assignment (operand, lit_cpointer_t);
6969
operand dump_string_assignment_res (lit_cpointer_t);
7070
void dump_number_assignment (operand, lit_cpointer_t);
7171
operand dump_number_assignment_res (lit_cpointer_t);
72+
void dump_regexp_assignment (operand, lit_cpointer_t);
73+
operand dump_regexp_assignment_res (lit_cpointer_t);
7274
void dump_smallint_assignment (operand, idx_t);
7375
operand dump_smallint_assignment_res (idx_t);
7476
void dump_undefined_assignment (operand);

jerry-core/parser/js/parser.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/* Copyright 2014-2015 Samsung Electronics Co., Ltd.
2+
* Copyright 2015 University of Szeged.
23
*
34
* Licensed under the Apache License, Version 2.0 (the "License");
45
* you may not use this file except in compliance with the License.
@@ -22,6 +23,7 @@
2223
#include "opcodes-dumper.h"
2324
#include "opcodes-native-call.h"
2425
#include "parser.h"
26+
#include "re-parser.h"
2527
#include "scopes-tree.h"
2628
#include "serializer.h"
2729
#include "stack.h"
@@ -745,6 +747,7 @@ parse_object_literal (void)
745747
| 'false'
746748
| number_literal
747749
| string_literal
750+
| regexp_literal
748751
; */
749752
static operand
750753
parse_literal (void)
@@ -753,6 +756,7 @@ parse_literal (void)
753756
{
754757
case TOK_NUMBER: return dump_number_assignment_res (token_data_as_lit_cp ());
755758
case TOK_STRING: return dump_string_assignment_res (token_data_as_lit_cp ());
759+
case TOK_REGEXP: return dump_regexp_assignment_res (token_data_as_lit_cp ());
756760
case TOK_NULL: return dump_null_assignment_res ();
757761
case TOK_BOOL: return dump_boolean_assignment_res ((bool) token_data ());
758762
case TOK_SMALL_INT: return dump_smallint_assignment_res ((idx_t) token_data ());
@@ -786,6 +790,7 @@ parse_primary_expression (void)
786790
case TOK_BOOL:
787791
case TOK_SMALL_INT:
788792
case TOK_NUMBER:
793+
case TOK_REGEXP:
789794
case TOK_STRING: return parse_literal ();
790795
case TOK_NAME: return literal_operand (token_data_as_lit_cp ());
791796
case TOK_OPEN_SQUARE: return parse_array_literal ();

jerry-core/parser/js/scopes-tree.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ generate_opcode (scopes_tree tree, opcode_counter_t opc_index, lit_id_hash_table
291291
}
292292
case OPCODE_ARG_TYPE_NUMBER:
293293
case OPCODE_ARG_TYPE_NUMBER_NEGATE:
294+
case OPCODE_ARG_TYPE_REGEXP:
294295
case OPCODE_ARG_TYPE_STRING:
295296
case OPCODE_ARG_TYPE_VARIABLE:
296297
{
@@ -430,6 +431,7 @@ count_new_literals_in_opcode (scopes_tree tree, opcode_counter_t opc_index)
430431
}
431432
case OPCODE_ARG_TYPE_NUMBER:
432433
case OPCODE_ARG_TYPE_NUMBER_NEGATE:
434+
case OPCODE_ARG_TYPE_REGEXP:
433435
case OPCODE_ARG_TYPE_STRING:
434436
case OPCODE_ARG_TYPE_VARIABLE:
435437
{

jerry-core/vm/opcodes-ecma-support.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "ecma-objects.h"
3030
#include "ecma-objects-general.h"
3131
#include "ecma-reference.h"
32+
#include "ecma-regexp-object.h"
3233
#include "ecma-try-catch-macro.h"
3334
#include "serializer.h"
3435

jerry-core/vm/opcodes.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/* Copyright 2014-2015 Samsung Electronics Co., Ltd.
2+
* Copyright 2015 University of Szeged.
23
*
34
* Licensed under the Apache License, Version 2.0 (the "License");
45
* you may not use this file except in compliance with the License.
@@ -162,6 +163,65 @@ opfunc_assignment (opcode_t opdata, /**< operation data */
162163
dst_var_idx,
163164
ecma_make_number_value (num_p));
164165
}
166+
else if (type_value_right == OPCODE_ARG_TYPE_REGEXP)
167+
{
168+
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
169+
lit_cpointer_t lit_cp = serializer_get_literal_cp_by_uid (src_val_descr,
170+
int_data->opcodes_p,
171+
int_data->pos);
172+
ecma_string_t *string_p = ecma_new_ecma_string_from_lit_cp (lit_cp);
173+
174+
int32_t re_str_len = ecma_string_get_length (string_p);
175+
MEM_DEFINE_LOCAL_ARRAY (re_str_p, re_str_len + 1, ecma_char_t);
176+
177+
ssize_t zt_str_size = (ssize_t) sizeof (ecma_char_t) * (re_str_len + 1);
178+
ecma_string_to_zt_string (string_p, re_str_p, zt_str_size);
179+
180+
ecma_char_t *ch_p = re_str_p;
181+
ecma_char_t *last_slash_p = NULL;
182+
while (*ch_p)
183+
{
184+
if (*ch_p == '/')
185+
{
186+
last_slash_p = ch_p;
187+
}
188+
ch_p++;
189+
}
190+
191+
JERRY_ASSERT (last_slash_p != NULL);
192+
JERRY_ASSERT ((re_str_p < last_slash_p) && (last_slash_p < ch_p));
193+
JERRY_ASSERT ((last_slash_p - re_str_p) > 0);
194+
ecma_string_t *pattern_p = ecma_new_ecma_string (re_str_p, (ecma_length_t) (last_slash_p - re_str_p));
195+
ecma_string_t *flags_p = NULL;
196+
197+
if ((ch_p - last_slash_p) > 1)
198+
{
199+
flags_p = ecma_new_ecma_string (last_slash_p + 1, (ecma_length_t) ((ch_p - last_slash_p - 1)));
200+
}
201+
202+
ECMA_TRY_CATCH (regexp_obj_value,
203+
ecma_op_create_regexp_object (pattern_p, flags_p),
204+
ret_value);
205+
206+
ret_value = set_variable_value (int_data,
207+
int_data->pos,
208+
dst_var_idx,
209+
regexp_obj_value);
210+
211+
ECMA_FINALIZE (regexp_obj_value);
212+
213+
ecma_deref_ecma_string (pattern_p);
214+
if (flags_p != NULL)
215+
{
216+
ecma_deref_ecma_string (flags_p);
217+
}
218+
219+
MEM_FINALIZE_LOCAL_ARRAY (re_str_p)
220+
ecma_deref_ecma_string (string_p);
221+
#else
222+
JERRY_UNIMPLEMENTED ("Regular Expressions are not supported in compact profile!");
223+
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
224+
}
165225
else
166226
{
167227
JERRY_ASSERT (type_value_right == OPCODE_ARG_TYPE_SMALLINT_NEGATE);

jerry-core/vm/opcodes.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ typedef enum
5050
OPCODE_ARG_TYPE_NUMBER, /**< index of number literal */
5151
OPCODE_ARG_TYPE_NUMBER_NEGATE, /**< index of number literal with negation */
5252
OPCODE_ARG_TYPE_STRING, /**< index of string literal */
53-
OPCODE_ARG_TYPE_VARIABLE /**< index of variable name */
53+
OPCODE_ARG_TYPE_VARIABLE, /**< index of string literal with variable name */
54+
OPCODE_ARG_TYPE_REGEXP /**< index of string literal with regular expression */
5455
} opcode_arg_type_operand;
5556

5657
/**

0 commit comments

Comments
 (0)