Skip to content

Commit 704eb45

Browse files
Daniel BallaLaszloLango
authored andcommitted
[API] Add RegExp C API (#2542)
This patch supports creating a RegExp object through the C API. JerryScript-DCO-1.0-Signed-off-by: Daniel Balla [email protected]
1 parent 47fa590 commit 704eb45

File tree

7 files changed

+227
-15
lines changed

7 files changed

+227
-15
lines changed

docs/02.API-REFERENCE.md

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,16 @@ Possible compile time enabled feature types:
6262
- JERRY_FEATURE_LINE_INFO - line info available
6363
- JERRY_FEATURE_LOGGING - logging
6464

65+
## jerry_regexp_flags_t
66+
67+
RegExp object optional flags:
68+
69+
- JERRY_REGEXP_FLAG_GLOBAL - global match; find all matches rather than stopping after the first match
70+
- JERRY_REGEXP_FLAG_IGNORE_CASE - ignore case
71+
- JERRY_REGEXP_FLAG_MULTILINE - multiline; treat beginning and end characters (^ and $) as working over
72+
multiple lines (i.e., match the beginning or end of each line (delimited by \n or \r), not only the
73+
very beginning or end of the whole input string)
74+
6575
## jerry_parse_opts_t
6676

6777
Option bits for [jerry_parse](#jerry_parse) and
@@ -3406,6 +3416,74 @@ jerry_create_string_sz (const jerry_char_t *str_p,
34063416
- [jerry_create_string_from_utf8](#jerry_create_string_from_utf8)
34073417

34083418

3419+
## jerry_create_regexp
3420+
3421+
**Summary**
3422+
3423+
Returns a jerry_value_t RegExp object or an error, if the construction of the object fails.
3424+
Optional flags can be set using [jerry_regexp_flags_t](#jerry_regexp_flags_t).
3425+
3426+
**Prototype**
3427+
```c
3428+
jerry_value_t
3429+
jerry_create_regexp (const jerry_char_t *pattern_p, jerry_regexp_flags_t flags);
3430+
```
3431+
3432+
- `pattern_p` - the RegExp pattern as a zero-terminated UTF-8 string
3433+
- `flags` - optional flags for the RegExp object
3434+
- return value - the RegExp object as a `jerry_value_t`
3435+
3436+
**Example**
3437+
3438+
```c
3439+
{
3440+
jerry_char_t pattern_p[] = "[cgt]gggtaaa|tttaccc[acg]";
3441+
jerry_regexp_flags_t pattern_flags = JERRY_REGEXP_FLAG_IGNORE_CASE;
3442+
3443+
jerry_value_t regexp = jerry_create_regexp (pattern_p, pattern_flags);
3444+
3445+
...
3446+
3447+
jerry_release_value (regexp);
3448+
}
3449+
```
3450+
3451+
3452+
## jerry_create_regexp_sz
3453+
3454+
**Summary**
3455+
3456+
Returns a jerry_value_t RegExp object or an error, if the construction of the object fails.
3457+
Optional flags can be set using [jerry_regexp_flags_t](#jerry_regexp_flags_t).
3458+
3459+
**Prototype**
3460+
```c
3461+
jerry_value_t
3462+
jerry_create_regexp_sz (const jerry_char_t *pattern_p, jerry_size_t pattern_size, jerry_regexp_flags_t flags);
3463+
```
3464+
3465+
- `pattern_p` - the RegExp pattern as a UTF-8 string whose size is given by `pattern_size`
3466+
- `pattern_size` - size of the pattern (`pattern_p`) in bytes
3467+
- `flags` - optional flags for the RegExp object
3468+
- return value - the RegExp object as a `jerry_value_t`
3469+
3470+
**Example**
3471+
3472+
```c
3473+
{
3474+
jerry_char_t pattern_p[] = "[cgt]gggtaaa|tttaccc[acg]";
3475+
jerry_size_t pattern_size = sizeof (pattern_p) - 1;
3476+
jerry_regexp_flags_t pattern_flags = JERRY_REGEXP_FLAG_IGNORE_CASE;
3477+
3478+
jerry_value_t regexp = jerry_create_regexp_sz (pattern_p, pattern_size, pattern_flags);
3479+
3480+
...
3481+
3482+
jerry_release_value (regexp);
3483+
}
3484+
```
3485+
3486+
34093487
## jerry_create_typedarray
34103488

34113489
**Summary**

jerry-core/api/jerry.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "ecma-literal-storage.h"
3232
#include "ecma-objects.h"
3333
#include "ecma-objects-general.h"
34+
#include "ecma-regexp-object.h"
3435
#include "ecma-promise-object.h"
3536
#include "ecma-typedarray-object.h"
3637
#include "jcontext.h"
@@ -59,6 +60,13 @@ JERRY_STATIC_ASSERT ((int) ECMA_INIT_EMPTY == (int) JERRY_INIT_EMPTY
5960
&& (int) ECMA_INIT_MEM_STATS == (int) JERRY_INIT_MEM_STATS,
6061
ecma_init_flag_t_must_be_equal_to_jerry_init_flag_t);
6162

63+
#ifndef CONFIG_DISABLE_REGEXP_BUILTIN
64+
JERRY_STATIC_ASSERT ((int) RE_FLAG_GLOBAL == (int) JERRY_REGEXP_FLAG_GLOBAL
65+
&& (int) RE_FLAG_MULTILINE == (int) JERRY_REGEXP_FLAG_MULTILINE
66+
&& (int) RE_FLAG_IGNORE_CASE == (int) JERRY_REGEXP_FLAG_IGNORE_CASE,
67+
re_flags_t_must_be_equal_to_jerry_regexp_flags_t);
68+
#endif /* !CONFIG_DISABLE_REGEXP_BUILTIN */
69+
6270
#if defined JERRY_DISABLE_JS_PARSER && !defined JERRY_ENABLE_SNAPSHOT_EXEC
6371
#error JERRY_ENABLE_SNAPSHOT_EXEC must be defined if JERRY_DISABLE_JS_PARSER is defined!
6472
#endif /* JERRY_DISABLE_JS_PARSER && !JERRY_ENABLE_SNAPSHOT_EXEC */
@@ -1484,6 +1492,52 @@ jerry_create_string_sz (const jerry_char_t *str_p, /**< pointer to string */
14841492
return ecma_make_string_value (ecma_str_p);
14851493
} /* jerry_create_string_sz */
14861494

1495+
/**
1496+
* Creates a RegExp object from a zero-terminated pattern: the pattern size is computed with lit_zt_utf8_string_size and the call is forwarded to jerry_create_regexp_sz.
1497+
*
1498+
* @return the value returned by jerry_create_regexp_sz: the constructed RegExp object, or an error value if the construction fails.
1499+
*/
1500+
jerry_value_t
1501+
jerry_create_regexp (const jerry_char_t *pattern_p, /**< zero-terminated UTF-8 string as RegExp pattern */
1502+
jerry_regexp_flags_t flags) /**< optional RegExp flags (jerry_regexp_flags_t bits) */
1503+
{
1504+
return jerry_create_regexp_sz (pattern_p, lit_zt_utf8_string_size (pattern_p), flags);
1505+
} /* jerry_create_regexp */
1506+
1507+
/**
1508+
* Creates a RegExp object with the given explicitly-sized pattern and flags.
1509+
*
1510+
* @return value of the constructed RegExp object, or a thrown error value if the pattern is not valid UTF-8 or if RegExp support is compiled out (CONFIG_DISABLE_REGEXP_BUILTIN).
1511+
*/
1512+
jerry_value_t
1513+
jerry_create_regexp_sz (const jerry_char_t *pattern_p, /**< UTF-8 string as RegExp pattern, pattern_size bytes long */
1514+
jerry_size_t pattern_size, /**< size of the pattern */
1515+
jerry_regexp_flags_t flags) /**< optional RegExp flags (jerry_regexp_flags_t bits) */
1516+
{
1517+
jerry_assert_api_available ();
1518+
1519+
#ifndef CONFIG_DISABLE_REGEXP_BUILTIN
1520+
if (!lit_is_valid_utf8_string (pattern_p, pattern_size))
1521+
{
1522+
return jerry_throw (ecma_raise_common_error (ECMA_ERR_MSG ("Input must be a valid utf8 string")));
1523+
}
1524+
/* Wrap the validated bytes in an ecma string; the reference taken here is dropped below. */
1525+
ecma_string_t *ecma_pattern = ecma_new_ecma_string_from_utf8 (pattern_p, pattern_size);
1526+
/* flags is passed as-is to the uint16_t parameter; the static assert in this file keeps the JERRY_REGEXP_FLAG_* and RE_FLAG_* values equal. */
1527+
jerry_value_t ret_val = ecma_op_create_regexp_object (ecma_pattern, flags);
1528+
/* NOTE(review): ret_val is returned without jerry_throw, unlike the other error paths here - confirm a failure from ecma_op_create_regexp_object reaches the caller in the expected form. */
1529+
ecma_deref_ecma_string (ecma_pattern);
1530+
return ret_val;
1531+
1532+
#else /* CONFIG_DISABLE_REGEXP_BUILTIN */
1533+
JERRY_UNUSED (pattern_p);
1534+
JERRY_UNUSED (pattern_size);
1535+
JERRY_UNUSED (flags);
1536+
1537+
return jerry_throw (ecma_raise_type_error (ECMA_ERR_MSG ("RegExp is not supported.")));
1538+
#endif /* !CONFIG_DISABLE_REGEXP_BUILTIN */
1539+
} /* jerry_create_regexp_sz */
1540+
14871541
/**
14881542
* Get length of an array object
14891543
*

jerry-core/ecma/builtin-objects/ecma-builtin-regexp.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,15 @@ ecma_builtin_regexp_dispatch_construct (const ecma_value_t *arguments_list_p, /*
110110
ECMA_FINALIZE (flags_str_value);
111111
}
112112

113+
uint16_t flags = 0;
114+
if (ecma_is_value_empty (ret_value) && (flags_string_p != NULL))
115+
{
116+
ret_value = re_parse_regexp_flags (flags_string_p, &flags);
117+
}
118+
113119
if (ecma_is_value_empty (ret_value))
114120
{
115-
ret_value = ecma_op_create_regexp_object (pattern_string_p, flags_string_p);
121+
ret_value = ecma_op_create_regexp_object (pattern_string_p, flags);
116122
}
117123

118124
if (pattern_string_p != NULL)

jerry-core/ecma/operations/ecma-regexp-object.c

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -249,22 +249,10 @@ ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *bytecode_p) /**<
249249
*/
250250
ecma_value_t
251251
ecma_op_create_regexp_object (ecma_string_t *pattern_p, /**< input pattern */
252-
ecma_string_t *flags_str_p) /**< flags */
252+
uint16_t flags) /**< flags */
253253
{
254254
JERRY_ASSERT (pattern_p != NULL);
255255
ecma_value_t ret_value = ECMA_VALUE_EMPTY;
256-
uint16_t flags = 0;
257-
258-
if (flags_str_p != NULL)
259-
{
260-
ECMA_TRY_CATCH (empty, re_parse_regexp_flags (flags_str_p, &flags), ret_value);
261-
ECMA_FINALIZE (empty);
262-
263-
if (!ecma_is_value_empty (ret_value))
264-
{
265-
return ret_value;
266-
}
267-
}
268256

269257
ecma_object_t *re_prototype_obj_p = ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP_PROTOTYPE);
270258

jerry-core/ecma/operations/ecma-regexp-object.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030

3131
/**
3232
* RegExp flags
33+
* Note:
34+
* This enum has to be kept in sync with jerry_regexp_flags_t.
3335
*/
3436
typedef enum
3537
{
@@ -53,7 +55,7 @@ typedef struct
5355
} re_matcher_ctx_t;
5456

5557
ecma_value_t ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *bytecode_p);
56-
ecma_value_t ecma_op_create_regexp_object (ecma_string_t *pattern_p, ecma_string_t *flags_str_p);
58+
ecma_value_t ecma_op_create_regexp_object (ecma_string_t *pattern_p, uint16_t flags);
5759
ecma_value_t ecma_regexp_exec_helper (ecma_value_t regexp_value, ecma_value_t input_string, bool ignore_global);
5860
ecma_value_t ecma_regexp_read_pattern_str_helper (ecma_value_t pattern_arg, ecma_string_t **pattern_string_p);
5961
ecma_char_t re_canonicalize (ecma_char_t ch, bool is_ignorecase);

jerry-core/include/jerryscript-core.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,16 @@ typedef enum
115115
JERRY_GC_SEVERITY_HIGH /**< free as much memory as possible */
116116
} jerry_gc_mode_t;
117117

118+
/**
119+
* Jerry regexp flags: bit flags accepted by jerry_create_regexp and jerry_create_regexp_sz; they may be combined with bitwise OR.
* Note:
*      The values start at bit 1 and have to be kept in sync with the internal RE_FLAG_* values (checked by a static assert in jerry.c).
120+
*/
121+
typedef enum
122+
{
123+
JERRY_REGEXP_FLAG_GLOBAL = (1u << 1), /**< Globally scan string */
124+
JERRY_REGEXP_FLAG_IGNORE_CASE = (1u << 2), /**< Ignore case */
125+
JERRY_REGEXP_FLAG_MULTILINE = (1u << 3) /**< Multiline string scan */
126+
} jerry_regexp_flags_t;
127+
118128
/**
119129
* Character type of JerryScript.
120130
*/
@@ -448,6 +458,9 @@ jerry_value_t jerry_create_number_nan (void);
448458
jerry_value_t jerry_create_null (void);
449459
jerry_value_t jerry_create_object (void);
450460
jerry_value_t jerry_create_promise (void);
461+
jerry_value_t jerry_create_regexp (const jerry_char_t *pattern, jerry_regexp_flags_t flags);
462+
jerry_value_t jerry_create_regexp_sz (const jerry_char_t *pattern, jerry_size_t pattern_size,
463+
jerry_regexp_flags_t flags);
451464
jerry_value_t jerry_create_string_from_utf8 (const jerry_char_t *str_p);
452465
jerry_value_t jerry_create_string_sz_from_utf8 (const jerry_char_t *str_p, jerry_size_t str_size);
453466
jerry_value_t jerry_create_string (const jerry_char_t *str_p);

tests/unit-core/test-regexp.c

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/* Copyright JS Foundation and other contributors, http://js.foundation
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
#include "jerryscript.h"
17+
18+
#include "test-common.h"
19+
20+
/**
 * Unit test for the RegExp C API: creates a RegExp with jerry_create_regexp, then runs a
 * script function on it and checks the first exec match and the multiline/global properties.
 */
int
21+
main (void)
22+
{
23+
TEST_INIT ();
24+
jerry_init (JERRY_INIT_EMPTY);
25+
/* Passed as the second argument of jerry_call_function below (presumably the `this` value - confirm). */
26+
jerry_value_t global_obj_val = jerry_get_global_object ();
27+
/* Create the RegExp under test with the global and multiline flags set. */
28+
jerry_char_t pattern[] = "[^.]+";
29+
jerry_regexp_flags_t flags = JERRY_REGEXP_FLAG_GLOBAL | JERRY_REGEXP_FLAG_MULTILINE;
30+
jerry_value_t regex_obj = jerry_create_regexp (pattern, flags);
31+
TEST_ASSERT (jerry_value_is_object (regex_obj));
32+
/* Script function taking the RegExp as `regex`; it returns [exec result, multiline, global]. */
33+
const jerry_char_t func_resource[] = "unknown";
34+
const jerry_char_t func_arg_list[] = "regex";
35+
const jerry_char_t func_src[] = "return [regex.exec('something.domain.com'), regex.multiline, regex.global];";
36+
jerry_value_t func_val = jerry_parse_function (func_resource,
37+
sizeof (func_resource) - 1,
38+
func_arg_list,
39+
sizeof (func_arg_list) - 1,
40+
func_src,
41+
sizeof (func_src) - 1,
42+
JERRY_PARSE_NO_OPTS);
43+
/* NOTE(review): func_val and res are used without an error check - consider asserting on them first. */
44+
jerry_value_t res = jerry_call_function (func_val, global_obj_val, &regex_obj, 1);
45+
jerry_value_t regex_res = jerry_get_property_by_index (res, 0);
46+
jerry_value_t regex_res_str = jerry_get_property_by_index (regex_res, 0);
47+
jerry_value_t is_multiline = jerry_get_property_by_index (res, 1);
48+
jerry_value_t is_global = jerry_get_property_by_index (res, 2);
49+
/* Copy the first element of the exec result into a local buffer sized by jerry_get_string_size. */
50+
jerry_size_t str_size = jerry_get_string_size (regex_res_str);
51+
jerry_char_t res_buff[str_size];
52+
jerry_size_t res_size = jerry_string_to_char_buffer (regex_res_str, res_buff, str_size);
53+
/* The test expects "[^.]+" on 'something.domain.com' to yield "something", with both flags reported as true. */
54+
const char expected_result[] = "something";
55+
TEST_ASSERT (res_size == (sizeof (expected_result) - 1));
56+
TEST_ASSERT (strncmp (expected_result, (const char *) res_buff, res_size) == 0);
57+
TEST_ASSERT (jerry_get_boolean_value (is_multiline));
58+
TEST_ASSERT (jerry_get_boolean_value (is_global));
59+
/* Release every value obtained above before cleaning up the engine. */
60+
jerry_release_value (regex_obj);
61+
jerry_release_value (res);
62+
jerry_release_value (func_val);
63+
jerry_release_value (regex_res);
64+
jerry_release_value (regex_res_str);
65+
jerry_release_value (is_multiline);
66+
jerry_release_value (is_global);
67+
jerry_release_value (global_obj_val);
68+
69+
jerry_cleanup ();
70+
return 0;
71+
} /* main */

0 commit comments

Comments
 (0)