From 23b41ff9d8b30412e701bdd09b298fd5faed453d Mon Sep 17 00:00:00 2001 From: Zoltan Herczeg Date: Mon, 3 Aug 2015 05:37:25 -0700 Subject: [PATCH] Implement String.prototype.search, and some minor regexp refactors. JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg@inf.u-szeged.hu --- .../ecma-builtin-regexp-prototype.cpp | 19 +-- .../ecma-builtin-string-prototype.cpp | 110 +++++++++++++++--- .../ecma-builtin-string-prototype.inc.h | 2 +- .../ecma/operations/ecma-regexp-object.cpp | 79 +++++++++---- .../ecma/operations/ecma-regexp-object.h | 4 +- tests/jerry/string-prototype-search.js | 62 ++++++++++ 6 files changed, 211 insertions(+), 65 deletions(-) create mode 100644 tests/jerry/string-prototype-search.js diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.cpp b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.cpp index 92890efbdc..dd93827147 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.cpp +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.cpp @@ -67,30 +67,13 @@ ecma_builtin_regexp_prototype_exec (ecma_value_t this_arg, /**< this argument */ { ECMA_TRY_CATCH (obj_this, ecma_op_to_object (this_arg), ret_value); - ecma_object_t *obj_p = ecma_get_object_from_value (obj_this); - ecma_property_t *bytecode_prop_p = ecma_get_internal_property (obj_p, ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE); - re_bytecode_t *bytecode_p = ECMA_GET_POINTER (re_bytecode_t, bytecode_prop_p->u.internal_property.value); - ECMA_TRY_CATCH (input_str_value, ecma_op_to_string (arg), ret_value); - ecma_string_t *input_str_p = ecma_get_string_from_value (input_str_value); - - /* Convert ecma_String_t *to regexp_bytecode_t* */ - lit_utf8_size_t input_str_size = ecma_string_get_size (input_str_p); - - MEM_DEFINE_LOCAL_ARRAY (input_utf8_buffer_p, input_str_size, lit_utf8_byte_t); - - ecma_string_to_utf8_string (input_str_p, input_utf8_buffer_p, (ssize_t) input_str_size); - lit_utf8_iterator_t iter = lit_utf8_iterator_create (input_utf8_buffer_p, input_str_size); - - ret_value = ecma_regexp_exec_helper (obj_p, bytecode_p, &iter); - - MEM_FINALIZE_LOCAL_ARRAY (input_utf8_buffer_p); + ret_value = ecma_regexp_exec_helper (obj_this, input_str_value, false); ECMA_FINALIZE (input_str_value); - ECMA_FINALIZE (obj_this); } diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp index 930d253877..4ea2268183 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp @@ -31,6 +31,10 @@ #include "jrt-libc-includes.h" #include "lit-char-helpers.h" +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN +#include "ecma-regexp-object.h" +#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ + #ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_STRING_BUILTIN #define ECMA_BUILTINS_INTERNAL @@ -560,15 +564,10 @@ ecma_builtin_string_prototype_object_match (ecma_value_t this_arg, /**< this arg JERRY_ASSERT (ecma_is_value_boolean (global_value)); - ecma_value_t exec_arguments[1] = { this_to_string_value }; - if (!ecma_is_value_true (global_value)) { /* 7. */ - ret_value = ecma_builtin_regexp_prototype_dispatch_routine (LIT_MAGIC_STRING_EXEC, - regexp_value, - exec_arguments, - 1); + ret_value = ecma_regexp_exec_helper (regexp_value, this_to_string_value, false); } else { @@ -608,10 +607,7 @@ ecma_builtin_string_prototype_object_match (ecma_value_t this_arg, /**< this arg { /* 8.f.i. */ ECMA_TRY_CATCH (exec_value, - ecma_builtin_regexp_prototype_dispatch_routine (LIT_MAGIC_STRING_EXEC, - regexp_value, - exec_arguments, - 1), + ecma_regexp_exec_helper (regexp_value, this_to_string_value, false), ret_value); if (ecma_is_value_null (exec_value)) @@ -829,13 +825,10 @@ ecma_builtin_string_prototype_object_replace_match (ecma_builtin_replace_search_ if (context_p->is_regexp) { - ecma_value_t exec_arguments[1] = { context_p->input_string }; - ECMA_TRY_CATCH (match_value, - ecma_builtin_regexp_prototype_dispatch_routine (LIT_MAGIC_STRING_EXEC, - context_p->regexp_or_search_string, - exec_arguments, - 1), + ecma_regexp_exec_helper (context_p->regexp_or_search_string, + context_p->input_string, + false), ret_value); if (!ecma_is_value_null (match_value)) @@ -1504,7 +1497,6 @@ ecma_builtin_string_prototype_object_replace (ecma_value_t this_arg, /**< this a return ret_value; } /* ecma_builtin_string_prototype_object_replace */ -#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ /** * The String.prototype object's 'search' routine @@ -1517,11 +1509,91 @@ ecma_builtin_string_prototype_object_replace (ecma_value_t this_arg, /**< this a */ static ecma_completion_value_t ecma_builtin_string_prototype_object_search (ecma_value_t this_arg, /**< this argument */ - ecma_value_t arg) /**< routine's argument */ + ecma_value_t regexp_arg) /**< routine's argument */ { - ECMA_BUILTIN_CP_UNIMPLEMENTED (this_arg, arg); + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + + /* 1. */ + ECMA_TRY_CATCH (check_coercible_value, + ecma_op_check_object_coercible (this_arg), + ret_value); + + /* 2. */ + ECMA_TRY_CATCH (to_string_value, + ecma_op_to_string (this_arg), + ret_value); + + ecma_value_t regexp_value = ecma_make_simple_value (ECMA_SIMPLE_VALUE_EMPTY); + + /* 3. */ + if (ecma_is_value_object (regexp_arg) + && ecma_object_get_class_name (ecma_get_object_from_value (regexp_arg)) == LIT_MAGIC_STRING_REGEXP_UL) + { + regexp_value = ecma_copy_value (regexp_arg, true); + } + else + { + /* 4. */ + ecma_value_t regexp_arguments[1] = { regexp_arg }; + + ECMA_TRY_CATCH (new_regexp_value, + ecma_builtin_regexp_dispatch_construct (regexp_arguments, 1), + ret_value); + + regexp_value = ecma_copy_value (new_regexp_value, true); + + ECMA_FINALIZE (new_regexp_value); + } + + /* 5. */ + if (ecma_is_completion_value_empty (ret_value)) + { + ECMA_TRY_CATCH (match_result, + ecma_regexp_exec_helper (regexp_value, to_string_value, true), + ret_value); + + ecma_number_t offset = -1; + + if (!ecma_is_value_null (match_result)) + { + JERRY_ASSERT (ecma_is_value_object (match_result)); + + ecma_object_t *match_object_p = ecma_get_object_from_value (match_result); + ecma_string_t *index_string_p = ecma_get_magic_string (LIT_MAGIC_STRING_INDEX); + + ECMA_TRY_CATCH (index_value, + ecma_op_object_get (match_object_p, index_string_p), + ret_value); + + JERRY_ASSERT (ecma_is_value_number (index_value)); + + offset = *ecma_get_number_from_value (index_value); + + ECMA_FINALIZE (index_value); + ecma_deref_ecma_string (index_string_p); + } + + if (ecma_is_completion_value_empty (ret_value)) + { + ecma_number_t *offset_number_p = ecma_alloc_number (); + *offset_number_p = offset; + + ret_value = ecma_make_normal_completion_value (ecma_make_number_value (offset_number_p)); + } + + ECMA_FINALIZE (match_result); + ecma_free_value (regexp_value, true); + } + + ECMA_FINALIZE (to_string_value); + ECMA_FINALIZE (check_coercible_value); + + /* 6. */ + return ret_value; } /* ecma_builtin_string_prototype_object_search */ +#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ + /** * The String.prototype object's 'slice' routine * diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.inc.h b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.inc.h index 26ac82de0f..033413ab7a 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.inc.h +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.inc.h @@ -71,9 +71,9 @@ ROUTINE (LIT_MAGIC_STRING_LOCALE_COMPARE_UL, ecma_builtin_string_prototype_objec #ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN ROUTINE (LIT_MAGIC_STRING_MATCH, ecma_builtin_string_prototype_object_match, 1, 1) ROUTINE (LIT_MAGIC_STRING_REPLACE, ecma_builtin_string_prototype_object_replace, 2, 2) +ROUTINE (LIT_MAGIC_STRING_SEARCH, ecma_builtin_string_prototype_object_search, 1, 1) #endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ -ROUTINE (LIT_MAGIC_STRING_SEARCH, ecma_builtin_string_prototype_object_search, 1, 1) ROUTINE (LIT_MAGIC_STRING_SPLIT, ecma_builtin_string_prototype_object_split, 2, 2) ROUTINE (LIT_MAGIC_STRING_SUBSTRING, ecma_builtin_string_prototype_object_substring, 2, 2) ROUTINE (LIT_MAGIC_STRING_TO_LOWER_CASE_UL, ecma_builtin_string_prototype_object_to_lower_case, 0, 0) diff --git a/jerry-core/ecma/operations/ecma-regexp-object.cpp b/jerry-core/ecma/operations/ecma-regexp-object.cpp index 011de97ccf..52dbc4c7aa 100644 --- a/jerry-core/ecma/operations/ecma-regexp-object.cpp +++ b/jerry-core/ecma/operations/ecma-regexp-object.cpp @@ -1176,19 +1176,45 @@ re_set_result_array_properties (ecma_object_t *array_obj_p, /**< result array */ * Returned value must be freed with ecma_free_completion_value */ ecma_completion_value_t -ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */ - re_bytecode_t *bc_p, /**< start of the RegExp bytecode */ - lit_utf8_iterator_t *iter_p) /**< input string iterator */ +ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */ + ecma_value_t input_string, /**< input string */ + bool ignore_global) /**< ignore global flag */ { ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + + JERRY_ASSERT (ecma_is_value_object (regexp_value)); + JERRY_ASSERT (ecma_is_value_string (input_string)); + + ecma_object_t *regexp_object_p = ecma_get_object_from_value (regexp_value); + + JERRY_ASSERT (ecma_object_get_class_name (regexp_object_p) == LIT_MAGIC_STRING_REGEXP_UL); + + ecma_property_t *bytecode_prop_p = ecma_get_internal_property (regexp_object_p, + ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE); + re_bytecode_t *bc_p = ECMA_GET_POINTER (re_bytecode_t, bytecode_prop_p->u.internal_property.value); + + ecma_string_t *input_string_p = ecma_get_string_from_value (input_string); + lit_utf8_size_t input_string_size = ecma_string_get_size (input_string_p); + + MEM_DEFINE_LOCAL_ARRAY (input_utf8_buffer_p, input_string_size, lit_utf8_byte_t); + + ecma_string_to_utf8_string (input_string_p, input_utf8_buffer_p, (ssize_t) input_string_size); + lit_utf8_iterator_t iterator = lit_utf8_iterator_create (input_utf8_buffer_p, input_string_size); + re_matcher_ctx_t re_ctx; - re_ctx.input_start_p = iter_p->buf_p; - re_ctx.input_end_p = iter_p->buf_p + iter_p->buf_size; + re_ctx.input_start_p = iterator.buf_p; + re_ctx.input_end_p = iterator.buf_p + iterator.buf_size; re_ctx.match_limit = 0; re_ctx.recursion_depth = 0; /* 1. Read bytecode header and init regexp matcher context. */ re_ctx.flags = (uint8_t) re_get_value (&bc_p); + + if (ignore_global) + { + re_ctx.flags &= (uint8_t) ~RE_FLAG_GLOBAL; + } + JERRY_DDLOG ("Exec with flags [global: %d, ignoreCase: %d, multiline: %d]\n", re_ctx.flags & RE_FLAG_GLOBAL, re_ctx.flags & RE_FLAG_IGNORE_CASE, @@ -1217,22 +1243,22 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */ bool is_match = false; re_ctx.num_of_iterations_p = num_of_iter_p; int32_t index = 0; - ecma_length_t input_str_len = lit_utf8_string_length (iter_p->buf_p, iter_p->buf_size); + ecma_length_t input_str_len = lit_utf8_string_length (iterator.buf_p, iterator.buf_size); - if (iter_p->buf_p && re_ctx.flags & RE_FLAG_GLOBAL) + if (iterator.buf_p && (re_ctx.flags & RE_FLAG_GLOBAL)) { ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL); - ecma_property_t *lastindex_prop_p = ecma_op_object_get_property (obj_p, magic_str_p); + ecma_property_t *lastindex_prop_p = ecma_op_object_get_property (regexp_object_p, magic_str_p); ECMA_OP_TO_NUMBER_TRY_CATCH (lastindex_num, lastindex_prop_p->u.named_data_property.value, ret_value) index = ecma_number_to_int32 (lastindex_num); - JERRY_ASSERT (iter_p->buf_pos.offset == 0 && !iter_p->buf_pos.is_non_bmp_middle); - if (!lit_utf8_iterator_is_eos (iter_p) + JERRY_ASSERT (iterator.buf_pos.offset == 0 && !iterator.buf_pos.is_non_bmp_middle); + if (!lit_utf8_iterator_is_eos (&iterator) && index <= (int32_t) input_str_len && index > 0) { - lit_utf8_iterator_advance (iter_p, (ecma_length_t) index); + lit_utf8_iterator_advance (&iterator, (ecma_length_t) index); } ECMA_OP_TO_NUMBER_FINALIZE (lastindex_num); ecma_deref_ecma_string (magic_str_p); @@ -1245,19 +1271,22 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */ { if (index < 0 || index > (int32_t) input_str_len) { - ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL); - ecma_number_t *lastindex_num_p = ecma_alloc_number (); - *lastindex_num_p = ECMA_NUMBER_ZERO; - ecma_op_object_put (obj_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true); - ecma_dealloc_number (lastindex_num_p); - ecma_deref_ecma_string (magic_str_p); + if (re_ctx.flags & RE_FLAG_GLOBAL) + { + ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL); + ecma_number_t *lastindex_num_p = ecma_alloc_number (); + *lastindex_num_p = ECMA_NUMBER_ZERO; + ecma_op_object_put (regexp_object_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true); + ecma_dealloc_number (lastindex_num_p); + ecma_deref_ecma_string (magic_str_p); + } is_match = false; break; } else { - ECMA_TRY_CATCH (match_value, re_match_regexp (&re_ctx, bc_p, *iter_p, &sub_iter), ret_value); + ECMA_TRY_CATCH (match_value, re_match_regexp (&re_ctx, bc_p, iterator, &sub_iter), ret_value); if (ecma_is_value_true (match_value)) { @@ -1265,9 +1294,9 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */ break; } - if (!lit_utf8_iterator_is_eos (iter_p)) + if (!lit_utf8_iterator_is_eos (&iterator)) { - lit_utf8_iterator_advance (iter_p, 1); + lit_utf8_iterator_advance (&iterator, 1); } index++; @@ -1275,12 +1304,12 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */ } } - if (iter_p->buf_p && re_ctx.flags & RE_FLAG_GLOBAL) + if (iterator.buf_p && (re_ctx.flags & RE_FLAG_GLOBAL)) { ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL); ecma_number_t *lastindex_num_p = ecma_alloc_number (); *lastindex_num_p = sub_iter.buf_pos.offset; - ecma_op_object_put (obj_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true); + ecma_op_object_put (regexp_object_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true); ecma_dealloc_number (lastindex_num_p); ecma_deref_ecma_string (magic_str_p); } @@ -1299,9 +1328,9 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */ { ecma_string_t *index_str_p = ecma_new_ecma_string_from_uint32 (i / 2); - /* Note: 'iter_p->buf_p == NULL' means the input is empty string */ + /* Note: 'iterator.buf_p == NULL' means the input is empty string */ if (((re_ctx.saved_p[i].buf_p && re_ctx.saved_p[i + 1].buf_p) - || (!iter_p->buf_p && !re_ctx.saved_p[i].buf_p && !re_ctx.saved_p[i + 1].buf_p)) + || (!iterator.buf_p && !re_ctx.saved_p[i].buf_p && !re_ctx.saved_p[i + 1].buf_p)) && re_ctx.saved_p[i + 1].buf_pos.offset >= re_ctx.saved_p[i].buf_pos.offset) { ecma_length_t capture_str_len; @@ -1336,8 +1365,10 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */ ret_value = ecma_make_normal_completion_value (ecma_make_simple_value (ECMA_SIMPLE_VALUE_NULL)); } } + MEM_FINALIZE_LOCAL_ARRAY (num_of_iter_p); MEM_FINALIZE_LOCAL_ARRAY (saved_p); + MEM_FINALIZE_LOCAL_ARRAY (input_utf8_buffer_p); return ret_value; } /* ecma_regexp_exec_helper */ diff --git a/jerry-core/ecma/operations/ecma-regexp-object.h b/jerry-core/ecma/operations/ecma-regexp-object.h index 7293ee811f..eb5ca6a538 100644 --- a/jerry-core/ecma/operations/ecma-regexp-object.h +++ b/jerry-core/ecma/operations/ecma-regexp-object.h @@ -59,9 +59,7 @@ extern ecma_completion_value_t ecma_op_create_regexp_object (ecma_string_t *pattern_p, ecma_string_t *flags_str_p); extern ecma_completion_value_t -ecma_regexp_exec_helper (ecma_object_t *obj_p, - re_bytecode_t *bc_p, - lit_utf8_iterator_t *iter_p); +ecma_regexp_exec_helper (ecma_value_t, ecma_value_t, bool); /** * @} diff --git a/tests/jerry/string-prototype-search.js b/tests/jerry/string-prototype-search.js new file mode 100644 index 0000000000..f8aa78afda --- /dev/null +++ b/tests/jerry/string-prototype-search.js @@ -0,0 +1,62 @@ +// Copyright 2015 University of Szeged +// Copyright 2015 Samsung Electronics Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +assert ("abcabbcd".search (/abb+c/) === 3); +assert ("ababbccabd".search ("((?:(ax))|(bx)|ab*c+)") === 2); +assert ("acbaabcabcabc".search (/b+c/g) === 5); +assert ("abcabd".search ("c?a+d") === -1); + +assert (String.prototype.search.call ({}, "ec+t") === 4); + +try +{ + String.prototype.search.call (null, "u"); + assert (false); +} +catch (e) +{ + assert (e instanceof TypeError); +} + +var regexp = /x/g; +regexp.lastIndex = "index"; + +assert ("aaxbb".search (regexp) === 2); +assert ("aabb".search (regexp) === -1); +assert (regexp.lastIndex === "index"); + +Object.defineProperty(regexp, "lastIndex", { + configurable : false, + enumerable : false, + value : "index2", + writable : false +}); + +assert ("axb".search (regexp) === 1); +assert ("aabb".search (regexp) === -1); +assert (regexp.lastIndex === "index2"); + +assert ("##\ud801\udc00".search ("\ud801") === 2); +assert ("##\ud801\udc00".search ("\udc00") === 3); + +// The real "exec" never returns with a number. +Object.getPrototypeOf(/x/).exec = function () { return "???"; } + +assert (/y/.exec("y") === "???"); + +// Changing exec should not affect search. +assert ("ay".search (/y/) === 1); + +print ("DONE");