diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp index 548b8f132e..c2caf01004 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp @@ -1667,6 +1667,128 @@ ecma_builtin_string_prototype_object_slice (ecma_value_t this_arg, /**< this arg return ret_value; } /* ecma_builtin_string_prototype_object_slice */ +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN + +/** + * The abstract SplitMatch routine for String.prototype.split() + * + * See also: + * ECMA-262 v5, 15.5.4.14 + * + * Used by: + * - The String.prototype.split routine. + * + * @return completion value - contains the value of the match + * - the index property of the completion value indicates the position of the + * first character in the input_string that matched + * + * Returned value must be freed with ecma_free_completion_value. + */ +static ecma_completion_value_t +ecma_builtin_helper_split_match (ecma_value_t input_string, /**< first argument */ + ecma_length_t start_idx, /**< second argument */ + ecma_value_t separator) /**< third argument */ +{ + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + + /* 1. */ + if (ecma_is_value_object (separator) + && ecma_object_get_class_name (ecma_get_object_from_value (separator)) == LIT_MAGIC_STRING_REGEXP_UL) + { + ecma_value_t regexp_value = ecma_copy_value (separator, false); + + ECMA_TRY_CATCH (to_string_val, + ecma_op_to_string (input_string), + ret_value); + + ecma_string_t *input_str_p = ecma_get_string_from_value (to_string_val); + ecma_string_t *substr_str_p = ecma_string_substr (input_str_p, start_idx, ecma_string_get_length (input_str_p)); + + ret_value = ecma_regexp_exec_helper (regexp_value, ecma_make_string_value (substr_str_p), true); + + if (!ecma_is_value_null (ecma_get_completion_value_value (ret_value))) + { + ecma_object_t *obj_p = ecma_get_object_from_completion_value (ret_value); + ecma_string_t *magic_index_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_INDEX); + ecma_property_t *index_prop_p = ecma_get_named_property (obj_p, magic_index_str_p); + + ecma_number_t *index_num_p = ecma_get_number_from_value (index_prop_p->u.named_data_property.value); + *index_num_p += start_idx; + + ecma_deref_ecma_string (magic_index_str_p); + } + + ecma_deref_ecma_string (substr_str_p); + ECMA_FINALIZE (to_string_val); + } + else + { + /* 2. */ + JERRY_ASSERT (ecma_is_value_string (input_string) && ecma_is_value_string (separator)); + + ecma_string_t *string_str_p = ecma_get_string_from_value (input_string); + ecma_string_t *separator_str_p = ecma_get_string_from_value (separator); + + /* 3. */ + ecma_length_t string_length = ecma_string_get_length (string_str_p); + ecma_length_t separator_length = ecma_string_get_length (separator_str_p); + + /* 4. */ + if (start_idx + separator_length > string_length) + { + ret_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_NULL); + } + else + { + bool is_different = false; + + /* 5. */ + for (ecma_length_t i = 0; i < separator_length && !is_different; i++) + { + ecma_char_t char_from_string = ecma_string_get_char_at_pos (string_str_p, start_idx + i); + ecma_char_t char_from_separator = ecma_string_get_char_at_pos (separator_str_p, i); + + if (char_from_string != char_from_separator) + { + is_different = true; + } + } + + if (!is_different) + { + /* 6-7. */ + ecma_completion_value_t match_array = ecma_op_create_array_object (0, 0, false); + ecma_object_t *match_array_p = ecma_get_object_from_completion_value (match_array); + ecma_string_t *zero_str_p = ecma_new_ecma_string_from_number (ECMA_NUMBER_ZERO); + + ecma_op_object_put (match_array_p, zero_str_p, ecma_make_string_value (separator_str_p), true); + + ecma_string_t *magic_index_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_INDEX); + ecma_property_t *index_prop_p = ecma_create_named_data_property (match_array_p, + magic_index_str_p, + true, false, false); + ecma_deref_ecma_string (magic_index_str_p); + + ecma_number_t *index_num_p = ecma_alloc_number (); + *index_num_p = ecma_uint32_to_number (start_idx); + + ecma_named_data_property_assign_value (match_array_p, index_prop_p, ecma_make_number_value (index_num_p)); + + ret_value = match_array; + + ecma_dealloc_number (index_num_p); + ecma_deref_ecma_string (zero_str_p); + } + else + { + ret_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_NULL); + } + } + } + + return ret_value; +} /* ecma_builtin_helper_split_match */ + /** * The String.prototype object's 'split' routine * @@ -1678,12 +1800,335 @@ ecma_builtin_string_prototype_object_slice (ecma_value_t this_arg, /**< this arg */ static ecma_completion_value_t ecma_builtin_string_prototype_object_split (ecma_value_t this_arg, /**< this argument */ - ecma_value_t arg1, /**< routine's first argument */ - ecma_value_t arg2) /**< routine's second argument */ + ecma_value_t arg1, /**< separator */ + ecma_value_t arg2) /**< limit */ { - ECMA_BUILTIN_CP_UNIMPLEMENTED (this_arg, arg1, arg2); + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + + /* 1. */ + ECMA_TRY_CATCH (this_check_coercible_val, + ecma_op_check_object_coercible (this_arg), + ret_value); + + /* 2. */ + ECMA_TRY_CATCH (this_to_string_val, + ecma_op_to_string (this_arg), + ret_value); + + /* 3. */ + ecma_completion_value_t new_array = ecma_op_create_array_object (0, 0, false); + + /* 5. */ + ecma_length_t limit = 0; + + if (ecma_is_value_undefined (arg2)) + { + limit = (uint32_t) -1; + } + else + { + ECMA_OP_TO_NUMBER_TRY_CATCH (limit_num, arg2, ret_value); + + limit = ecma_number_to_uint32 (limit_num); + + ECMA_OP_TO_NUMBER_FINALIZE (limit_num); + } + + if (ecma_is_completion_value_empty (ret_value)) + { + /* This variable indicates that we should return with the current array, to avoid another operation. */ + bool should_return = false; + + /* 9. */ + if (limit == 0) + { + should_return = true; + } + else /* if (limit != 0) */ + { + ecma_object_t *new_array_p = ecma_get_object_from_completion_value (new_array); + + /* 10. */ + if (ecma_is_value_undefined (arg1)) + { + ecma_string_t *zero_str_p = ecma_new_ecma_string_from_number (ECMA_NUMBER_ZERO); + + ecma_completion_value_t put_comp = ecma_builtin_helper_def_prop (new_array_p, + zero_str_p, + this_to_string_val, + true, + true, + true, + false); + + JERRY_ASSERT (ecma_is_completion_value_normal_true (put_comp)); + + should_return = true; + + ecma_deref_ecma_string (zero_str_p); + } + else /* if (!ecma_is_value_undefined (arg1)) */ + { + /* 6. */ + const ecma_length_t string_length = ecma_string_get_length (ecma_get_string_from_value (this_to_string_val)); + + /* 8. */ + ecma_value_t separator = ecma_make_simple_value (ECMA_SIMPLE_VALUE_EMPTY); + + if (ecma_is_value_object (arg1) + && ecma_object_get_class_name (ecma_get_object_from_value (arg1)) == LIT_MAGIC_STRING_REGEXP_UL) + { + separator = ecma_copy_value (arg1, true); + } + else + { + ECMA_TRY_CATCH (separator_to_string_val, + ecma_op_to_string (arg1), + ret_value); + + separator = ecma_copy_value (separator_to_string_val, true); + + ECMA_FINALIZE (separator_to_string_val); + } + + /* 11. */ + if (string_length == 0 && ecma_is_completion_value_empty (ret_value)) + { + /* 11.a */ + ecma_completion_value_t match_result = ecma_builtin_helper_split_match (this_to_string_val, + 0, + separator); + + /* 11.b */ + if (!ecma_is_value_null (ecma_get_completion_value_value (match_result))) + { + should_return = true; + } + else + { + /* 11.c */ + ecma_string_t *zero_str_p = ecma_new_ecma_string_from_number (ECMA_NUMBER_ZERO); + + ecma_completion_value_t put_comp = ecma_builtin_helper_def_prop (new_array_p, + zero_str_p, + this_to_string_val, + true, + true, + true, + false); + + JERRY_ASSERT (ecma_is_completion_value_normal_true (put_comp)); + + /* 11.d */ + should_return = true; + + ecma_deref_ecma_string (zero_str_p); + } + + ecma_free_completion_value (match_result); + } + else /* if (string_length != 0) || !ecma_is_completion_value_empty (ret_value) */ + { + /* 4. */ + ecma_length_t new_array_length = 0; + + /* 7. */ + ecma_length_t start_pos = 0; + + /* 12. */ + ecma_length_t curr_pos = start_pos; + + bool separator_is_empty = false; + + /* 13. */ + while (curr_pos < string_length && !should_return && ecma_is_completion_value_empty (ret_value)) + { + ecma_completion_value_t match_result = ecma_builtin_helper_split_match (this_to_string_val, + curr_pos, + separator); + + /* 13.b */ + if (ecma_is_value_null (ecma_get_completion_value_value (match_result))) + { + curr_pos++; + } + else /* if (!ecma_is_value_null (ecma_get_completion_value_value (match_result))) */ + { + ecma_object_t *match_array_obj_p = ecma_get_object_from_completion_value (match_result); + + ecma_string_t *zero_str_p = ecma_new_ecma_string_from_number (ECMA_NUMBER_ZERO); + ecma_completion_value_t match_comp_value = ecma_op_object_get (match_array_obj_p, zero_str_p); + + JERRY_ASSERT (ecma_is_completion_value_normal (match_comp_value)); + + ecma_string_t *match_str_p = ecma_get_string_from_completion_value (match_comp_value); + ecma_length_t match_str_length = ecma_string_get_length (match_str_p); + + ecma_string_t *magic_empty_str_p = ecma_new_ecma_string_from_magic_string_id (LIT_MAGIC_STRING__EMPTY); + separator_is_empty = ecma_compare_ecma_strings (magic_empty_str_p, match_str_p); + + ecma_deref_ecma_string (magic_empty_str_p); + ecma_free_completion_value (match_comp_value); + ecma_deref_ecma_string (zero_str_p); + + ecma_string_t *magic_index_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_INDEX); + ecma_property_t *index_prop_p = ecma_get_named_property (match_array_obj_p, magic_index_str_p); + + ecma_number_t *index_num_p = ecma_get_number_from_value (index_prop_p->u.named_data_property.value); + JERRY_ASSERT (*index_num_p >= 0); + + uint32_t end_pos = ecma_number_to_uint32 (*index_num_p); + + if (separator_is_empty) + { + end_pos = curr_pos + 1; + } + + /* 13.c.iii.1-2 */ + ecma_string_t *substr_str_p = ecma_string_substr (ecma_get_string_from_value (this_to_string_val), + start_pos, + end_pos); + + ecma_string_t *array_length_str_p = ecma_new_ecma_string_from_uint32 (new_array_length); + + ecma_completion_value_t put_comp = ecma_builtin_helper_def_prop (new_array_p, + array_length_str_p, + ecma_make_string_value (substr_str_p), + true, + true, + true, + false); + + JERRY_ASSERT (ecma_is_completion_value_normal_true (put_comp)); + + /* 13.c.iii.3 */ + new_array_length++; + + /* 13.c.iii.4 */ + if (new_array_length == limit && ecma_is_completion_value_empty (ret_value)) + { + should_return = true; + } + + /* 13.c.iii.5 */ + start_pos = end_pos + match_str_length; + + ecma_string_t *magic_length_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LENGTH); + + ECMA_TRY_CATCH (array_length_val, + ecma_op_object_get (match_array_obj_p, magic_length_str_p), + ret_value); + + ECMA_OP_TO_NUMBER_TRY_CATCH (array_length_num, array_length_val, ret_value); + + /* The first item is the match object, thus we should skip it. */ + const uint32_t match_result_array_length = ecma_number_to_uint32 (array_length_num) - 1; + + /* 13.c.iii.6 */ + uint32_t i = 0; + + /* 13.c.iii.7 */ + while (i < match_result_array_length && ecma_is_completion_value_empty (ret_value)) + { + /* 13.c.iii.7.a */ + i++; + ecma_string_t *idx_str_p = ecma_new_ecma_string_from_uint32 (i); + ecma_string_t *new_array_idx_str_p = ecma_new_ecma_string_from_uint32 (new_array_length); + + ecma_completion_value_t match_comp_value = ecma_op_object_get (match_array_obj_p, idx_str_p); + + JERRY_ASSERT (ecma_is_completion_value_normal (match_comp_value)); + + ecma_value_t match_result_value = ecma_get_completion_value_value (match_comp_value); + + /* 13.c.iii.7.b */ + ecma_completion_value_t put_comp = ecma_builtin_helper_def_prop (new_array_p, + new_array_idx_str_p, + match_result_value, + true, + true, + true, + false); + + JERRY_ASSERT (ecma_is_completion_value_normal_true (put_comp)); + + /* 13.c.iii.7.c */ + new_array_length++; + + /* 13.c.iii.7.d */ + if (new_array_length == limit && ecma_is_completion_value_empty (ret_value)) + { + should_return = true; + } + + ecma_free_completion_value (match_comp_value); + ecma_deref_ecma_string (new_array_idx_str_p); + ecma_deref_ecma_string (idx_str_p); + } + + /* 13.c.iii.8 */ + curr_pos = start_pos; + + ECMA_OP_TO_NUMBER_FINALIZE (array_length_num); + ECMA_FINALIZE (array_length_val); + ecma_deref_ecma_string (magic_length_str_p); + ecma_deref_ecma_string (array_length_str_p); + ecma_deref_ecma_string (substr_str_p); + ecma_deref_ecma_string (magic_index_str_p); + } /* if (!ecma_is_value_null (ecma_get_completion_value_value (match_result))) */ + + ecma_free_completion_value (match_result); + + } /* while (curr_pos < string_length && !should_return && ecma_is_completion_value_empty (ret_value)) */ + + if (!should_return && !separator_is_empty && ecma_is_completion_value_empty (ret_value)) + { + /* 14. */ + ecma_string_t *substr_str_p; + substr_str_p = ecma_string_substr (ecma_get_string_from_value (this_to_string_val), + start_pos, + string_length); + + /* 15. */ + ecma_string_t *array_length_string_p = ecma_new_ecma_string_from_uint32 (new_array_length); + + ecma_completion_value_t put_comp = ecma_builtin_helper_def_prop (new_array_p, + array_length_string_p, + ecma_make_string_value (substr_str_p), + true, + true, + true, + false); + + JERRY_ASSERT (ecma_is_completion_value_normal_true (put_comp)); + + ecma_deref_ecma_string (array_length_string_p); + ecma_deref_ecma_string (substr_str_p); + } + } /* if (string_length != 0) || !ecma_is_completion_value_empty (ret_value) */ + + ecma_free_value (separator, true); + } /* if (!ecma_is_value_undefined (arg1)) */ + } /* if (limit != 0) */ + } /* if (ecma_is_completion_value_empty (ret_value)) */ + + if (ecma_is_completion_value_empty (ret_value)) + { + ret_value = new_array; + } + else + { + ecma_free_completion_value (new_array); + } + + ECMA_FINALIZE (this_to_string_val); + ECMA_FINALIZE (this_check_coercible_val); + + return ret_value; } /* ecma_builtin_string_prototype_object_split */ +#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ + /** * The String.prototype object's 'substring' routine * diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.inc.h b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.inc.h index 033413ab7a..08b1a332a1 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.inc.h +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.inc.h @@ -72,9 +72,9 @@ ROUTINE (LIT_MAGIC_STRING_LOCALE_COMPARE_UL, ecma_builtin_string_prototype_objec ROUTINE (LIT_MAGIC_STRING_MATCH, ecma_builtin_string_prototype_object_match, 1, 1) ROUTINE (LIT_MAGIC_STRING_REPLACE, ecma_builtin_string_prototype_object_replace, 2, 2) ROUTINE (LIT_MAGIC_STRING_SEARCH, ecma_builtin_string_prototype_object_search, 1, 1) +ROUTINE (LIT_MAGIC_STRING_SPLIT, ecma_builtin_string_prototype_object_split, 2, 2) #endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ -ROUTINE (LIT_MAGIC_STRING_SPLIT, ecma_builtin_string_prototype_object_split, 2, 2) ROUTINE (LIT_MAGIC_STRING_SUBSTRING, ecma_builtin_string_prototype_object_substring, 2, 2) ROUTINE (LIT_MAGIC_STRING_TO_LOWER_CASE_UL, ecma_builtin_string_prototype_object_to_lower_case, 0, 0) ROUTINE (LIT_MAGIC_STRING_TO_LOCALE_LOWER_CASE_UL, ecma_builtin_string_prototype_object_to_locale_lower_case, 0, 0) diff --git a/tests/jerry/string-prototype-split.js b/tests/jerry/string-prototype-split.js new file mode 100644 index 0000000000..b05c0248c4 --- /dev/null +++ b/tests/jerry/string-prototype-split.js @@ -0,0 +1,161 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var res = "".split(); +assert (res[0] === ""); + +res = "foo".split(); +assert (res[0] === "foo"); + +var str = "foo//bar/baz//foo"; +res = str.split(""); + +assert (res.length === 17); +for (var i = 0; i < res.length; i++) +{ + assert (res[i] === str[i]); +} + +res = str.split("", "foo"); +assert (res.length === 0); + +res = str.split("", "4"); +assert (res.length === 4); +for (var i = 0; i < res.length; i++) +{ + assert (res[i] === str[i]); +} + +res = str.split(undefined, undefined) +assert (res.length === 1); +assert (res[0] === "foo//bar/baz//foo"); + +res = str.split("a", false); +assert (res.length === 0); + +res = str.split("a", true); +assert (res.length === 1); +assert (res[0] === "foo//b"); + +res = str.split("a", NaN); +assert (res.length === 0); + +res = str.split("a", Infinity); +assert (res.length === 0); + +res = str.split(["o"]) +assert (res.length === 5); +assert (res[0] === "f"); +assert (res[1] === ""); +assert (res[2] === "//bar/baz//f"); +assert (res[3] === ""); +assert (res[4] === ""); + +res = str.split(["o", "/"]); +assert (res.length === 1); +assert (res[0] === "foo//bar/baz//foo"); + +res = str.split("a", ["2"]); +assert (res.length === 2); +assert (res[0] === "foo//b"); +assert (res[1] === "r/b"); + +res = str.split("a", ["2", "3"]); +assert (res.length === 0); + +var obj = {x: 12, b: undefined}; +res = str.split(obj, 4); +assert (res.length === 1); +assert (res[0] === "foo//bar/baz//foo"); + +res = str.split("o", obj); +assert (res.length === 0); + +res = str.split(false, true); +assert (res.length === 1); +assert (res[0] === "foo//bar/baz//foo"); + +res = str.split(/\/\//); +assert (res.length === 3); +assert (res[0] === "foo"); +assert (res[1] === "bar/baz"); +assert (res[2] === "foo"); + +res = str.split(/\/\//, 1); +assert (res.length === 1); +assert (res[0] === "foo"); + +res = str.split(/\/\//, -1); +assert (res.length === 3); +assert (res[0] === "foo"); +assert (res[1] === "bar/baz"); +assert (res[2] === "foo"); + +str = "fo123o12bar"; +res = str.split(12, undefined); +assert (res.length === 3); +assert (res[0] === "fo"); +assert (res[1] === "3o"); +assert (res[2] === "bar"); + +str = "aaabababaab"; +res = str.split(/aa+/); +assert (res.length === 3); +assert (res[0] === ""); +assert (res[1] === "babab"); +assert (res[2] === "b"); + +str = "Aboldandcoded"; +res = str.split(/<(\/)?([^<>]+)>/); +assert (res.length === 13); +var expected = ["A", undefined, "B", "bold", "/", "B", "and", undefined, "CODE", "coded", "/", "CODE", ""]; +for (var i = 0; i < res.length; i++) +{ + assert (res[i] === expected[i]); +} + +/* Check Object coercible */ +try { + String.prototype.split.call(undefined, ""); + assert (false); +} +catch (e) +{ + assert (e instanceof TypeError); +} + +/* Check toString conversion */ +try { + var obj = { toString: function() { throw new ReferenceError("foo"); } }; + String.prototype.split.call(obj, ""); + assert (false); +} +catch (e) +{ + assert (e instanceof ReferenceError); + assert (e.message === "foo"); +} + +/* Check Invalid RegExp */ +try { + var obj = { toString: function() { throw new ReferenceError("foo"); } }; + "".split(obj); + assert (false); +} +catch (e) +{ + assert (e instanceof ReferenceError); + assert (e.message === "foo"); +}