diff --git a/docs/02.API-REFERENCE.md b/docs/02.API-REFERENCE.md index 9d16c64b12..69103a9ee3 100644 --- a/docs/02.API-REFERENCE.md +++ b/docs/02.API-REFERENCE.md @@ -1233,6 +1233,7 @@ jerry_get_string_size (const jerry_value_t value); - [jerry_create_string](#jerry_create_string) - [jerry_get_string_length](#jerry_get_string_length) +- [jerry_is_valid_cesu8_string](#jerry_is_valid_cesu8_string) ## jerry_get_utf8_string_size @@ -1272,6 +1273,8 @@ jerry_get_utf8_string_size (const jerry_value_t value); - [jerry_create_string_from_utf8](#jerry_create_string_from_utf8) - [jerry_get_utf8_string_length](#jerry_get_utf8_string_length) +- [jerry_is_valid_utf8_string](#jerry_is_valid_utf8_string) + ## jerry_get_string_length @@ -1308,6 +1311,8 @@ jerry_get_string_length (const jerry_value_t value); - [jerry_create_string](#jerry_create_string) - [jerry_get_string_size](#jerry_get_string_size) +- [jerry_is_valid_cesu8_string](#jerry_is_valid_cesu8_string) + ## jerry_get_utf8_string_length @@ -1347,6 +1352,8 @@ jerry_get_utf8_string_length (const jerry_value_t value); - [jerry_create_string_from_utf8](#jerry_create_string_from_utf8) - [jerry_get_utf8_string_size](#jerry_get_utf8_string_size) +- [jerry_is_valid_utf8_string](#jerry_is_valid_utf8_string) + ## jerry_string_to_char_buffer @@ -1394,6 +1401,8 @@ jerry_string_to_char_buffer (const jerry_value_t value, - [jerry_create_string](#jerry_create_string) - [jerry_get_string_size](#jerry_get_string_size) +- [jerry_is_valid_cesu8_string](#jerry_is_valid_cesu8_string) + ## jerry_string_to_utf8_char_buffer @@ -1441,6 +1450,8 @@ jerry_string_to_utf8_char_buffer (const jerry_value_t value, - [jerry_create_string_from_utf8](#jerry_create_string_from_utf8) - [jerry_get_utf8_string_size](#jerry_get_utf8_string_size) +- [jerry_is_valid_utf8_string](#jerry_is_valid_utf8_string) + ## jerry_substring_to_char_buffer @@ -1496,6 +1507,8 @@ jerry_substring_to_char_buffer (const jerry_value_t value, - 
[jerry_create_string](#jerry_create_string) - [jerry_get_string_size](#jerry_get_string_size) - [jerry_get_string_length](#jerry_get_string_length) +- [jerry_is_valid_cesu8_string](#jerry_is_valid_cesu8_string) + ## jerry_substring_to_utf8_char_buffer @@ -1548,9 +1561,12 @@ jerry_substring_to_utf8_char_buffer (const jerry_value_t value, **See also** -- [jerry_create_string_from_utf8](#jerry_create_string) +- [jerry_create_string_from_utf8](#jerry_create_string_from_utf8) - [jerry_get_utf8_string_size](#jerry_get_utf8_string_size) - [jerry_get_utf8_string_length](#jerry_get_utf8_string_length) +- [jerry_is_valid_utf8_string](#jerry_is_valid_utf8_string) + + # Functions for array object values ## jerry_get_array_length @@ -2260,6 +2276,7 @@ jerry_create_string (const jerry_char_t *str_p); **See also** +- [jerry_is_valid_cesu8_string](#jerry_is_valid_cesu8_string) - [jerry_create_string_sz](#jerry_create_string_sz) @@ -2298,8 +2315,10 @@ jerry_create_string_sz (const jerry_char_t *str_p, **See also** +- [jerry_is_valid_cesu8_string](#jerry_is_valid_cesu8_string) - [jerry_create_string](#jerry_create_string) + ## jerry_create_string_from_utf8 **Summary** @@ -2333,6 +2352,7 @@ jerry_create_string_from_utf8 (const jerry_char_t *str_p); **See also** +- [jerry_is_valid_utf8_string](#jerry_is_valid_utf8_string) - [jerry_create_string_sz_from_utf8](#jerry_create_string_sz_from_utf8) @@ -2373,8 +2393,10 @@ jerry_create_string_sz (const jerry_char_t *str_p, **See also** +- [jerry_is_valid_utf8_string](#jerry_is_valid_utf8_string) - [jerry_create_string_from_utf8](#jerry_create_string_from_utf8) + ## jerry_create_undefined **Summary** @@ -3287,6 +3309,99 @@ bool foreach_function (const jerry_value_t prop_name, - [jerry_object_property_foreach_t](#jerry_object_property_foreach_t) +# Input validator functions + +## jerry_is_valid_utf8_string + +**Summary** + +Validate UTF-8 string. 
+ +**Prototype** + +```c +bool +jerry_is_valid_utf8_string (const jerry_char_t *utf8_buf_p, /**< UTF-8 string */ + jerry_size_t buf_size) /**< string size */ +``` + +- `utf8_buf_p` - UTF-8 input string +- `buf_size` - input string size + +**Example** + +```c +{ + const jerry_char_t script[] = "print ('Hello, World!');"; + size_t script_size = strlen ((const char *) script); + + if (jerry_is_valid_utf8_string (script, (jerry_size_t) script_size)) + { + jerry_run_simple (script, script_size, JERRY_INIT_EMPTY); + } +} +``` + +**See also** + +- [jerry_run_simple](#jerry_run_simple) +- [jerry_create_string_from_utf8](#jerry_create_string_from_utf8) +- [jerry_create_string_sz_from_utf8](#jerry_create_string_sz_from_utf8) +- [jerry_get_utf8_string_size](#jerry_get_utf8_string_size) +- [jerry_get_utf8_string_length](#jerry_get_utf8_string_length) +- [jerry_string_to_utf8_char_buffer](#jerry_string_to_utf8_char_buffer) +- [jerry_substring_to_utf8_char_buffer](#jerry_substring_to_utf8_char_buffer) + +## jerry_is_valid_cesu8_string + +**Summary** + +Validate CESU-8 string. + +**Prototype** + +```c +bool +jerry_is_valid_cesu8_string (const jerry_char_t *cesu8_buf_p, /**< CESU-8 string */ + jerry_size_t buf_size) /**< string size */ +``` + +- `cesu8_buf_p` - CESU-8 input string +- `buf_size` - input string size + +**Example** + +```c +{ + jerry_init (JERRY_INIT_EMPTY); + + const jerry_char_t script[] = "Hello, World!"; + size_t script_size = strlen ((const char *) script); + + if (jerry_is_valid_cesu8_string (script, (jerry_size_t) script_size)) + { + jerry_value_t string_value = jerry_create_string_sz (script, + (jerry_size_t) script_size); + + ...
// usage of string_value + + jerry_release_value (string_value); + } + + jerry_cleanup (); +} +``` + +**See also** + +- [jerry_create_string](#jerry_create_string) +- [jerry_create_string_sz](#jerry_create_string_sz) +- [jerry_get_string_size](#jerry_get_string_size) +- [jerry_get_string_length](#jerry_get_string_length) +- [jerry_string_to_char_buffer](#jerry_string_to_char_buffer) +- [jerry_substring_to_char_buffer](#jerry_substring_to_char_buffer) + + # Snapshot functions ## jerry_parse_and_save_snapshot diff --git a/jerry-core/ecma/base/ecma-helpers-string.c b/jerry-core/ecma/base/ecma-helpers-string.c index 09992faf98..2af48ad1d1 100644 --- a/jerry-core/ecma/base/ecma-helpers-string.c +++ b/jerry-core/ecma/base/ecma-helpers-string.c @@ -151,7 +151,7 @@ ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, /**< utf-8 stri lit_utf8_size_t string_size) /**< string size */ { JERRY_ASSERT (string_p != NULL || string_size == 0); - JERRY_ASSERT (lit_is_cesu8_string_valid (string_p, string_size)); + JERRY_ASSERT (lit_is_valid_cesu8_string (string_p, string_size)); lit_magic_string_id_t magic_string_id = lit_is_utf8_string_magic (string_p, string_size); @@ -269,7 +269,7 @@ ecma_new_ecma_string_from_utf8_converted_to_cesu8 (const lit_utf8_byte_t *string { converted_string_size += string_size; - JERRY_ASSERT (lit_is_utf8_string_valid (string_p, string_size)); + JERRY_ASSERT (lit_is_valid_utf8_string (string_p, string_size)); lit_utf8_byte_t *data_p; diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-global.c b/jerry-core/ecma/builtin-objects/ecma-builtin-global.c index 58ae632a99..adba8936f5 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-global.c +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-global.c @@ -899,7 +899,7 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri, /**< uri argumen } if (!is_valid - || !lit_is_utf8_string_valid (octets, bytes_count)) + || !lit_is_valid_utf8_string (octets, bytes_count)) { ret_value = 
ecma_raise_uri_error (ECMA_ERR_MSG ("Invalid UTF8 string.")); break; @@ -923,7 +923,7 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri, /**< uri argumen { JERRY_ASSERT (output_start_p + output_size == output_char_p); - if (lit_is_cesu8_string_valid (output_start_p, output_size)) + if (lit_is_valid_cesu8_string (output_start_p, output_size)) { ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_size); ret_value = ecma_make_string_value (output_string_p); diff --git a/jerry-core/jerry.c b/jerry-core/jerry.c index 2f51612c91..8ae75382db 100644 --- a/jerry-core/jerry.c +++ b/jerry-core/jerry.c @@ -982,7 +982,7 @@ jerry_create_object (void) } /* jerry_create_object */ /** - * Create string from a valid UTF8 string + * Create string from a valid UTF-8 string * * Note: * returned value must be freed with jerry_release_value when it is no longer needed. @@ -996,7 +996,7 @@ jerry_create_string_from_utf8 (const jerry_char_t *str_p) /**< pointer to string } /* jerry_create_string_from_utf8 */ /** - * Create string from a valid UTF8 string + * Create string from a valid UTF-8 string * * Note: * returned value must be freed with jerry_release_value when it is no longer needed. @@ -1016,7 +1016,7 @@ jerry_create_string_sz_from_utf8 (const jerry_char_t *str_p, /**< pointer to str } /* jerry_create_string_sz_from_utf8 */ /** - * Create string from a valid CESU8 string + * Create string from a valid CESU-8 string * * Note: * returned value must be freed with jerry_release_value, when it is no longer needed. @@ -1030,7 +1030,7 @@ jerry_create_string (const jerry_char_t *str_p) /**< pointer to string */ } /* jerry_create_string */ /** - * Create string from a valid CESU8 string + * Create string from a valid CESU-8 string * * Note: * returned value must be freed with jerry_release_value when it is no longer needed. 
@@ -2006,6 +2006,34 @@ jerry_foreach_object_property (const jerry_value_t obj_val, /**< object value */ return false; } /* jerry_foreach_object_property */ +/** + * Validate UTF-8 string + * + * @return true - if UTF-8 string is well-formed + * false - otherwise + */ +bool +jerry_is_valid_utf8_string (const jerry_char_t *utf8_buf_p, /**< UTF-8 string */ + jerry_size_t buf_size) /**< string size in bytes */ +{ + return lit_is_valid_utf8_string ((const lit_utf8_byte_t *) utf8_buf_p, + (lit_utf8_size_t) buf_size); +} /* jerry_is_valid_utf8_string */ + +/** + * Validate CESU-8 string + * + * @return true - if CESU-8 string is well-formed + * false - otherwise + */ +bool +jerry_is_valid_cesu8_string (const jerry_char_t *cesu8_buf_p, /**< CESU-8 string */ + jerry_size_t buf_size) /**< string size in bytes */ +{ + return lit_is_valid_cesu8_string ((const lit_utf8_byte_t *) cesu8_buf_p, + (lit_utf8_size_t) buf_size); +} /* jerry_is_valid_cesu8_string */ + /** * @} */ diff --git a/jerry-core/jerryscript.h b/jerry-core/jerryscript.h index 4a43d1444e..b2688b7619 100644 --- a/jerry-core/jerryscript.h +++ b/jerry-core/jerryscript.h @@ -329,6 +329,12 @@ void jerry_set_object_native_handle (const jerry_value_t obj_val, uintptr_t hand bool jerry_foreach_object_property (const jerry_value_t obj_val, jerry_object_property_foreach_t foreach_p, void *user_data_p); +/** + * Input validator functions + */ +bool jerry_is_valid_utf8_string (const jerry_char_t *utf8_buf_p, jerry_size_t buf_size); +bool jerry_is_valid_cesu8_string (const jerry_char_t *cesu8_buf_p, jerry_size_t buf_size); + /** * Snapshot functions */ diff --git a/jerry-core/lit/lit-strings.c b/jerry-core/lit/lit-strings.c index 277286b7c4..b633df13ce 100644 --- a/jerry-core/lit/lit-strings.c +++ b/jerry-core/lit/lit-strings.c @@ -28,7 +28,7 @@ * false otherwise */ bool -lit_is_utf8_string_valid (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string */ +lit_is_valid_utf8_string (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string */ lit_utf8_size_t
buf_size) /**< string size */ { lit_utf8_size_t idx = 0; @@ -116,7 +116,7 @@ lit_is_utf8_string_valid (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string * } return true; -} /* lit_is_utf8_string_valid */ +} /* lit_is_valid_utf8_string */ /** * Validate cesu-8 string @@ -125,14 +125,14 @@ lit_is_utf8_string_valid (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string * * false otherwise */ bool -lit_is_cesu8_string_valid (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string */ +lit_is_valid_cesu8_string (const lit_utf8_byte_t *cesu8_buf_p, /**< cesu-8 string */ lit_utf8_size_t buf_size) /**< string size */ { lit_utf8_size_t idx = 0; while (idx < buf_size) { - lit_utf8_byte_t c = utf8_buf_p[idx++]; + lit_utf8_byte_t c = cesu8_buf_p[idx++]; if ((c & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER) { continue; @@ -166,7 +166,7 @@ lit_is_cesu8_string_valid (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string for (lit_utf8_size_t offset = 0; offset < extra_bytes_count; ++offset) { - c = utf8_buf_p[idx + offset]; + c = cesu8_buf_p[idx + offset]; if ((c & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER) { /* invalid continuation byte */ @@ -186,7 +186,7 @@ lit_is_cesu8_string_valid (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string } return true; -} /* lit_is_cesu8_string_valid */ +} /* lit_is_valid_cesu8_string */ /** * Check if the code point is UTF-16 low surrogate diff --git a/jerry-core/lit/lit-strings.h b/jerry-core/lit/lit-strings.h index 90f82de420..c83bf82c3b 100644 --- a/jerry-core/lit/lit-strings.h +++ b/jerry-core/lit/lit-strings.h @@ -85,8 +85,8 @@ #define LIT_UTF8_FIRST_BYTE_MAX LIT_UTF8_5_BYTE_MARKER /* validation */ -bool lit_is_utf8_string_valid (const lit_utf8_byte_t *utf8_buf_p, lit_utf8_size_t buf_size); -bool lit_is_cesu8_string_valid (const lit_utf8_byte_t *utf8_buf_p, lit_utf8_size_t buf_size); +bool lit_is_valid_utf8_string (const lit_utf8_byte_t *utf8_buf_p, lit_utf8_size_t buf_size); +bool lit_is_valid_cesu8_string (const 
lit_utf8_byte_t *cesu8_buf_p, lit_utf8_size_t buf_size); /* checks */ bool lit_is_code_point_utf16_low_surrogate (lit_code_point_t code_point); diff --git a/jerry-main/main-unix.c b/jerry-main/main-unix.c index ac7b13dfbe..a893342e22 100644 --- a/jerry-main/main-unix.c +++ b/jerry-main/main-unix.c @@ -663,6 +663,12 @@ main (int argc, break; } + if (!jerry_is_valid_utf8_string (source_p, (jerry_size_t) source_size)) + { + ret_value = jerry_create_error (JERRY_ERROR_COMMON, (jerry_char_t *) ("Input must be a valid UTF-8 string.")); + break; + } + if (jerry_is_feature_enabled (JERRY_FEATURE_SNAPSHOT_SAVE) && (is_save_snapshot_mode || is_save_literals_mode)) { static uint8_t snapshot_save_buffer[ JERRY_BUFFER_SIZE ]; @@ -686,6 +692,7 @@ main (int argc, fclose (snapshot_file_p); } } + if (!jerry_value_has_error_flag (ret_value) && is_save_literals_mode) { const size_t literal_buffer_size = jerry_parse_and_save_literals ((jerry_char_t *) source_p, diff --git a/tests/jerry/fail/1/regression-test-issue-1549.js b/tests/jerry/fail/1/regression-test-issue-1549.js new file mode 100644 index 0000000000..6e2e99b72a --- /dev/null +++ b/tests/jerry/fail/1/regression-test-issue-1549.js @@ -0,0 +1,15 @@ +// Copyright JS Foundation and other contributors, http://js.foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +va'�c= diff --git a/tests/unit/test-strings.c b/tests/unit/test-strings.c index 4ff6df8688..5cad4da577 100644 --- a/tests/unit/test-strings.c +++ b/tests/unit/test-strings.c @@ -87,7 +87,7 @@ generate_cesu8_string (lit_utf8_byte_t *buf_p, lit_utf8_size_t bytes_generated = generate_cesu8_char (char_size, buf_p); - TEST_ASSERT (lit_is_cesu8_string_valid (buf_p, bytes_generated)); + TEST_ASSERT (lit_is_valid_cesu8_string (buf_p, bytes_generated)); size += bytes_generated; buf_p += bytes_generated; @@ -183,19 +183,19 @@ main () /* Overlong-encoded code point */ lit_utf8_byte_t invalid_cesu8_string_1[] = {0xC0, 0x82}; - TEST_ASSERT (!lit_is_cesu8_string_valid (invalid_cesu8_string_1, sizeof (invalid_cesu8_string_1))); + TEST_ASSERT (!lit_is_valid_cesu8_string (invalid_cesu8_string_1, sizeof (invalid_cesu8_string_1))); /* Overlong-encoded code point */ lit_utf8_byte_t invalid_cesu8_string_2[] = {0xE0, 0x80, 0x81}; - TEST_ASSERT (!lit_is_cesu8_string_valid (invalid_cesu8_string_2, sizeof (invalid_cesu8_string_2))); + TEST_ASSERT (!lit_is_valid_cesu8_string (invalid_cesu8_string_2, sizeof (invalid_cesu8_string_2))); /* Pair of surrogates: 0xD901 0xDFF0 which encode Unicode character 0x507F0 */ lit_utf8_byte_t invalid_cesu8_string_3[] = {0xED, 0xA4, 0x81, 0xED, 0xBF, 0xB0}; - TEST_ASSERT (lit_is_cesu8_string_valid (invalid_cesu8_string_3, sizeof (invalid_cesu8_string_3))); + TEST_ASSERT (lit_is_valid_cesu8_string (invalid_cesu8_string_3, sizeof (invalid_cesu8_string_3))); /* Isolated high surrogate 0xD901 */ lit_utf8_byte_t valid_utf8_string_1[] = {0xED, 0xA4, 0x81}; - TEST_ASSERT (lit_is_cesu8_string_valid (valid_utf8_string_1, sizeof (valid_utf8_string_1))); + TEST_ASSERT (lit_is_valid_cesu8_string (valid_utf8_string_1, sizeof (valid_utf8_string_1))); lit_utf8_byte_t res_buf[3]; lit_utf8_size_t res_size;