From 7025f97fddbde6c0c7b87224ab86257676f48aab Mon Sep 17 00:00:00 2001 From: Ruben Ayrapetyan Date: Wed, 27 May 2015 18:33:22 +0300 Subject: [PATCH 1/2] Adding routine for conversion of any character sequence (not only strings contained in source code buffer) to lexer token. JerryScript-DCO-1.0-Signed-off-by: Ruben Ayrapetyan r.ayrapetyan@samsung.com --- jerry-core/parser/js/lexer.cpp | 66 ++++++++++++++++++++------------ jerry-core/parser/js/literal.cpp | 12 ++++-- jerry-core/parser/js/literal.h | 2 +- 3 files changed, 52 insertions(+), 28 deletions(-) diff --git a/jerry-core/parser/js/lexer.cpp b/jerry-core/parser/js/lexer.cpp index dc2ac29499..f4c769178a 100644 --- a/jerry-core/parser/js/lexer.cpp +++ b/jerry-core/parser/js/lexer.cpp @@ -118,34 +118,38 @@ current_token_equals_to (const char *str) return false; } +/** + * Compare specified string to literal + * + * @return true - if the literal contains exactly the specified string, + * false - otherwise. + */ static bool -current_token_equals_to_literal (literal lit) +string_equals_to_literal (const ecma_char_t *str_p, /**< characters buffer */ + ecma_length_t length, /**< string's length */ + literal lit) /**< literal */ { if (lit.type == LIT_STR) { - if (lit.data.lp.length != (ecma_length_t) (buffer - token_start)) - { - return false; - } - if (!strncmp ((const char *) lit.data.lp.str, token_start, lit.data.lp.length)) + if (lit.data.lp.length == length + && strncmp ((const char *) lit.data.lp.str, (const char*) str_p, length) == 0) { return true; } } else if (lit.type == LIT_MAGIC_STR) { - const char *str = (const char *) ecma_get_magic_string_zt (lit.data.magic_str_id); - if (strlen (str) != (size_t) (buffer - token_start)) - { - return false; - } - if (!strncmp (str, token_start, strlen (str))) + const char *magic_str_p = (const char *) ecma_get_magic_string_zt (lit.data.magic_str_id); + + if (strlen (magic_str_p) == length + && strncmp (magic_str_p, (const char*) str_p, length) == 0) { return true; } } + 
return false; -} +} /* string_equals_to_literal */ static literal adjust_string_ptrs (literal lit, size_t diff) @@ -189,32 +193,33 @@ add_string_to_string_cache (const ecma_char_t* str, ecma_length_t length) return res; } -static literal -add_current_token_to_string_cache (void) -{ - return add_string_to_string_cache ((ecma_char_t*) token_start, (ecma_length_t) (buffer - token_start)); -} - +/** + * Convert string to token of specified type + * + * @return token descriptor + */ static token -convert_current_token_to_token (token_type tt) +convert_string_to_token (token_type tt, /**< token type */ + const ecma_char_t *str_p, /**< characters buffer */ + ecma_length_t length) /**< string's length */ { - JERRY_ASSERT (token_start); + JERRY_ASSERT (str_p != NULL); for (literal_index_t i = 0; i < STACK_SIZE (literals); i++) { const literal lit = STACK_ELEMENT (literals, i); if ((lit.type == LIT_STR || lit.type == LIT_MAGIC_STR) - && current_token_equals_to_literal (lit)) + && string_equals_to_literal (str_p, length, lit)) { return create_token (tt, i); } } - literal lit = create_literal_from_str (token_start, (ecma_length_t) (buffer - token_start)); + literal lit = create_literal_from_str (str_p, length); JERRY_ASSERT (lit.type == LIT_STR || lit.type == LIT_MAGIC_STR); if (lit.type == LIT_STR) { - lit = add_current_token_to_string_cache (); + lit = add_string_to_string_cache (str_p, length); } STACK_PUSH (literals, lit); @@ -222,6 +227,19 @@ convert_current_token_to_token (token_type tt) return create_token (tt, (literal_index_t) (STACK_SIZE (literals) - 1)); } +/** + * Convert string, currently processed by lexer (see also: token_start, buffer) to token of specified type + * + * @return token descriptor + */ +static token +convert_current_token_to_token (token_type tt) /**< token type */ +{ + JERRY_ASSERT (token_start != NULL); + + return convert_string_to_token (tt, (const ecma_char_t*) token_start, (ecma_length_t) (buffer - token_start)); +} /* 
convert_current_token_to_token */ + /* If TOKEN represents a keyword, return decoded keyword, if TOKEN represents a Future Reserved Word, return KW_RESERVED, otherwise return KW_NONE. */ diff --git a/jerry-core/parser/js/literal.cpp b/jerry-core/parser/js/literal.cpp index b5de673d75..2fc010285c 100644 --- a/jerry-core/parser/js/literal.cpp +++ b/jerry-core/parser/js/literal.cpp @@ -39,11 +39,17 @@ create_literal_from_num (ecma_number_t num) return ret; } +/** + * Create literal from string + * + * @return literal descriptor + */ literal -create_literal_from_str (const char *s, ecma_length_t len) +create_literal_from_str (const ecma_char_t *s, /**< characters buffer */ + ecma_length_t len) /**< string's length */ { - return create_literal_from_zt ((const ecma_char_t *) s, len); -} + return create_literal_from_zt (s, len); +} /* create_literal_from_str */ literal create_literal_from_str_compute_len (const char *s) diff --git a/jerry-core/parser/js/literal.h b/jerry-core/parser/js/literal.h index 6678d71076..c089cb70f1 100644 --- a/jerry-core/parser/js/literal.h +++ b/jerry-core/parser/js/literal.h @@ -43,7 +43,7 @@ typedef struct literal create_empty_literal (void); literal create_literal_from_num (ecma_number_t); -literal create_literal_from_str (const char *, ecma_length_t); +literal create_literal_from_str (const ecma_char_t*, ecma_length_t); literal create_literal_from_str_compute_len (const char *); literal create_literal_from_zt (const ecma_char_t *, ecma_length_t); bool literal_equal (literal, literal); From 8b28cac99e94e6aad05239ec1d3c5e8484300134 Mon Sep 17 00:00:00 2001 From: Ruben Ayrapetyan Date: Wed, 27 May 2015 20:33:29 +0300 Subject: [PATCH 2/2] Implementing escape sequences support with the exception of "\0" ("") character and cases that depend on Unicode support. 
JerryScript-DCO-1.0-Signed-off-by: Ruben Ayrapetyan r.ayrapetyan@samsung.com --- jerry-core/ecma/base/ecma-helpers-char.cpp | 68 ++ jerry-core/ecma/base/ecma-helpers.h | 5 + jerry-core/parser/js/lexer.cpp | 805 +++++++++++------- tests/jerry-test-suite/precommit_test_list | 5 +- tests/jerry-test-suite/unsupported_list | 9 +- tests/jerry/escape_sequences.js | 39 + .../fail/12/escape_sequences_invalid_hex.js | 15 + .../12/escape_sequences_invalid_unicode.js | 15 + .../12/escape_sequences_invalid_variable.js | 15 + 9 files changed, 639 insertions(+), 337 deletions(-) create mode 100644 jerry-core/ecma/base/ecma-helpers-char.cpp create mode 100644 tests/jerry/escape_sequences.js create mode 100644 tests/jerry/fail/12/escape_sequences_invalid_hex.js create mode 100644 tests/jerry/fail/12/escape_sequences_invalid_unicode.js create mode 100644 tests/jerry/fail/12/escape_sequences_invalid_variable.js diff --git a/jerry-core/ecma/base/ecma-helpers-char.cpp b/jerry-core/ecma/base/ecma-helpers-char.cpp new file mode 100644 index 0000000000..f1f2c1fec1 --- /dev/null +++ b/jerry-core/ecma/base/ecma-helpers-char.cpp @@ -0,0 +1,68 @@ +/* Copyright 2015 Samsung Electronics Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+/** \addtogroup ecma ECMA
+ * @{
+ *
+ * \addtogroup ecmahelpers Helpers for operations with ECMA characters
+ * @{
+ */
+
+#include "ecma-globals.h"
+#include "ecma-helpers.h"
+
+/**
+ * Check if specified character is the newline (line feed) character
+ *
+ * @return true - if the character is "<LF>" (code 0x0A) character according to ECMA-262 v5, Table 3,
+ *         false - otherwise.
+ */
+bool
+ecma_char_is_new_line (ecma_char_t c) /**< character value */
+{
+  return (c == '\x0A');
+} /* ecma_char_is_new_line */
+
+/**
+ * Check if specified character is the carriage return character
+ *
+ * @return true - if the character is "<CR>" (code 0x0D) character according to ECMA-262 v5, Table 3,
+ *         false - otherwise.
+ */
+bool
+ecma_char_is_carriage_return (ecma_char_t c) /**< character value */
+{
+  return (c == '\x0D');
+} /* ecma_char_is_carriage_return */
+
+/**
+ * Check if specified character is one of LineTerminator (ECMA-262 v5, Table 3) characters
+ *
+ * @return true - if the character is one of LineTerminator characters,
+ *         false - otherwise.
+ */ +bool +ecma_char_is_line_terminator (ecma_char_t c) /**< character value */ +{ + /* FIXME: Handle <LS> and <PS> (ECMA-262 v5, 7.3, Table 3) when Unicode would be supported */ + + return (ecma_char_is_carriage_return (c) + || ecma_char_is_new_line (c)); +} /* ecma_char_is_line_terminator */ + +/** + * @} + * @} + */ diff --git a/jerry-core/ecma/base/ecma-helpers.h b/jerry-core/ecma/base/ecma-helpers.h index b03f98c78d..a039dff5e4 100644 --- a/jerry-core/ecma/base/ecma-helpers.h +++ b/jerry-core/ecma/base/ecma-helpers.h @@ -313,6 +313,11 @@ extern ecma_number_t ecma_int32_to_number (int32_t value); extern ecma_number_t ecma_uint32_to_number (uint32_t value); extern ecma_length_t ecma_number_to_zt_string (ecma_number_t num, ecma_char_t *buffer_p, ssize_t buffer_size); +/* ecma-helpers-char.cpp */ +extern bool ecma_char_is_new_line (ecma_char_t c); +extern bool ecma_char_is_carriage_return (ecma_char_t c); +extern bool ecma_char_is_line_terminator (ecma_char_t c); + #endif /* !JERRY_ECMA_HELPERS_H */ /** diff --git a/jerry-core/parser/js/lexer.cpp b/jerry-core/parser/js/lexer.cpp index f4c769178a..615acd0165 100644 --- a/jerry-core/parser/js/lexer.cpp +++ b/jerry-core/parser/js/lexer.cpp @@ -104,20 +104,6 @@ create_token (token_type type, literal_index_t uid) return ret; } -static bool -current_token_equals_to (const char *str) -{ - if (strlen (str) != (size_t) (buffer - token_start)) - { - return false; - } - if (!strncmp (str, token_start, (size_t) (buffer - token_start))) - { - return true; - } - return false; -} - /** * Compare specified string to literal * @@ -228,269 +214,138 @@ convert_string_to_token (token_type tt, /**< token type */ } /** - * Convert string, currently processed by lexer (see also: token_start, buffer) to token of specified type + * Try to decode specified string as keyword * - * @return token descriptor + * @return if specified string represents a keyword, return corresponding keyword token, + * else if it is 'null' - return TOK_NULL token, + *
else if it is 'true' or 'false' - return TOK_BOOL with corresponding boolean value, + * else - return empty_token. */ static token -convert_current_token_to_token (token_type tt) /**< token type */ -{ - JERRY_ASSERT (token_start != NULL); - - return convert_string_to_token (tt, (const ecma_char_t*) token_start, (ecma_length_t) (buffer - token_start)); -} /* convert_current_token_to_token */ - -/* If TOKEN represents a keyword, return decoded keyword, - if TOKEN represents a Future Reserved Word, return KW_RESERVED, - otherwise return KW_NONE. */ -static token -decode_keyword (void) +decode_keyword (const ecma_char_t *str_p, /**< characters buffer */ + size_t length) /**< string's length */ { - if (current_token_equals_to ("break")) - { - return create_token (TOK_KEYWORD, KW_BREAK); - } - if (current_token_equals_to ("case")) - { - return create_token (TOK_KEYWORD, KW_CASE); - } - if (current_token_equals_to ("catch")) - { - return create_token (TOK_KEYWORD, KW_CATCH); - } - if (current_token_equals_to ("class")) - { - return create_token (TOK_KEYWORD, KW_CLASS); - } - if (current_token_equals_to ("const")) - { - return create_token (TOK_KEYWORD, KW_CONST); - } - if (current_token_equals_to ("continue")) - { - return create_token (TOK_KEYWORD, KW_CONTINUE); - } - if (current_token_equals_to ("debugger")) - { - return create_token (TOK_KEYWORD, KW_DEBUGGER); - } - if (current_token_equals_to ("default")) - { - return create_token (TOK_KEYWORD, KW_DEFAULT); - } - if (current_token_equals_to ("delete")) - { - return create_token (TOK_KEYWORD, KW_DELETE); - } - if (current_token_equals_to ("do")) - { - return create_token (TOK_KEYWORD, KW_DO); - } - if (current_token_equals_to ("else")) - { - return create_token (TOK_KEYWORD, KW_ELSE); - } - if (current_token_equals_to ("enum")) - { - return create_token (TOK_KEYWORD, KW_ENUM); - } - if (current_token_equals_to ("export")) - { - return create_token (TOK_KEYWORD, KW_EXPORT); - } - if (current_token_equals_to ("extends")) 
- { - return create_token (TOK_KEYWORD, KW_EXTENDS); - } - if (current_token_equals_to ("false")) - { - return create_token (TOK_BOOL, false); - } - if (current_token_equals_to ("finally")) - { - return create_token (TOK_KEYWORD, KW_FINALLY); - } - if (current_token_equals_to ("for")) - { - return create_token (TOK_KEYWORD, KW_FOR); - } - if (current_token_equals_to ("function")) - { - return create_token (TOK_KEYWORD, KW_FUNCTION); - } - if (current_token_equals_to ("if")) - { - return create_token (TOK_KEYWORD, KW_IF); - } - if (current_token_equals_to ("in")) - { - return create_token (TOK_KEYWORD, KW_IN); - } - if (current_token_equals_to ("instanceof")) - { - return create_token (TOK_KEYWORD, KW_INSTANCEOF); - } - if (current_token_equals_to ("interface")) - { - if (strict_mode) - { - return create_token (TOK_KEYWORD, KW_INTERFACE); - } - else - { - return convert_current_token_to_token (TOK_NAME); - } - } - if (current_token_equals_to ("import")) - { - return create_token (TOK_KEYWORD, KW_IMPORT); - } - if (current_token_equals_to ("implements")) - { - if (strict_mode) - { - return create_token (TOK_KEYWORD, KW_IMPLEMENTS); - } - else + typedef struct + { + const char *keyword_p; + keyword keyword_id; + } kw_descr_t; + + const kw_descr_t keywords[] = + { +#define KW_DESCR(literal, keyword_id) { literal, keyword_id } + KW_DESCR ("break", KW_BREAK), + KW_DESCR ("case", KW_CASE), + KW_DESCR ("catch", KW_CATCH), + KW_DESCR ("class", KW_CLASS), + KW_DESCR ("const", KW_CONST), + KW_DESCR ("continue", KW_CONTINUE), + KW_DESCR ("debugger", KW_DEBUGGER), + KW_DESCR ("default", KW_DEFAULT), + KW_DESCR ("delete", KW_DELETE), + KW_DESCR ("do", KW_DO), + KW_DESCR ("else", KW_ELSE), + KW_DESCR ("enum", KW_ENUM), + KW_DESCR ("export", KW_EXPORT), + KW_DESCR ("extends", KW_EXTENDS), + KW_DESCR ("finally", KW_FINALLY), + KW_DESCR ("for", KW_FOR), + KW_DESCR ("function", KW_FUNCTION), + KW_DESCR ("if", KW_IF), + KW_DESCR ("in", KW_IN), + KW_DESCR ("instanceof", KW_INSTANCEOF), 
+ KW_DESCR ("interface", KW_INTERFACE), + KW_DESCR ("import", KW_IMPORT), + KW_DESCR ("implements", KW_IMPLEMENTS), + KW_DESCR ("let", KW_LET), + KW_DESCR ("new", KW_NEW), + KW_DESCR ("package", KW_PACKAGE), + KW_DESCR ("private", KW_PRIVATE), + KW_DESCR ("protected", KW_PROTECTED), + KW_DESCR ("public", KW_PUBLIC), + KW_DESCR ("return", KW_RETURN), + KW_DESCR ("static", KW_STATIC), + KW_DESCR ("super", KW_SUPER), + KW_DESCR ("switch", KW_SWITCH), + KW_DESCR ("this", KW_THIS), + KW_DESCR ("throw", KW_THROW), + KW_DESCR ("try", KW_TRY), + KW_DESCR ("typeof", KW_TYPEOF), + KW_DESCR ("var", KW_VAR), + KW_DESCR ("void", KW_VOID), + KW_DESCR ("while", KW_WHILE), + KW_DESCR ("with", KW_WITH), + KW_DESCR ("yield", KW_YIELD) +#undef KW_DESCR + }; + + keyword kw = KW_NONE; + + for (uint32_t i = 0; i < sizeof (keywords) / sizeof (kw_descr_t); i++) + { + if (strlen (keywords[i].keyword_p) == length + && !strncmp (keywords[i].keyword_p, (const char *) str_p, length)) { - return convert_current_token_to_token (TOK_NAME); - } - } - if (current_token_equals_to ("let")) - { - if (strict_mode) - { - return create_token (TOK_KEYWORD, KW_LET); - } - else - { - return convert_current_token_to_token (TOK_NAME); - } - } - if (current_token_equals_to ("new")) - { - return create_token (TOK_KEYWORD, KW_NEW); - } - if (current_token_equals_to ("null")) - { - return create_token (TOK_NULL, 0); - } - if (current_token_equals_to ("package")) - { - if (strict_mode) - { - return create_token (TOK_KEYWORD, KW_PACKAGE); - } - else - { - return convert_current_token_to_token (TOK_NAME); - } - } - if (current_token_equals_to ("private")) - { - if (strict_mode) - { - return create_token (TOK_KEYWORD, KW_PRIVATE); - } - else - { - return convert_current_token_to_token (TOK_NAME); - } - } - if (current_token_equals_to ("protected")) - { - if (strict_mode) - { - return create_token (TOK_KEYWORD, KW_PROTECTED); - } - else - { - return convert_current_token_to_token (TOK_NAME); + kw = 
keywords[i].keyword_id; + break; } } - if (current_token_equals_to ("public")) + + if (!strict_mode) { - if (strict_mode) - { - return create_token (TOK_KEYWORD, KW_PUBLIC); - } - else + switch (kw) { - return convert_current_token_to_token (TOK_NAME); + case KW_INTERFACE: + case KW_IMPLEMENTS: + case KW_LET: + case KW_PACKAGE: + case KW_PRIVATE: + case KW_PROTECTED: + case KW_PUBLIC: + case KW_STATIC: + case KW_YIELD: + { + return convert_string_to_token (TOK_NAME, str_p, (ecma_length_t) length); + } + + default: + { + break; + } } } - if (current_token_equals_to ("return")) + + if (kw != KW_NONE) { - return create_token (TOK_KEYWORD, KW_RETURN); + return create_token (TOK_KEYWORD, kw); } - if (current_token_equals_to ("static")) + else { - if (strict_mode) + const ecma_char_t *false_p = ecma_get_magic_string_zt (ECMA_MAGIC_STRING_FALSE); + const ecma_char_t *true_p = ecma_get_magic_string_zt (ECMA_MAGIC_STRING_TRUE); + const ecma_char_t *null_p = ecma_get_magic_string_zt (ECMA_MAGIC_STRING_NULL); + + if (strlen ((const char*) false_p) == length + && !strncmp ((const char*) str_p, (const char*) false_p, length)) { - return create_token (TOK_KEYWORD, KW_STATIC); + return create_token (TOK_BOOL, false); } - else + else if (strlen ((const char*) true_p) == length + && !strncmp ((const char*) str_p, (const char*) true_p, length)) { - return convert_current_token_to_token (TOK_NAME); + return create_token (TOK_BOOL, true); } - } - if (current_token_equals_to ("super")) - { - return create_token (TOK_KEYWORD, KW_SUPER); - } - if (current_token_equals_to ("switch")) - { - return create_token (TOK_KEYWORD, KW_SWITCH); - } - if (current_token_equals_to ("this")) - { - return create_token (TOK_KEYWORD, KW_THIS); - } - if (current_token_equals_to ("throw")) - { - return create_token (TOK_KEYWORD, KW_THROW); - } - if (current_token_equals_to ("true")) - { - return create_token (TOK_BOOL, true); - } - if (current_token_equals_to ("try")) - { - return create_token (TOK_KEYWORD, 
KW_TRY); - } - if (current_token_equals_to ("typeof")) - { - return create_token (TOK_KEYWORD, KW_TYPEOF); - } - if (current_token_equals_to ("var")) - { - return create_token (TOK_KEYWORD, KW_VAR); - } - if (current_token_equals_to ("void")) - { - return create_token (TOK_KEYWORD, KW_VOID); - } - if (current_token_equals_to ("while")) - { - return create_token (TOK_KEYWORD, KW_WHILE); - } - if (current_token_equals_to ("with")) - { - return create_token (TOK_KEYWORD, KW_WITH); - } - if (current_token_equals_to ("yield")) - { - if (strict_mode) + else if (strlen ((const char*) null_p) == length + && !strncmp ((const char*) str_p, (const char*) null_p, length)) { - return create_token (TOK_KEYWORD, KW_YIELD); + return create_token (TOK_NULL, 0); } else { - return convert_current_token_to_token (TOK_NAME); + return empty_token; } } - return empty_token; -} +} /* decode_keyword */ static token convert_seen_num_to_token (ecma_number_t num) @@ -636,51 +491,6 @@ consume_char (void) } \ while (0) -static token -parse_name (void) -{ - char c = LA (0); - bool every_char_islower = islower (c); - token known_token = empty_token; - - JERRY_ASSERT (isalpha (c) || c == '$' || c == '_'); - - new_token (); - consume_char (); - while (true) - { - c = LA (0); - if (c == '\0') - { - break; - } - if (!isalpha (c) && !isdigit (c) && c != '$' && c != '_') - { - break; - } - if (every_char_islower && (!islower (c))) - { - every_char_islower = false; - } - consume_char (); - } - - if (every_char_islower) - { - known_token = decode_keyword (); - if (!is_empty (known_token)) - { - goto end; - } - } - - known_token = convert_current_token_to_token (TOK_NAME); - -end: - token_start = NULL; - return known_token; -} - static uint32_t hex_to_int (char hex) { @@ -712,6 +522,339 @@ hex_to_int (char hex) } } +/** + * Try to decode specified character as SingleEscapeCharacter (ECMA-262, v5, 7.8.4) + * + * If specified character is a SingleEscapeCharacter, convert it according to ECMA-262 v5, Table 
4. + * Otherwise, output it as is. + * + * @return true - if specified character is a SingleEscapeCharacter, + * false - otherwise. + */ +static bool +convert_single_escape_character (ecma_char_t c, /**< character to decode */ + ecma_char_t *out_converted_char_p) /**< out: decoded character */ +{ + ecma_char_t converted_char; + bool is_single_escape_character = true; + + switch (c) + { + case 'b': + { + converted_char = (ecma_char_t) '\b'; + break; + } + + case 't': + { + converted_char = (ecma_char_t) '\t'; + break; + } + + case 'n': + { + converted_char = (ecma_char_t) '\n'; + break; + } + + case 'v': + { + converted_char = (ecma_char_t) '\v'; + break; + } + + case 'f': + { + converted_char = (ecma_char_t) '\f'; + break; + } + + case 'r': + { + converted_char = (ecma_char_t) '\r'; + break; + } + + case '"': + case '\'': + case '\\': + { + converted_char = (ecma_char_t) c; + break; + } + + default: + { + converted_char = (ecma_char_t) c; + is_single_escape_character = false; + break; + } + } + + if (out_converted_char_p != NULL) + { + *out_converted_char_p = converted_char; + } + + return is_single_escape_character; +} /* convert_single_escape_character */ + +/** + * Convert specified string to token of specified type, transforming escape sequences + * + * @return token descriptor + */ +static token +convert_string_to_token_transform_escape_seq (token_type tok_type, /**< type of token to produce */ + const char *source_str_p, /**< string to convert, + * located in source buffer */ + size_t source_str_size) /**< size of the string */ +{ + token ret; + + if (source_str_size == 0) + { + return convert_string_to_token (tok_type, + ecma_get_magic_string_zt (ECMA_MAGIC_STRING__EMPTY), + 0); + } + else + { + JERRY_ASSERT (source_str_p != NULL); + } + + MEM_DEFINE_LOCAL_ARRAY (str_buf_p, + source_str_size, + ecma_char_t); + + const char *source_str_iter_p = source_str_p; + ecma_char_t *str_buf_iter_p = str_buf_p; + + bool is_correct_sequence = true; + bool 
every_char_islower = true; + bool every_char_allowed_in_identifier = true; + + while (source_str_iter_p < source_str_p + source_str_size) + { + ecma_char_t converted_char; + + if (*source_str_iter_p != '\\') + { + converted_char = (ecma_char_t) *source_str_iter_p++; + + JERRY_ASSERT (str_buf_iter_p <= str_buf_p + source_str_size); + JERRY_ASSERT (source_str_iter_p <= source_str_p + source_str_size); + } + else + { + source_str_iter_p++; + + const ecma_char_t escape_character = (ecma_char_t) *source_str_iter_p++; + JERRY_ASSERT (source_str_iter_p <= source_str_p + source_str_size); + + if (isdigit (escape_character)) + { + if (escape_character == '0') + { + JERRY_UNIMPLEMENTED (" character is not currently supported.\n"); + } + else + { + /* Implementation-defined (ECMA-262 v5, B.1.2): octal escape sequences are not implemented */ + is_correct_sequence = false; + break; + } + } + else if (escape_character == 'u' + || escape_character == 'x') + { + const uint32_t hex_chars_num = (escape_character == 'u' ? 4u : 2u); + + if (source_str_iter_p + hex_chars_num > source_str_p + source_str_size) + { + is_correct_sequence = false; + break; + } + + bool chars_are_hex = true; + uint16_t char_code = 0; + + for (uint32_t i = 0; i < hex_chars_num; i++) + { + const char nc = *source_str_iter_p++; + + if (!isxdigit (nc)) + { + chars_are_hex = false; + break; + } + else + { + /* + * Check that highest 4 bits are zero, so the value would not overflow. 
+ */ + JERRY_ASSERT ((char_code & 0xF000u) == 0); + + char_code = (uint16_t) (char_code << 4u); + char_code = (uint16_t) (char_code + hex_to_int (nc)); + } + } + + JERRY_ASSERT (str_buf_iter_p <= str_buf_p + source_str_size); + JERRY_ASSERT (source_str_iter_p <= source_str_p + source_str_size); + + if (!chars_are_hex) + { + is_correct_sequence = false; + break; + } + + /* + * In CONFIG_ECMA_CHAR_ASCII mode size of ecma_char_t is 1 byte, so the conversion + * would ignore highest part of 2-byte value, and in CONFIG_ECMA_CHAR_UTF16 mode this + * would be just an assignment of 2-byte value. + */ + converted_char = (ecma_char_t) char_code; + } + else if (ecma_char_is_line_terminator (escape_character)) + { + if (source_str_iter_p + 1 <= source_str_p + source_str_size) + { + char nc = *source_str_iter_p; + + if (escape_character == '\x0D' + && nc == '\x0A') + { + source_str_iter_p++; + } + } + + continue; + } + else + { + convert_single_escape_character ((ecma_char_t) escape_character, &converted_char); + } + } + + *str_buf_iter_p++ = converted_char; + JERRY_ASSERT (str_buf_iter_p <= str_buf_p + source_str_size); + + if (!islower (converted_char)) + { + every_char_islower = false; + + if (!isalpha (converted_char) + && !isdigit (converted_char) + && converted_char != '$' + && converted_char != '_') + { + every_char_allowed_in_identifier = false; + } + } + } + + if (is_correct_sequence) + { + ecma_length_t length = (ecma_length_t) (str_buf_iter_p - str_buf_p); + ret = empty_token; + + if (tok_type == TOK_NAME) + { + if (every_char_islower) + { + ret = decode_keyword (str_buf_p, length); + } + else if (!every_char_allowed_in_identifier) + { + PARSE_ERROR ("Malformed identifier name", source_str_p - buffer_start); + } + } + + if (is_empty (ret)) + { + ret = convert_string_to_token (tok_type, str_buf_p, length); + } + } + else + { + PARSE_ERROR ("Malformed escape sequence", source_str_p - buffer_start); + } + + MEM_FINALIZE_LOCAL_ARRAY (str_buf_p); + + return ret; +} /* 
convert_string_to_token_transform_escape_seq */ + +/** + * Parse identifier (ECMA-262 v5, 7.6) or keyword (7.6.1.1) + */ +static token +parse_name (void) +{ + ecma_char_t c = (ecma_char_t) LA (0); + + token known_token = empty_token; + + JERRY_ASSERT (isalpha (c) || c == '$' || c == '_'); + + new_token (); + + while (true) + { + c = (ecma_char_t) LA (0); + + if (!isalpha (c) + && !isdigit (c) + && c != '$' + && c != '_' + && c != '\\') + { + break; + } + else + { + consume_char (); + + if (c == '\\') + { + bool is_correct_sequence = (LA (0) == 'u'); + if (is_correct_sequence) + { + consume_char (); + } + + for (uint32_t i = 0; + is_correct_sequence && i < 4; + i++) + { + if (!isxdigit (LA (0))) + { + is_correct_sequence = false; + break; + } + + consume_char (); + } + + if (!is_correct_sequence) + { + PARSE_ERROR ("Malformed escape sequence", token_start - buffer_start); + } + } + } + } + + known_token = convert_string_to_token_transform_escape_seq (TOK_NAME, + token_start, + (size_t) (buffer - token_start)); + + token_start = NULL; + + return known_token; +} /* parse_name */ + /* In this function we cannot use strtol function since there is no octal literals in ECMAscript. */ static token @@ -938,68 +1081,68 @@ parse_number (void) } } +/** + * Parse string literal (ECMA-262 v5, 7.8.4) + */ static token parse_string (void) { - char c = LA (0); - bool is_double_quoted; - token result; - + ecma_char_t c = (ecma_char_t) LA (0); JERRY_ASSERT (c == '\'' || c == '"'); - is_double_quoted = (c == '"'); - - // Eat up '"' consume_char (); new_token (); - while (true) + const bool is_double_quoted = (c == '"'); + const char end_char = (is_double_quoted ? 
'"' : '\''); + + do { - c = LA (0); + c = (ecma_char_t) LA (0); + consume_char (); + if (c == '\0') { PARSE_ERROR ("Unclosed string", token_start - buffer_start); } - if (c == '\n') + else if (ecma_char_is_line_terminator (c)) { PARSE_ERROR ("String literal shall not contain newline character", token_start - buffer_start); } - if (c == '\\') + else if (c == '\\') { - /* Only single escape character is allowed. */ - if (LA (1) == 'x' || LA (1) == 'u' || isdigit (LA (1))) + ecma_char_t nc = (ecma_char_t) LA (0); + + if (convert_single_escape_character (nc, NULL)) { - // PARSE_WARN ("Escape sequences are ignored yet", token_start - buffer_start); consume_char (); - consume_char (); - continue; } - if ((LA (1) == '\'' && !is_double_quoted) - || (LA (1) == '"' && is_double_quoted) - || LA (1) == '\n') + else if (ecma_char_is_line_terminator (nc)) { consume_char (); - consume_char (); - continue; + + if (ecma_char_is_carriage_return (nc)) + { + nc = (ecma_char_t) LA (0); + + if (ecma_char_is_new_line (nc)) + { + consume_char (); + } + } } } - else if ((c == '\'' && !is_double_quoted) - || (c == '"' && is_double_quoted)) - { - break; - } - - consume_char (); } + while (c != end_char); - result = convert_current_token_to_token (TOK_STRING); + token ret = convert_string_to_token_transform_escape_seq (TOK_STRING, + token_start, + (size_t) (buffer - token_start) - 1u); - // Eat up '"' - consume_char (); token_start = NULL; - return result; -} + return ret; +} /* parse_string */ static void grobble_whitespaces (void) diff --git a/tests/jerry-test-suite/precommit_test_list b/tests/jerry-test-suite/precommit_test_list index 9a9f3f091e..e7c528d975 100644 --- a/tests/jerry-test-suite/precommit_test_list +++ b/tests/jerry-test-suite/precommit_test_list @@ -1,7 +1,7 @@ +./tests/jerry-test-suite/06/06-001.js ./tests/jerry-test-suite/06/06-002.js ./tests/jerry-test-suite/06/06-003.js ./tests/jerry-test-suite/06/06-004.js -./tests/jerry-test-suite/06/06-005.js 
./tests/jerry-test-suite/07/07.06/07.06.01/07.06.01-001.js ./tests/jerry-test-suite/07/07.09/07.09-001.js ./tests/jerry-test-suite/07/07.09/07.09-002.js @@ -31,7 +31,6 @@ ./tests/jerry-test-suite/08/08.03/08.03-003.js ./tests/jerry-test-suite/08/08.03/08.03-004.js ./tests/jerry-test-suite/08/08.04/08.04-001.js -./tests/jerry-test-suite/08/08.04/08.04-002.js ./tests/jerry-test-suite/08/08.04/08.04-003.js ./tests/jerry-test-suite/08/08.04/08.04-004.js ./tests/jerry-test-suite/08/08.04/08.04-005.js @@ -46,6 +45,7 @@ ./tests/jerry-test-suite/08/08.04/08.04-014.js ./tests/jerry-test-suite/08/08.04/08.04-015.js ./tests/jerry-test-suite/08/08.04/08.04-016.js +./tests/jerry-test-suite/08/08.04/08.04-017.js ./tests/jerry-test-suite/08/08.05/08.05-001.js ./tests/jerry-test-suite/08/08.05/08.05-002.js ./tests/jerry-test-suite/08/08.05/08.05-003.js @@ -868,7 +868,6 @@ ./tests/jerry-test-suite/12/12.05/12.05-003.js ./tests/jerry-test-suite/12/12.05/12.05-004.js ./tests/jerry-test-suite/12/12.05/12.05-005.js -./tests/jerry-test-suite/12/12.05/12.05-006.js ./tests/jerry-test-suite/12/12.05/12.05-007.js ./tests/jerry-test-suite/12/12.05/12.05-008.js ./tests/jerry-test-suite/12/12.06/12.06.01/12.06.01-001.js diff --git a/tests/jerry-test-suite/unsupported_list b/tests/jerry-test-suite/unsupported_list index d530667182..4f11d42431 100644 --- a/tests/jerry-test-suite/unsupported_list +++ b/tests/jerry-test-suite/unsupported_list @@ -1,6 +1,9 @@ -> Escape sequences -./tests/jerry-test-suite/06/06-001.js -./tests/jerry-test-suite/08/08.04/08.04-017.js +> Escape sequences ( character case) +./tests/jerry-test-suite/08/08.04/08.04-002.js +./tests/jerry-test-suite/12/12.05/12.05-006.js + +> Unicode +./tests/jerry-test-suite/06/06-005.js > Regular expressions ./tests/jerry-test-suite/07/07.08/07.08.05/07.08.05-001.js diff --git a/tests/jerry/escape_sequences.js b/tests/jerry/escape_sequences.js new file mode 100644 index 0000000000..f1fdb39224 --- /dev/null +++ 
b/tests/jerry/escape_sequences.js @@ -0,0 +1,39 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +assert ('abcd\ +efgh' === 'abcdefgh'); + +assert ('\'' === "'"); +assert ("\'" === "'"); +assert ('\"' === '"'); +assert ("\"" === '"'); + +// +// TODO +// Extend the test by verifying character codes after String.charCodeAt would be implemented +// +assert ((new String ('\\')).length === 1); +assert ((new String ('\b')).length === 1); +assert ((new String ('\f')).length === 1); +assert ((new String ('\n')).length === 1); +assert ((new String ('\r')).length === 1); +assert ((new String ('\t')).length === 1); +assert ((new String ('\v')).length === 1); + +// 'p' is not SingleEscapeCharacter +assert ('\p' === 'p'); + +var v\u0061riable = 'valu\u0065'; +assert (variable === 'value'); diff --git a/tests/jerry/fail/12/escape_sequences_invalid_hex.js b/tests/jerry/fail/12/escape_sequences_invalid_hex.js new file mode 100644 index 0000000000..818a62afa4 --- /dev/null +++ b/tests/jerry/fail/12/escape_sequences_invalid_hex.js @@ -0,0 +1,15 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var str = '\x5t'; diff --git a/tests/jerry/fail/12/escape_sequences_invalid_unicode.js b/tests/jerry/fail/12/escape_sequences_invalid_unicode.js new file mode 100644 index 0000000000..6ce0392d6e --- /dev/null +++ b/tests/jerry/fail/12/escape_sequences_invalid_unicode.js @@ -0,0 +1,15 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var str = '\u004t'; diff --git a/tests/jerry/fail/12/escape_sequences_invalid_variable.js b/tests/jerry/fail/12/escape_sequences_invalid_variable.js new file mode 100644 index 0000000000..2beae2d231 --- /dev/null +++ b/tests/jerry/fail/12/escape_sequences_invalid_variable.js @@ -0,0 +1,15 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var a\u0028bcd;