diff --git a/jerry-core/ecma/base/ecma-globals.h b/jerry-core/ecma/base/ecma-globals.h index 5f484892f3..045be8a184 100644 --- a/jerry-core/ecma/base/ecma-globals.h +++ b/jerry-core/ecma/base/ecma-globals.h @@ -697,8 +697,8 @@ typedef struct typedef enum { ECMA_STRING_CONTAINER_LIT_TABLE, /**< actual data is in literal table */ - ECMA_STRING_CONTAINER_HEAP_CHUNKS, /**< actual data is on the heap - in a ecma_collection_chunk_t chain */ + ECMA_STRING_CONTAINER_HEAP_ASCII_STRING, /**< actual data is on the heap as an ascii string */ + ECMA_STRING_CONTAINER_HEAP_UTF8_STRING, /**< actual data is on the heap as an utf-8 string */ ECMA_STRING_CONTAINER_HEAP_NUMBER, /**< actual data is on the heap as a ecma_number_t */ ECMA_STRING_CONTAINER_UINT32_IN_DESC, /**< actual data is UInt32-represeneted Number stored locally in the string's descriptor */ @@ -755,7 +755,18 @@ typedef struct ecma_string_t mem_cpointer_t lit_cp; /** Compressed pointer to an ecma_collection_header_t */ - mem_cpointer_t collection_cp; + mem_cpointer_t utf8_collection_cp; + + /** + * Actual data of an ascii string type + */ + struct + { + /** Compressed pointer to a raw character array */ + mem_cpointer_t ascii_collection_cp; + /** Size of ascii string in bytes */ + uint16_t size; + } ascii_string; /** Compressed pointer to an ecma_number_t */ mem_cpointer_t number_cp; diff --git a/jerry-core/ecma/base/ecma-helpers-string.c b/jerry-core/ecma/base/ecma-helpers-string.c index 44ffab39e2..b0a8a7b7fc 100644 --- a/jerry-core/ecma/base/ecma-helpers-string.c +++ b/jerry-core/ecma/base/ecma-helpers-string.c @@ -173,20 +173,37 @@ ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, /**< utf-8 stri return ecma_get_magic_string_ex (magic_string_ex_id); } - JERRY_ASSERT (string_size > 0); + JERRY_ASSERT (string_size > 0 && string_size <= UINT16_MAX); ecma_string_t *string_desc_p = ecma_alloc_string (); - string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_CHUNKS | ECMA_STRING_REF_ONE; string_desc_p->hash = lit_utf8_string_calc_hash (string_p, string_size); string_desc_p->u.common_field = 0; - const size_t data_size = string_size + sizeof (ecma_string_heap_header_t); - ecma_string_heap_header_t *data_p = (ecma_string_heap_header_t *) mem_heap_alloc_block (data_size); - data_p->size = (uint16_t) string_size; - data_p->length = (uint16_t) lit_utf8_string_length (string_p, string_size); - memcpy (data_p + 1, string_p, string_size); - ECMA_SET_NON_NULL_POINTER (string_desc_p->u.collection_cp, data_p); + ecma_length_t string_length = lit_utf8_string_length (string_p, string_size); + + if (string_size == string_length) + { + string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_ASCII_STRING | ECMA_STRING_REF_ONE; + const size_t data_size = string_size; + lit_utf8_byte_t *data_p = (lit_utf8_byte_t *) mem_heap_alloc_block (data_size); + string_desc_p->u.ascii_string.size = (uint16_t) string_size; + memcpy (data_p, string_p, string_size); + ECMA_SET_NON_NULL_POINTER (string_desc_p->u.ascii_string.ascii_collection_cp, data_p); + } + else + { + string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE; + const size_t data_size = string_size + sizeof (ecma_string_heap_header_t); + ecma_string_heap_header_t *data_p = (ecma_string_heap_header_t *) mem_heap_alloc_block (data_size); + + JERRY_ASSERT (string_length <= UINT16_MAX); + + data_p->size = (uint16_t) string_size; + data_p->length = (uint16_t) string_length; + memcpy (data_p + 1, string_p, string_size); + ECMA_SET_NON_NULL_POINTER (string_desc_p->u.utf8_collection_cp, data_p); + } return string_desc_p; } /* ecma_new_ecma_string_from_utf8 */ @@ -343,31 +360,54 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */ const lit_utf8_size_t new_size = str1_size + str2_size; + JERRY_ASSERT (new_size <= UINT16_MAX); + ecma_string_t *string_desc_p = ecma_alloc_string (); - string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_CHUNKS | ECMA_STRING_REF_ONE; string_desc_p->u.common_field = 0; - const size_t data_size = new_size + sizeof (ecma_string_heap_header_t); - ecma_string_heap_header_t *data_p = (ecma_string_heap_header_t *) mem_heap_alloc_block (data_size); - - lit_utf8_size_t bytes_copied = ecma_string_to_utf8_string (string1_p, - (lit_utf8_byte_t *) (data_p + 1), - str1_size); - JERRY_ASSERT (bytes_copied == str1_size); + ecma_length_t string_length = ecma_string_get_length (string1_p) + ecma_string_get_length (string2_p); + if (new_size == string_length) + { + string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_ASCII_STRING | ECMA_STRING_REF_ONE; + const size_t data_size = new_size; + lit_utf8_byte_t *data_p = (lit_utf8_byte_t *) mem_heap_alloc_block (data_size); + lit_utf8_size_t bytes_copied = ecma_string_to_utf8_string (string1_p, data_p, str1_size); + JERRY_ASSERT (bytes_copied == str1_size); + bytes_copied = ecma_string_to_utf8_string (string2_p, data_p + str1_size, str2_size); + JERRY_ASSERT (bytes_copied == str2_size); - bytes_copied = ecma_string_to_utf8_string (string2_p, - (lit_utf8_byte_t *) (data_p + 1) + str1_size, - str2_size); - JERRY_ASSERT (bytes_copied == str2_size); + string_desc_p->u.ascii_string.size = (uint16_t) new_size; - data_p->size = (uint16_t) new_size; - data_p->length = (uint16_t) (ecma_string_get_length (string1_p) + ecma_string_get_length (string2_p)); - string_desc_p->hash = lit_utf8_string_hash_combine (string1_p->hash, - (lit_utf8_byte_t *) (data_p + 1) + str1_size, - (lit_utf8_size_t) str2_size); + string_desc_p->hash = lit_utf8_string_hash_combine (string1_p->hash, data_p + str1_size, str2_size); - ECMA_SET_NON_NULL_POINTER (string_desc_p->u.collection_cp, data_p); + ECMA_SET_NON_NULL_POINTER (string_desc_p->u.ascii_string.ascii_collection_cp, data_p); + } + else + { + string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE; + const size_t data_size = new_size + sizeof (ecma_string_heap_header_t); + ecma_string_heap_header_t *data_p = (ecma_string_heap_header_t *) mem_heap_alloc_block (data_size); + lit_utf8_size_t bytes_copied = ecma_string_to_utf8_string (string1_p, + (lit_utf8_byte_t *) (data_p + 1), + str1_size); + JERRY_ASSERT (bytes_copied == str1_size); + + bytes_copied = ecma_string_to_utf8_string (string2_p, + (lit_utf8_byte_t *) (data_p + 1) + str1_size, + str2_size); + JERRY_ASSERT (bytes_copied == str2_size); + JERRY_ASSERT (string_length <= UINT16_MAX); + + data_p->size = (uint16_t) new_size; + data_p->length = (uint16_t) string_length; + + string_desc_p->hash = lit_utf8_string_hash_combine (string1_p->hash, + (lit_utf8_byte_t *) (data_p + 1) + str1_size, + (lit_utf8_size_t) str2_size); + + ECMA_SET_NON_NULL_POINTER (string_desc_p->u.utf8_collection_cp, data_p); + } return string_desc_p; } /* ecma_concat_ecma_strings */ @@ -410,19 +450,39 @@ ecma_copy_ecma_string (ecma_string_t *string_desc_p) /**< string descriptor */ break; } - case ECMA_STRING_CONTAINER_HEAP_CHUNKS: + case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING: { new_str_p = ecma_alloc_string (); *new_str_p = *string_desc_p; + new_str_p->refs_and_container = ECMA_STRING_SET_REF_TO_ONE (new_str_p->refs_and_container); const ecma_string_heap_header_t *data_p = ECMA_GET_NON_NULL_POINTER (ecma_string_heap_header_t, - string_desc_p->u.collection_cp); + string_desc_p->u.utf8_collection_cp); JERRY_ASSERT (data_p != NULL); const size_t data_size = data_p->size + sizeof (ecma_string_heap_header_t); ecma_string_heap_header_t *new_data_p = (ecma_string_heap_header_t *) mem_heap_alloc_block (data_size); - memcpy (new_data_p, data_p, data_p->size + sizeof (ecma_string_heap_header_t)); + memcpy (new_data_p, data_p, data_size); - ECMA_SET_NON_NULL_POINTER (new_str_p->u.collection_cp, data_p); + ECMA_SET_NON_NULL_POINTER (new_str_p->u.utf8_collection_cp, new_data_p); + + break; + } + + case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING: + { + new_str_p = ecma_alloc_string (); + *new_str_p = *string_desc_p; + new_str_p->refs_and_container = ECMA_STRING_SET_REF_TO_ONE (new_str_p->refs_and_container); + + const lit_utf8_byte_t *data_p = ECMA_GET_NON_NULL_POINTER (lit_utf8_byte_t, + string_desc_p->u.ascii_string.ascii_collection_cp); + + JERRY_ASSERT (data_p != NULL); + const size_t data_size = string_desc_p->u.ascii_string.size; + lit_utf8_byte_t *new_data_p = (lit_utf8_byte_t *) mem_heap_alloc_block (data_size); + memcpy (new_data_p, data_p, data_size); + + ECMA_SET_NON_NULL_POINTER (new_str_p->u.ascii_string.ascii_collection_cp, new_data_p); break; } @@ -488,15 +548,24 @@ ecma_deref_ecma_string (ecma_string_t *string_p) /**< ecma-string */ switch (ECMA_STRING_GET_CONTAINER (string_p)) { - case ECMA_STRING_CONTAINER_HEAP_CHUNKS: + case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING: { ecma_string_heap_header_t *const data_p = ECMA_GET_NON_NULL_POINTER (ecma_string_heap_header_t, - string_p->u.collection_cp); + string_p->u.utf8_collection_cp); mem_heap_free_block (data_p, data_p->size + sizeof (ecma_string_heap_header_t)); break; } + case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING: + { + lit_utf8_byte_t *const data_p = ECMA_GET_NON_NULL_POINTER (lit_utf8_byte_t, + string_p->u.ascii_string.ascii_collection_cp); + + mem_heap_free_block (data_p, string_p->u.ascii_string.size); + + break; + } case ECMA_STRING_CONTAINER_HEAP_NUMBER: { ecma_number_t *num_p = ECMA_GET_NON_NULL_POINTER (ecma_number_t, @@ -545,7 +614,8 @@ ecma_string_to_number (const ecma_string_t *str_p) /**< ecma-string */ } case ECMA_STRING_CONTAINER_LIT_TABLE: - case ECMA_STRING_CONTAINER_HEAP_CHUNKS: + case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING: + case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING: case ECMA_STRING_CONTAINER_MAGIC_STRING: case ECMA_STRING_CONTAINER_MAGIC_STRING_EX: { @@ -623,14 +693,22 @@ ecma_string_to_utf8_string (const ecma_string_t *string_desc_p, /**< ecma-string switch (ECMA_STRING_GET_CONTAINER (string_desc_p)) { - case ECMA_STRING_CONTAINER_HEAP_CHUNKS: + case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING: { const ecma_string_heap_header_t *data_p = ECMA_GET_NON_NULL_POINTER (ecma_string_heap_header_t, - string_desc_p->u.collection_cp); + string_desc_p->u.utf8_collection_cp); size = data_p->size; memcpy (buffer_p, data_p + 1, size); break; } + case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING: + { + const lit_utf8_byte_t *data_p = ECMA_GET_NON_NULL_POINTER (lit_utf8_byte_t, + string_desc_p->u.ascii_string.ascii_collection_cp); + size = string_desc_p->u.ascii_string.size; + memcpy (buffer_p, data_p, size); + break; + } case ECMA_STRING_CONTAINER_LIT_TABLE: { const lit_literal_t lit = lit_get_literal_by_cp (string_desc_p->u.lit_cp); @@ -767,15 +845,24 @@ ecma_string_raw_chars (const ecma_string_t *string_p, /**< ecma-string */ result_p = lit_charset_literal_get_charset (lit); break; } - case ECMA_STRING_CONTAINER_HEAP_CHUNKS: + case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING: { const ecma_string_heap_header_t *data_p = ECMA_GET_NON_NULL_POINTER (ecma_string_heap_header_t, - string_p->u.collection_cp); + string_p->u.utf8_collection_cp); size = data_p->size; length = data_p->length; result_p = (const lit_utf8_byte_t *) (data_p + 1); break; } + case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING: + { + const lit_utf8_byte_t *data_p = ECMA_GET_NON_NULL_POINTER (lit_utf8_byte_t, + string_p->u.ascii_string.ascii_collection_cp); + size = string_p->u.ascii_string.size; + length = size; + result_p = data_p; + break; + } case ECMA_STRING_CONTAINER_HEAP_NUMBER: { size = ecma_string_get_heap_number_size (string_p->u.number_cp); @@ -868,7 +955,8 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri default: { JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_NUMBER - || ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_CHUNKS); + || ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING + || ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_ASCII_STRING); break; } } @@ -907,12 +995,12 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri return (*num1_p == *num2_p); } - case ECMA_STRING_CONTAINER_HEAP_CHUNKS: + case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING: { const ecma_string_heap_header_t *data1_p = ECMA_GET_NON_NULL_POINTER (ecma_string_heap_header_t, - string1_p->u.collection_cp); + string1_p->u.utf8_collection_cp); const ecma_string_heap_header_t *data2_p = ECMA_GET_NON_NULL_POINTER (ecma_string_heap_header_t, - string2_p->u.collection_cp); + string2_p->u.utf8_collection_cp); if (data1_p->length != data2_p->length) { @@ -921,6 +1009,15 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri return !strncmp ((char *) (data1_p + 1), (char *) (data2_p + 1), strings_size); } + case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING: + { + const lit_utf8_byte_t *data1_p = ECMA_GET_NON_NULL_POINTER (lit_utf8_byte_t, + string1_p->u.ascii_string.ascii_collection_cp); + const lit_utf8_byte_t *data2_p = ECMA_GET_NON_NULL_POINTER (lit_utf8_byte_t, + string2_p->u.ascii_string.ascii_collection_cp); + + return !strncmp ((char *) (data1_p), (char *) (data2_p), strings_size); + } default: { JERRY_UNREACHABLE (); @@ -933,13 +1030,20 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri bool is_utf8_string1_on_heap = false; bool is_utf8_string2_on_heap = false; - if (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_CHUNKS) + if (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING) { const ecma_string_heap_header_t *const data_p = ECMA_GET_NON_NULL_POINTER (ecma_string_heap_header_t, - string1_p->u.collection_cp); + string1_p->u.utf8_collection_cp); utf8_string1_p = (lit_utf8_byte_t *) (data_p + 1); } + else if (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_ASCII_STRING) + { + lit_utf8_byte_t *const data_p = ECMA_GET_NON_NULL_POINTER (lit_utf8_byte_t, + string1_p->u.ascii_string.ascii_collection_cp); + + utf8_string1_p = data_p; + } else if (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_LIT_TABLE) { const lit_literal_t lit = lit_get_literal_by_cp (string1_p->u.lit_cp); @@ -957,13 +1061,20 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri is_utf8_string1_on_heap = true; } - if (ECMA_STRING_GET_CONTAINER (string2_p) == ECMA_STRING_CONTAINER_HEAP_CHUNKS) + if (ECMA_STRING_GET_CONTAINER (string2_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING) { const ecma_string_heap_header_t *const data_p = ECMA_GET_NON_NULL_POINTER (ecma_string_heap_header_t, - string2_p->u.collection_cp); + string2_p->u.utf8_collection_cp); utf8_string2_p = (lit_utf8_byte_t *) (data_p + 1); } + else if (ECMA_STRING_GET_CONTAINER (string2_p) == ECMA_STRING_CONTAINER_HEAP_ASCII_STRING) + { + lit_utf8_byte_t *const data_p = ECMA_GET_NON_NULL_POINTER (lit_utf8_byte_t, + string2_p->u.ascii_string.ascii_collection_cp); + + utf8_string2_p = data_p; + } else if (ECMA_STRING_GET_CONTAINER (string2_p) == ECMA_STRING_CONTAINER_LIT_TABLE) { const lit_literal_t lit = lit_get_literal_by_cp (string2_p->u.lit_cp); @@ -1048,14 +1159,22 @@ ecma_compare_ecma_strings_relational (const ecma_string_t *string1_p, /**< ecma- lit_utf8_byte_t utf8_string2_buffer[ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER]; lit_utf8_size_t utf8_string2_size; - if (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_CHUNKS) + if (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING) { const ecma_string_heap_header_t *const data_p = ECMA_GET_NON_NULL_POINTER (ecma_string_heap_header_t, - string1_p->u.collection_cp); + string1_p->u.utf8_collection_cp); utf8_string1_p = (lit_utf8_byte_t *) (data_p + 1); utf8_string1_size = (lit_utf8_size_t) data_p->size; } + else if (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_ASCII_STRING) + { + lit_utf8_byte_t *const data_p = ECMA_GET_NON_NULL_POINTER (lit_utf8_byte_t, + string1_p->u.ascii_string.ascii_collection_cp); + + utf8_string1_p = data_p; + utf8_string1_size = (lit_utf8_size_t) string1_p->u.ascii_string.size; + } else if (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_LIT_TABLE) { const lit_literal_t lit = lit_get_literal_by_cp (string1_p->u.lit_cp); @@ -1082,14 +1201,22 @@ ecma_compare_ecma_strings_relational (const ecma_string_t *string1_p, /**< ecma- JERRY_ASSERT (bytes_copied == utf8_string1_size); } - if (ECMA_STRING_GET_CONTAINER (string2_p) == ECMA_STRING_CONTAINER_HEAP_CHUNKS) + if (ECMA_STRING_GET_CONTAINER (string2_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING) { const ecma_string_heap_header_t *const data_p = ECMA_GET_NON_NULL_POINTER (ecma_string_heap_header_t, - string2_p->u.collection_cp); + string2_p->u.utf8_collection_cp); utf8_string2_p = (lit_utf8_byte_t *) (data_p + 1); utf8_string2_size = (lit_utf8_size_t) data_p->size; } + if (ECMA_STRING_GET_CONTAINER (string2_p) == ECMA_STRING_CONTAINER_HEAP_ASCII_STRING) + { + lit_utf8_byte_t *const data_p = ECMA_GET_NON_NULL_POINTER (lit_utf8_byte_t, + string2_p->u.ascii_string.ascii_collection_cp); + + utf8_string2_p = data_p; + utf8_string2_size = (lit_utf8_size_t) string2_p->u.ascii_string.size; + } else if (ECMA_STRING_GET_CONTAINER (string2_p) == ECMA_STRING_CONTAINER_LIT_TABLE) { const lit_literal_t lit = lit_get_literal_by_cp (string2_p->u.lit_cp); @@ -1170,11 +1297,15 @@ ecma_string_get_length (const ecma_string_t *string_p) /**< ecma-string */ { return (ecma_length_t) ecma_string_get_heap_number_size (string_p->u.number_cp); } + case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING: + { + return (ecma_length_t) (string_p->u.ascii_string.size); + } default: { - JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_CHUNKS); + JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING); const ecma_string_heap_header_t *const data_p = ECMA_GET_NON_NULL_POINTER (ecma_string_heap_header_t, - string_p->u.collection_cp); + string_p->u.utf8_collection_cp); return (ecma_length_t) data_p->length; } @@ -1214,11 +1345,15 @@ ecma_string_get_size (const ecma_string_t *string_p) /**< ecma-string */ { return ecma_string_get_heap_number_size (string_p->u.number_cp); } + case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING: + { + return (lit_utf8_size_t) string_p->u.ascii_string.size; + } default: { - JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_CHUNKS); + JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING); const ecma_string_heap_header_t *const data_p = ECMA_GET_NON_NULL_POINTER (ecma_string_heap_header_t, - string_p->u.collection_cp); + string_p->u.utf8_collection_cp); return (lit_utf8_size_t) data_p->size; }