Skip to content

Commit 1e0356b

Browse files
committed
Use a bit vector to store the CESU-8 lookup table,
to improve the performance of lit_get_unicode_char_size_by_utf8_first_byte.
JerryScript-DCO-1.0-Signed-off-by: Xin Hu [email protected]
1 parent 50d124b commit 1e0356b

File tree

2 files changed

+37
-13
lines changed

2 files changed

+37
-13
lines changed

jerry-core/lit/lit-strings.cpp

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,17 @@ lit_utf8_string_code_unit_at (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 stri
757757
return code_unit;
758758
} /* lit_utf8_string_code_unit_at */
759759

760+
/* CESU-8 number of bytes occupied lookup table */
761+
#ifndef __LITTLE_ENDIAN
762+
const __attribute__ ((aligned (CESU_8_TABLE_MEM_ALIGNMENT))) lit_utf8_byte_t table[]
763+
{
764+
1, 1, 1, 1, 1, 1, 1, 1,
765+
0, 0, 0, 0,
766+
2, 2,
767+
3, 0
768+
};
769+
#endif
770+
760771
/**
761772
* Get CESU-8 encoded size of character
762773
*
@@ -765,19 +776,29 @@ lit_utf8_string_code_unit_at (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 stri
765776
lit_utf8_size_t
lit_get_unicode_char_size_by_utf8_first_byte (const lit_utf8_byte_t first_byte) /**< first byte of a CESU-8 sequence */
{
  /* The sequence length is fully determined by the upper four bits of the first byte. */
  const uint32_t high_nibble = (uint32_t) (first_byte >> 4);

  /* Only 0xxx (1 byte), 110x (2 bytes) and 1110 (3 bytes) prefixes may start a sequence. */
  JERRY_ASSERT (high_nibble <= 7 || high_nibble == 12 || high_nibble == 13 || high_nibble == 14);

  /*
   * Sequence lengths packed two bits per entry; the entry for high nibble i
   * occupies bits 2i and 2i + 1 of the constant:
   *
   *   high nibble: 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
   *   length bits: 00 11 10 10 00 00 00 00 01 01 01 01 01 01 01 01
   *
   * Shifting and masking operate on the integer's value, not on its byte
   * layout in memory, so this lookup gives the same result on every platform
   * and needs neither an endianness check nor a byte-array fallback.
   */
  const uint32_t cesu_8_store = 0x3a005555;

  return (lit_utf8_size_t) ((cesu_8_store >> (high_nibble << 1)) & 0x3);
} /* lit_get_unicode_char_size_by_utf8_first_byte */
782803

783804
/**

jerry-core/lit/lit-strings.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,9 @@ lit_string_hash_t lit_utf8_string_calc_hash (const lit_utf8_byte_t *, lit_utf8_s
157157
lit_string_hash_t lit_utf8_string_hash_combine (lit_string_hash_t, const lit_utf8_byte_t *, lit_utf8_size_t);
158158

159159
/* code unit access */
160+
/* Memory alignment requested for the CESU-8 length lookup table; provided only when __LITTLE_ENDIAN is not defined. */
#if !defined (__LITTLE_ENDIAN)
#define CESU_8_TABLE_MEM_ALIGNMENT 16
#endif /* !__LITTLE_ENDIAN */
160163
ecma_char_t lit_utf8_string_code_unit_at (const lit_utf8_byte_t *, lit_utf8_size_t, ecma_length_t);
161164
lit_utf8_size_t lit_get_unicode_char_size_by_utf8_first_byte (lit_utf8_byte_t);
162165

0 commit comments

Comments
 (0)