@@ -757,6 +757,17 @@ lit_utf8_string_code_unit_at (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 stri
757
757
return code_unit;
758
758
} /* lit_utf8_string_code_unit_at */
759
759
760
+ /* CESU-8 number of bytes occupied lookup table */
761
+ #ifndef __LITTLE_ENDIAN
762
+ const __attribute__ ((aligned (CESU_8_TABLE_MEM_ALIGNMENT))) lit_utf8_byte_t table[]
763
+ {
764
+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
765
+ 0 , 0 , 0 , 0 ,
766
+ 2 , 2 ,
767
+ 3 , 0
768
+ };
769
+ #endif
770
+
760
771
/* *
761
772
* Get CESU-8 encoded size of character
762
773
*
@@ -765,19 +776,29 @@ lit_utf8_string_code_unit_at (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 stri
765
776
lit_utf8_size_t
lit_get_unicode_char_size_by_utf8_first_byte (const lit_utf8_byte_t first_byte) /**< first byte of the character */
{
  /*
   * The high nibble of the first byte selects the encoded size:
   *   0x0 - 0x7 -> 1 byte, 0xC - 0xD -> 2 bytes, 0xE -> 3 bytes,
   *   0x8 - 0xB and 0xF -> 0 (not a valid first byte).
   *
   * The sixteen sizes are packed two bits each into one 32-bit constant,
   * entry i occupying bits [2 * i + 1 : 2 * i]:
   *
   *   index: 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
   *   value: 00 11 10 10 00 00 00 00 01 01 01 01 01 01 01 01  = 0x3a005555
   *
   * Shifts operate on the integer value rather than on its byte layout in
   * memory, so this lookup yields the same result on every byte order and
   * needs no endianness-specific variant.
   */
  const uint32_t cesu_8_store = 0x3a005555u;

  /* Masking keeps the shift count below 32 whatever the width or signedness of lit_utf8_byte_t. */
  const uint32_t high_nibble = ((uint32_t) first_byte >> 4) & 0xfu;
  const uint32_t length = (cesu_8_store >> (high_nibble << 1)) & 0x3u;

  /* A zero entry means the byte cannot start a character (nibbles 0x8 - 0xB and 0xF). */
  JERRY_ASSERT (length != 0);

  return (lit_utf8_size_t) length;
} /* lit_get_unicode_char_size_by_utf8_first_byte */
782
803
783
804
/* *
0 commit comments