Skip to content

Commit 5aa4211

Browse files
committed
Use code unit instead of code point
JerryScript-DCO-1.0-Signed-off-by: László Langó [email protected]
1 parent a9c77b4 commit 5aa4211

File tree

7 files changed

+51
-52
lines changed

7 files changed

+51
-52
lines changed

jerry-core/ecma/builtin-objects/ecma-builtin-global.c

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -96,25 +96,25 @@ ecma_builtin_global_object_print (ecma_value_t this_arg __attr_unused___, /**< t
9696

9797
while (utf8_str_curr_p < utf8_str_end_p)
9898
{
99-
ecma_char_t code_point = lit_utf8_read_next (&utf8_str_curr_p);
99+
ecma_char_t code_unit = lit_utf8_read_next (&utf8_str_curr_p);
100100

101-
if (code_point == LIT_CHAR_NULL)
101+
if (code_unit == LIT_CHAR_NULL)
102102
{
103103
printf ("\\u0000");
104104
}
105-
else if (code_point <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
105+
else if (code_unit <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
106106
{
107-
printf ("%c", (char) code_point);
107+
printf ("%c", (char) code_unit);
108108
}
109109
else
110110
{
111-
JERRY_STATIC_ASSERT (sizeof (code_point) == 2,
111+
JERRY_STATIC_ASSERT (sizeof (code_unit) == 2,
112112
size_of_code_point_must_be_equal_to_2_bytes);
113113

114-
uint32_t byte_high = (uint32_t) JRT_EXTRACT_BIT_FIELD (ecma_char_t, code_point,
114+
uint32_t byte_high = (uint32_t) JRT_EXTRACT_BIT_FIELD (ecma_char_t, code_unit,
115115
JERRY_BITSINBYTE,
116116
JERRY_BITSINBYTE);
117-
uint32_t byte_low = (uint32_t) JRT_EXTRACT_BIT_FIELD (ecma_char_t, code_point,
117+
uint32_t byte_low = (uint32_t) JRT_EXTRACT_BIT_FIELD (ecma_char_t, code_unit,
118118
0,
119119
JERRY_BITSINBYTE);
120120

@@ -801,9 +801,9 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
801801
continue;
802802
}
803803

804-
lit_code_point_t decoded_byte;
804+
ecma_char_t decoded_byte;
805805

806-
if (!lit_read_code_point_from_hex (input_char_p + 1, 2, &decoded_byte))
806+
if (!lit_read_code_unit_from_hex (input_char_p + 1, 2, &decoded_byte))
807807
{
808808
ret_value = ecma_raise_uri_error (ECMA_ERR_MSG (""));
809809
break;
@@ -857,9 +857,9 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
857857
continue;
858858
}
859859

860-
lit_code_point_t decoded_byte;
860+
ecma_char_t decoded_byte;
861861

862-
if (!lit_read_code_point_from_hex (input_char_p + 1, 2, &decoded_byte))
862+
if (!lit_read_code_unit_from_hex (input_char_p + 1, 2, &decoded_byte))
863863
{
864864
ret_value = ecma_raise_uri_error (ECMA_ERR_MSG (""));
865865
break;
@@ -916,16 +916,16 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
916916
}
917917
else
918918
{
919-
lit_code_point_t cp;
919+
ecma_char_t chr;
920920

921-
if (!lit_read_code_point_from_hex (input_char_p + 1, 2, &cp)
922-
|| ((cp & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER))
921+
if (!lit_read_code_unit_from_hex (input_char_p + 1, 2, &chr)
922+
|| ((chr & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER))
923923
{
924924
is_valid = false;
925925
break;
926926
}
927927

928-
octets[i] = (lit_utf8_byte_t) cp;
928+
octets[i] = (lit_utf8_byte_t) chr;
929929
input_char_p += URI_ENCODED_BYTE_SIZE;
930930
}
931931
}

jerry-core/ecma/builtin-objects/ecma-builtin-json.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -178,15 +178,15 @@ ecma_builtin_json_parse_string (ecma_json_token_t *token_p) /**< token argument
178178
}
179179
case LIT_CHAR_LOWERCASE_U:
180180
{
181-
lit_code_point_t code_point;
181+
ecma_char_t code_unit;
182182

183-
if (!(lit_read_code_point_from_hex (current_p + 1, 4, &code_point)))
183+
if (!(lit_read_code_unit_from_hex (current_p + 1, 4, &code_unit)))
184184
{
185185
return;
186186
}
187187

188188
current_p += 5;
189-
write_p += lit_code_point_to_cesu8 (code_point, write_p);
189+
write_p += lit_code_unit_to_utf8 (code_unit, write_p);
190190
continue;
191191
}
192192
default:

jerry-core/lit/lit-char-helpers.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -289,32 +289,32 @@ lit_char_hex_to_int (ecma_char_t c) /**< code unit, corresponding to
289289
* @return true if decoding was successful, false otherwise
290290
*/
291291
bool
292-
lit_read_code_point_from_hex (lit_utf8_byte_t *buf_p, /**< buffer with characters */
293-
lit_utf8_size_t number_of_characters, /**< number of characters to be read */
294-
lit_code_point_t *out_code_point_p) /**< [out] decoded result */
292+
lit_read_code_unit_from_hex (lit_utf8_byte_t *buf_p, /**< buffer with characters */
293+
lit_utf8_size_t number_of_characters, /**< number of characters to be read */
294+
ecma_char_ptr_t out_unit_point_p) /**< [out] decoded result */
295295
{
296-
lit_code_point_t code_point = 0;
296+
ecma_char_t code_unit = LIT_BYTE_NULL;
297297

298298
JERRY_ASSERT (number_of_characters >= 2 && number_of_characters <= 4);
299299

300300
for (lit_utf8_size_t i = 0; i < number_of_characters; i++)
301301
{
302-
code_point <<= 4;
302+
code_unit = (ecma_char_t) (code_unit << 4u);
303303

304304
if (*buf_p >= LIT_CHAR_ASCII_DIGITS_BEGIN
305305
&& *buf_p <= LIT_CHAR_ASCII_DIGITS_END)
306306
{
307-
code_point |= (uint32_t) (*buf_p - LIT_CHAR_ASCII_DIGITS_BEGIN);
307+
code_unit |= (ecma_char_t) (*buf_p - LIT_CHAR_ASCII_DIGITS_BEGIN);
308308
}
309309
else if (*buf_p >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN
310310
&& *buf_p <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END)
311311
{
312-
code_point |= (uint32_t) (*buf_p - (LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN - 10));
312+
code_unit |= (ecma_char_t) (*buf_p - (LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN - 10));
313313
}
314314
else if (*buf_p >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN
315315
&& *buf_p <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_END)
316316
{
317-
code_point |= (uint32_t) (*buf_p - (LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN - 10));
317+
code_unit |= (ecma_char_t) (*buf_p - (LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN - 10));
318318
}
319319
else
320320
{
@@ -324,9 +324,9 @@ lit_read_code_point_from_hex (lit_utf8_byte_t *buf_p, /**< buffer with character
324324
buf_p++;
325325
}
326326

327-
*out_code_point_p = code_point;
327+
*out_unit_point_p = code_unit;
328328
return true;
329-
} /* lit_read_code_point_from_hex */
329+
} /* lit_read_code_unit_from_hex */
330330

331331
/**
332332
* Check if specified character is a word character (part of IsWordChar abstract operation)

jerry-core/lit/lit-char-helpers.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ extern bool lit_char_is_hex_digit (ecma_char_t);
213213
extern uint32_t lit_char_hex_to_int (ecma_char_t);
214214

215215
/* read a hex encoded code unit from a zero terminated buffer */
216-
bool lit_read_code_point_from_hex (lit_utf8_byte_t *, lit_utf8_size_t, lit_code_point_t *);
216+
bool lit_read_code_unit_from_hex (lit_utf8_byte_t *, lit_utf8_size_t, ecma_char_ptr_t);
217217

218218
/**
219219
* Null character

jerry-core/parser/regexp/re-compiler.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,12 @@
4141
*/
4242
static void
4343
re_append_char_class (void *re_ctx_p, /**< RegExp compiler context */
44-
uint32_t start, /**< character class range from */
45-
uint32_t end) /**< character class range to */
44+
ecma_char_t start, /**< character class range from */
45+
ecma_char_t end) /**< character class range to */
4646
{
4747
re_compiler_ctx_t *ctx_p = (re_compiler_ctx_t *) re_ctx_p;
48-
re_append_char (ctx_p->bytecode_ctx_p, (ecma_char_t) start);
49-
re_append_char (ctx_p->bytecode_ctx_p, (ecma_char_t) end);
48+
re_append_char (ctx_p->bytecode_ctx_p, start);
49+
re_append_char (ctx_p->bytecode_ctx_p, end);
5050
ctx_p->parser_ctx_p->num_of_classes++;
5151
} /* re_append_char_class */
5252

jerry-core/parser/regexp/re-parser.c

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
315315
{
316316
re_token_type_t token_type = ((re_compiler_ctx_t *) re_ctx_p)->current_token.type;
317317
out_token_p->qmax = out_token_p->qmin = 1;
318-
uint32_t start = RE_CHAR_UNDEF;
318+
ecma_char_t start = RE_CHAR_UNDEF;
319319
bool is_range = false;
320320
parser_ctx_p->num_of_classes = 0;
321321

@@ -332,7 +332,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
332332
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string"));
333333
}
334334

335-
uint32_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
335+
ecma_char_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
336336

337337
if (ch == LIT_CHAR_RIGHT_SQUARE)
338338
{
@@ -412,27 +412,27 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
412412
}
413413
else if (ch == LIT_CHAR_LOWERCASE_X)
414414
{
415-
lit_code_point_t code_point;
415+
ecma_char_t code_unit;
416416

417-
if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 2, &code_point))
417+
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 2, &code_unit))
418418
{
419419
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\x'"));
420420
}
421421

422422
parser_ctx_p->input_curr_p += 2;
423-
append_char_class (re_ctx_p, code_point, code_point);
423+
append_char_class (re_ctx_p, code_unit, code_unit);
424424
}
425425
else if (ch == LIT_CHAR_LOWERCASE_U)
426426
{
427-
lit_code_point_t code_point;
427+
ecma_char_t code_unit;
428428

429-
if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 4, &code_point))
429+
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 4, &code_unit))
430430
{
431431
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\u'"));
432432
}
433433

434434
parser_ctx_p->input_curr_p += 4;
435-
append_char_class (re_ctx_p, code_point, code_point);
435+
append_char_class (re_ctx_p, code_unit, code_unit);
436436
}
437437
else if (ch == LIT_CHAR_LOWERCASE_D)
438438
{
@@ -499,12 +499,11 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
499499
append_char_class (re_ctx_p, LIT_CHAR_LOWERCASE_Z + 1, LIT_UTF16_CODE_UNIT_MAX);
500500
ch = RE_CHAR_UNDEF;
501501
}
502-
else if (ch <= LIT_UTF16_CODE_UNIT_MAX
503-
&& lit_char_is_octal_digit ((ecma_char_t) ch)
502+
else if (lit_char_is_octal_digit ((ecma_char_t) ch)
504503
&& ch != LIT_CHAR_0)
505504
{
506505
parser_ctx_p->input_curr_p--;
507-
ch = re_parse_octal (parser_ctx_p);
506+
ch = (ecma_char_t) re_parse_octal (parser_ctx_p);
508507
}
509508
} /* ch == LIT_CHAR_BACKSLASH */
510509

@@ -667,28 +666,28 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
667666
else if (ch == LIT_CHAR_LOWERCASE_X
668667
&& re_hex_lookup (parser_ctx_p, 2))
669668
{
670-
lit_code_point_t code_point;
669+
ecma_char_t code_unit;
671670

672-
if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 2, &code_point))
671+
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 2, &code_unit))
673672
{
674673
return ecma_raise_syntax_error (ECMA_ERR_MSG ("decode error"));
675674
}
676675

677676
parser_ctx_p->input_curr_p += 2;
678-
out_token_p->value = code_point;
677+
out_token_p->value = code_unit;
679678
}
680679
else if (ch == LIT_CHAR_LOWERCASE_U
681680
&& re_hex_lookup (parser_ctx_p, 4))
682681
{
683-
lit_code_point_t code_point;
682+
ecma_char_t code_unit;
684683

685-
if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 4, &code_point))
684+
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 4, &code_unit))
686685
{
687686
return ecma_raise_syntax_error (ECMA_ERR_MSG ("decode error"));
688687
}
689688

690689
parser_ctx_p->input_curr_p += 4;
691-
out_token_p->value = code_point;
690+
out_token_p->value = code_unit;
692691
}
693692
else if (ch == LIT_CHAR_LOWERCASE_D)
694693
{

jerry-core/parser/regexp/re-parser.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ typedef enum
7878
/**
7979
* Undefined character (out of the range of the codeunit)
8080
*/
81-
#define RE_CHAR_UNDEF 0xFFFFFFFF
81+
#define RE_CHAR_UNDEF 0xFFFF
8282

8383
/**
8484
* RegExp token type
@@ -104,7 +104,7 @@ typedef struct
104104
uint32_t num_of_classes; /**< number of character classes */
105105
} re_parser_ctx_t;
106106

107-
typedef void (*re_char_class_callback) (void *re_ctx_p, uint32_t start, uint32_t end);
107+
typedef void (*re_char_class_callback) (void *re_ctx_p, ecma_char_t start, ecma_char_t end);
108108

109109
ecma_value_t
110110
re_parse_char_class (re_parser_ctx_t *, re_char_class_callback, void *, re_token_t *);

0 commit comments

Comments
 (0)