From bc17f89d0f73d4529508d4efc77c668ea7ac30ba Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 25 May 2025 12:24:50 +0200 Subject: [PATCH] Pass hash table directly in tokenizer This avoids dereferences by Z_ARRVAL_P(). --- ext/tokenizer/tokenizer.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/ext/tokenizer/tokenizer.c b/ext/tokenizer/tokenizer.c index 9d1b19df91163..203a9b9c36323 100644 --- a/ext/tokenizer/tokenizer.c +++ b/ext/tokenizer/tokenizer.c @@ -280,7 +280,7 @@ static zend_string *make_str(unsigned char *text, size_t leng, HashTable *intern } static void add_token( - zval *return_value, int token_type, unsigned char *text, size_t leng, int lineno, + HashTable *return_value_ht, int token_type, unsigned char *text, size_t leng, int lineno, zend_class_entry *token_class, HashTable *interned_strings) { zval token; if (token_class) { @@ -315,7 +315,7 @@ static void add_token( } else { ZVAL_STR(&token, make_str(text, leng, interned_strings)); } - zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &token); + zend_hash_next_index_insert_new(return_value_ht, &token); } static bool tokenize(zval *return_value, zend_string *source, zend_class_entry *token_class) @@ -337,11 +337,13 @@ static bool tokenize(zval *return_value, zend_string *source, zend_class_entry * zend_hash_init(&interned_strings, 0, NULL, NULL, 0); array_init(return_value); + HashTable *return_value_ht = Z_ARRVAL_P(return_value); + while ((token_type = lex_scan(&token, NULL))) { ZEND_ASSERT(token_type != T_ERROR); add_token( - return_value, token_type, zendtext, zendleng, token_line, + return_value_ht, token_type, zendtext, zendleng, token_line, token_class, &interned_strings); if (Z_TYPE(token) != IS_UNDEF) { @@ -358,7 +360,7 @@ static bool tokenize(zval *return_value, zend_string *source, zend_class_entry * /* fetch the rest into a T_INLINE_HTML */ if (zendcursor < zendlimit) { add_token( - return_value, T_INLINE_HTML, zendcursor, zendlimit - zendcursor, + return_value_ht, T_INLINE_HTML, zendcursor, zendlimit - zendcursor, token_line, token_class, &interned_strings); } break; @@ -383,7 +385,7 @@ static bool tokenize(zval *return_value, zend_string *source, zend_class_entry * } struct event_context { - zval *tokens; + HashTable *tokens; zend_class_entry *token_class; }; @@ -428,7 +430,7 @@ static void on_event( ctx->tokens, token, (unsigned char *) text, length, line, ctx->token_class, NULL); break; case ON_FEEDBACK: { - HashTable *tokens_ht = Z_ARRVAL_P(ctx->tokens); + HashTable *tokens_ht = ctx->tokens; zval *token_zv, *id_zv = NULL; ZEND_HASH_REVERSE_FOREACH_VAL(tokens_ht, token_zv) { id_zv = extract_token_id_to_replace(token_zv, text, length); @@ -469,7 +471,7 @@ static bool tokenize_parse( zend_prepare_string_for_scanning(&source_zval, ZSTR_EMPTY_ALLOC()); array_init(&token_stream); - ctx.tokens = &token_stream; + ctx.tokens = Z_ARRVAL(token_stream); ctx.token_class = token_class; CG(ast) = NULL;