Skip to content

Commit 4115119

Browse files
lukaszsamson authored and josevalim committed
Handle error result from unescape_tokens in tokenizer (#14587)
1 parent 711008a commit 4115119

File tree

2 files changed

+53
-6
lines changed

2 files changed

+53
-6
lines changed

lib/elixir/src/elixir_tokenizer.erl

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -948,17 +948,22 @@ handle_dot([$., H | T] = Original, Line, Column, DotInfo, BaseScope, Tokens) whe
948948
InterScope
949949
end,
950950

951-
{ok, [UnescapedPart]} = unescape_tokens([Part], Line, Column, NewScope),
951+
case unescape_tokens([Part], Line, Column, NewScope) of
952+
{ok, [UnescapedPart]} ->
953+
case unsafe_to_atom(UnescapedPart, Line, Column, NewScope) of
954+
{ok, Atom} ->
955+
Token = check_call_identifier(Line, Column, H, Atom, Rest),
956+
TokensSoFar = add_token_with_eol({'.', DotInfo}, Tokens),
957+
tokenize(Rest, NewLine, NewColumn, NewScope, [Token | TokensSoFar]);
952958

953-
case unsafe_to_atom(UnescapedPart, Line, Column, NewScope) of
954-
{ok, Atom} ->
955-
Token = check_call_identifier(Line, Column, H, Atom, Rest),
956-
TokensSoFar = add_token_with_eol({'.', DotInfo}, Tokens),
957-
tokenize(Rest, NewLine, NewColumn, NewScope, [Token | TokensSoFar]);
959+
{error, Reason} ->
960+
error(Reason, Original, NewScope, Tokens)
961+
end;
958962

959963
{error, Reason} ->
960964
error(Reason, Original, NewScope, Tokens)
961965
end;
966+
962967
{_NewLine, _NewColumn, _Parts, Rest, NewScope} ->
963968
Message = "interpolation is not allowed when calling function/macro. Found interpolation in a call starting with: ",
964969
error({?LOC(Line, Column), Message, [H]}, Rest, NewScope, Tokens);

lib/elixir/test/elixir/code_test.exs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,48 @@ defmodule CodeTest do
514514
}
515515
end
516516

517+
test "string_to_quoted handles unescape errors properly" do
518+
# Test invalid hex escape character
519+
assert {:error, {meta, message, token}} = Code.string_to_quoted("a.'\\xg'")
520+
521+
assert meta[:line] == 1
522+
assert meta[:column] == 3
523+
524+
assert message ==
525+
"invalid hex escape character, expected \\xHH where H is a hexadecimal digit. Syntax error after: "
526+
527+
assert token == "\\x"
528+
529+
# Test invalid Unicode escape character
530+
assert {:error, {meta2, message2, token2}} = Code.string_to_quoted("a.'\\ug'")
531+
532+
assert meta2[:line] == 1
533+
assert meta2[:column] == 3
534+
535+
assert message2 ==
536+
"invalid Unicode escape character, expected \\uHHHH or \\u{H*} where H is a hexadecimal digit. Syntax error after: "
537+
538+
assert token2 == "\\u"
539+
540+
# Test invalid Unicode code point (lone surrogate)
541+
assert {:error, {meta3, message3, token3}} = Code.string_to_quoted("a.'\\u{D800}'")
542+
543+
assert meta3[:line] == 1
544+
assert meta3[:column] == 3
545+
546+
assert message3 == "invalid or reserved Unicode code point \\u{D800}. Syntax error after: "
547+
assert token3 == "\\u"
548+
549+
# Test Unicode code point beyond valid range
550+
assert {:error, {meta4, message4, token4}} = Code.string_to_quoted("a.'\\u{110000}'")
551+
552+
assert meta4[:line] == 1
553+
assert meta4[:column] == 3
554+
555+
assert message4 == "invalid or reserved Unicode code point \\u{110000}. Syntax error after: "
556+
assert token4 == "\\u"
557+
end
558+
517559
test "string_to_quoted raises UnicodeConversionError for invalid UTF-8 in quoted atoms and function calls" do
518560
invalid_utf8_cases = [
519561
# Quoted atom

0 commit comments

Comments
 (0)