
Commit 1e9cc3d

gh-133197: Improve error message for incompatible string / bytes prefixes (#133242)
1 parent 4912b29 commit 1e9cc3d

File tree: 5 files changed, +143 −25 lines changed

Lib/test/test_fstring.py

Lines changed: 1 addition & 1 deletion
@@ -1304,7 +1304,7 @@ def test_invalid_string_prefixes(self):
             "Bf''",
             "BF''",]
         double_quote_cases = [case.replace("'", '"') for case in single_quote_cases]
-        self.assertAllRaise(SyntaxError, 'invalid syntax',
+        self.assertAllRaise(SyntaxError, 'prefixes are incompatible',
                             single_quote_cases + double_quote_cases)

     def test_leading_trailing_spaces(self):

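As a quick illustration of what the updated assertion expects, here is one of the listed cases at the REPL (assuming an interpreter built with this change):

   >>> Bf''
   Traceback (most recent call last):
   SyntaxError: 'b' and 'f' prefixes are incompatible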
Lib/test/test_grammar.py

Lines changed: 21 additions & 0 deletions
@@ -216,6 +216,27 @@ def test_string_literals(self):
         '
         self.assertEqual(x, y)

+    def test_string_prefixes(self):
+        def check(s):
+            parsed = eval(s)
+            self.assertIs(type(parsed), str)
+            self.assertGreater(len(parsed), 0)
+
+        check("u'abc'")
+        check("r'abc\t'")
+        check("rf'abc\a {1 + 1}'")
+        check("fr'abc\a {1 + 1}'")
+
+    def test_bytes_prefixes(self):
+        def check(s):
+            parsed = eval(s)
+            self.assertIs(type(parsed), bytes)
+            self.assertGreater(len(parsed), 0)
+
+        check("b'abc'")
+        check("br'abc\t'")
+        check("rb'abc\a'")
+
     def test_ellipsis(self):
         x = ...
         self.assertTrue(x is Ellipsis)

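These new tests exercise prefix combinations that remain valid in either order and mixed case; a quick REPL illustration of that behaviour (not part of the diff):

   >>> rb'abc\t'
   b'abc\\t'
   >>> fr'abc {1 + 1}'
   'abc 2'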
Lib/test/test_syntax.py

Lines changed: 63 additions & 7 deletions
@@ -1877,21 +1877,77 @@
    Traceback (most recent call last):
    SyntaxError: cannot assign to f-string expression here. Maybe you meant '==' instead of '='?

-   >>> ft'abc'
+   >>> ub''
    Traceback (most recent call last):
-   SyntaxError: can't use 'f' and 't' string prefixes together
+   SyntaxError: 'u' and 'b' prefixes are incompatible

-   >>> tf"{x=}"
+   >>> bu"привет"
    Traceback (most recent call last):
-   SyntaxError: can't use 'f' and 't' string prefixes together
+   SyntaxError: 'u' and 'b' prefixes are incompatible

-   >>> tb''
+   >>> ur''
+   Traceback (most recent call last):
+   SyntaxError: 'u' and 'r' prefixes are incompatible
+
+   >>> ru"\t"
+   Traceback (most recent call last):
+   SyntaxError: 'u' and 'r' prefixes are incompatible
+
+   >>> uf'{1 + 1}'
+   Traceback (most recent call last):
+   SyntaxError: 'u' and 'f' prefixes are incompatible
+
+   >>> fu""
+   Traceback (most recent call last):
+   SyntaxError: 'u' and 'f' prefixes are incompatible
+
+   >>> ut'{1}'
+   Traceback (most recent call last):
+   SyntaxError: 'u' and 't' prefixes are incompatible
+
+   >>> tu"234"
+   Traceback (most recent call last):
+   SyntaxError: 'u' and 't' prefixes are incompatible
+
+   >>> bf'{x!r}'
+   Traceback (most recent call last):
+   SyntaxError: 'b' and 'f' prefixes are incompatible
+
+   >>> fb"text"
    Traceback (most recent call last):
-   SyntaxError: can't use 'b' and 't' string prefixes together
+   SyntaxError: 'b' and 'f' prefixes are incompatible

    >>> bt"text"
    Traceback (most recent call last):
-   SyntaxError: can't use 'b' and 't' string prefixes together
+   SyntaxError: 'b' and 't' prefixes are incompatible
+
+   >>> tb''
+   Traceback (most recent call last):
+   SyntaxError: 'b' and 't' prefixes are incompatible
+
+   >>> tf"{0.3:.02f}"
+   Traceback (most recent call last):
+   SyntaxError: 'f' and 't' prefixes are incompatible
+
+   >>> ft'{x=}'
+   Traceback (most recent call last):
+   SyntaxError: 'f' and 't' prefixes are incompatible
+
+   >>> tfu"{x=}"
+   Traceback (most recent call last):
+   SyntaxError: 'u' and 'f' prefixes are incompatible
+
+   >>> turf"{x=}"
+   Traceback (most recent call last):
+   SyntaxError: 'u' and 'r' prefixes are incompatible
+
+   >>> burft"{x=}"
+   Traceback (most recent call last):
+   SyntaxError: 'u' and 'b' prefixes are incompatible
+
+   >>> brft"{x=}"
+   Traceback (most recent call last):
+   SyntaxError: 'b' and 'f' prefixes are incompatible

    >>> t'{x}' = 42
    Traceback (most recent call last):
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+Improve :exc:`SyntaxError` error messages for incompatible string / bytes
+prefixes.

Parser/lexer/lexer.c

Lines changed: 56 additions & 17 deletions
@@ -404,6 +404,51 @@ tok_continuation_line(struct tok_state *tok) {
     return c;
 }

+static int
+maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
+                                             int saw_b, int saw_r, int saw_u,
+                                             int saw_f, int saw_t) {
+    // Supported: rb, rf, rt (in any order)
+    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
+
+#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                              \
+    do {                                                                   \
+        (void)_PyTokenizer_syntaxerror_known_range(                        \
+            tok, (int)(tok->start + 1 - tok->line_start),                  \
+            (int)(tok->cur - tok->line_start),                             \
+            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible");  \
+        return -1;                                                         \
+    } while (0)
+
+    if (saw_u && saw_b) {
+        RETURN_SYNTAX_ERROR("u", "b");
+    }
+    if (saw_u && saw_r) {
+        RETURN_SYNTAX_ERROR("u", "r");
+    }
+    if (saw_u && saw_f) {
+        RETURN_SYNTAX_ERROR("u", "f");
+    }
+    if (saw_u && saw_t) {
+        RETURN_SYNTAX_ERROR("u", "t");
+    }
+
+    if (saw_b && saw_f) {
+        RETURN_SYNTAX_ERROR("b", "f");
+    }
+    if (saw_b && saw_t) {
+        RETURN_SYNTAX_ERROR("b", "t");
+    }
+
+    if (saw_f && saw_t) {
+        RETURN_SYNTAX_ERROR("f", "t");
+    }
+
+#undef RETURN_SYNTAX_ERROR
+
+    return 0;
+}
+
 static int
 tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
 {

@@ -648,40 +693,34 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
         /* Process the various legal combinations of b"", r"", u"", and f"". */
         int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
         while (1) {
-            if (!(saw_b || saw_u || saw_f) && (c == 'b' || c == 'B'))
+            if (!saw_b && (c == 'b' || c == 'B')) {
                 saw_b = 1;
+            }
             /* Since this is a backwards compatibility support literal we don't
                want to support it in arbitrary order like byte literals. */
-            else if (!(saw_b || saw_u || saw_r || saw_f || saw_t)
-                     && (c == 'u'|| c == 'U')) {
+            else if (!saw_u && (c == 'u'|| c == 'U')) {
                 saw_u = 1;
             }
             /* ur"" and ru"" are not supported */
-            else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) {
+            else if (!saw_r && (c == 'r' || c == 'R')) {
                 saw_r = 1;
             }
-            else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F')) {
+            else if (!saw_f && (c == 'f' || c == 'F')) {
                 saw_f = 1;
             }
-            else if (!(saw_t || saw_u) && (c == 't' || c == 'T')) {
+            else if (!saw_t && (c == 't' || c == 'T')) {
                 saw_t = 1;
             }
             else {
                 break;
             }
             c = tok_nextc(tok);
             if (c == '"' || c == '\'') {
-                if (saw_b && saw_t) {
-                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
-                        tok, (int)(tok->start + 1 - tok->line_start),
-                        (int)(tok->cur - tok->line_start),
-                        "can't use 'b' and 't' string prefixes together"));
-                }
-                if (saw_f && saw_t) {
-                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
-                        tok, (int)(tok->start + 1 - tok->line_start),
-                        (int)(tok->cur - tok->line_start),
-                        "can't use 'f' and 't' string prefixes together"));
+                // Raise error on incompatible string prefixes:
+                int status = maybe_raise_syntax_error_for_string_prefixes(
+                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
+                if (status < 0) {
+                    return MAKE_TOKEN(ERRORTOKEN);
                 }

                 // Handle valid f or t string creation:

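To summarize the rule the new helper enforces, here is a rough Python sketch of the same pairwise check (illustrative only; the real check runs in the C tokenizer above, and the name check_prefix_compatibility is invented for this sketch):

    # Illustrative sketch only: mirrors the pairwise rule enforced by
    # maybe_raise_syntax_error_for_string_prefixes() in Parser/lexer/lexer.c.
    # The pairs are checked in the same order, so a prefix such as "burft"
    # reports the 'u'/'b' conflict first, matching the doctests above.
    _INCOMPATIBLE_PAIRS = [
        ("u", "b"), ("u", "r"), ("u", "f"), ("u", "t"),
        ("b", "f"), ("b", "t"),
        ("f", "t"),
    ]

    def check_prefix_compatibility(prefix):
        """Raise SyntaxError if a string prefix mixes incompatible letters."""
        seen = set(prefix.lower())
        for first, second in _INCOMPATIBLE_PAIRS:
            if first in seen and second in seen:
                raise SyntaxError(
                    f"'{first}' and '{second}' prefixes are incompatible")

    check_prefix_compatibility("rb")     # OK: raw bytes literals are allowed
    check_prefix_compatibility("burft")  # raises: 'u' and 'b' prefixes are incompatible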