Skip to content

Commit 5ec6af6

Browse files
isidentical authored and pablogsal committed
gh-88116: Handle wide unicode characters in tracebacks
1 parent 9a9fba8 commit 5ec6af6

File tree

2 files changed

+98
-14
lines changed

2 files changed

+98
-14
lines changed

Lib/test/test_traceback.py

Lines changed: 57 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -922,8 +922,63 @@ def f():
922922
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
923923
" callable()",
924924
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 4}, in f",
925-
" print(1, www(",
926-
" ^^^^",
925+
f" print(1, www(",
926+
f" ^^^^^^^",
927+
]
928+
self.assertEqual(actual, expected)
929+
930+
def test_byte_offset_with_wide_characters_term_highlight(self):
# Checks the traceback marker line for a failing binary operation whose
# operands are non-ASCII identifiers: one written with CJK characters and
# one written with accented Latin characters.
931+
def f():
932+
说明说明 = 1
933+
şçöğıĤellö = 0 # not wide but still non-ascii
934+
# Dividing by the zero-valued name above is what makes f() fail.
return 说明说明 / şçöğıĤellö
935+
936+
actual = self.get_exception(f)
937+
expected = [
938+
f"Traceback (most recent call last):",
939+
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
940+
f" callable()",
941+
# The "+ 3" points at the `return` statement, three lines below `def f():`.
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 3}, in f",
942+
f" return 说明说明 / şçöğıĤellö",
943+
# Marker line: presumably '~' under both operands and '^' under the
# operator, measured in display columns -- confirm against the
# anchor logic in traceback.py.
f" ~~~~~~~~~^~~~~~~~~~~~",
944+
]
945+
self.assertEqual(actual, expected)
946+
947+
def test_byte_offset_with_emojis_term_highlight(self):
# Checks the caret line when the failing source line mixes emoji string
# literals with a call to a name that contains CJK characters, and the
# call continues onto a second source line.
948+
def f():
949+
# NOTE(review): func_说明说明 is not defined inside f; presumably the
# lookup of that name is what fails -- confirm it is not defined
# elsewhere in the test module.
return "✨🐍" + func_说明说明("📗🚛",
950+
"📗🚛") + "🐍"
951+
952+
actual = self.get_exception(f)
953+
expected = [
954+
f"Traceback (most recent call last):",
955+
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
956+
f" callable()",
957+
# Only the first line of the two-line expression (one line below
# `def f():`) is expected in the output.
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 1}, in f",
958+
f' return "✨🐍" + func_说明说明("📗🚛",',
959+
f" ^^^^^^^^^^^^^",
960+
]
961+
self.assertEqual(actual, expected)
962+
963+
def test_byte_offset_wide_chars_subscript(self):
# Checks the marker line for a chain of subscripts whose keys are emoji
# and CJK string literals.
964+
def f():
965+
my_dct = {
966+
"✨🚛✨": {
967+
"说明": {
968+
"🐍🐍🐍": None
969+
}
970+
}
971+
}
972+
# The innermost dict only holds the key "🐍🐍🐍", so the ["🐍"]
# lookup below has no matching entry.
return my_dct["✨🚛✨"]["说明"]["🐍"]["说明"]["🐍🐍"]
973+
974+
actual = self.get_exception(f)
975+
expected = [
976+
f"Traceback (most recent call last):",
977+
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
978+
f" callable()",
979+
# The "+ 8" points at the `return` statement, eight lines below `def f():`.
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 8}, in f",
980+
f' return my_dct["✨🚛✨"]["说明"]["🐍"]["说明"]["🐍🐍"]',
981+
# Marker line: presumably '~' under the part that was subscripted and
# '^' under the failing subscript, measured in display columns --
# confirm against the anchor logic in traceback.py.
f" ~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^",
927982
]
928983
self.assertEqual(actual, expected)
929984

Lib/traceback.py

Lines changed: 41 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -489,39 +489,49 @@ def format_frame_summary(self, frame_summary):
489489
stripped_line = frame_summary.line.strip()
490490
row.append(' {}\n'.format(stripped_line))
491491

492-
orig_line_len = len(frame_summary._original_line)
492+
line = frame_summary._original_line
493+
orig_line_len = len(line)
493494
frame_line_len = len(frame_summary.line.lstrip())
494495
stripped_characters = orig_line_len - frame_line_len
495496
if (
496497
frame_summary.colno is not None
497498
and frame_summary.end_colno is not None
498499
):
499500
start_offset = _byte_offset_to_character_offset(
500-
frame_summary._original_line, frame_summary.colno) + 1
501+
line, frame_summary.colno)
501502
end_offset = _byte_offset_to_character_offset(
502-
frame_summary._original_line, frame_summary.end_colno) + 1
503+
line, frame_summary.end_colno)
504+
code_segment = line[start_offset:end_offset]
503505

504506
anchors = None
505507
if frame_summary.lineno == frame_summary.end_lineno:
506508
with suppress(Exception):
507-
anchors = _extract_caret_anchors_from_line_segment(
508-
frame_summary._original_line[start_offset - 1:end_offset - 1]
509-
)
509+
anchors = _extract_caret_anchors_from_line_segment(code_segment)
510510
else:
511-
end_offset = stripped_characters + len(stripped_line)
511+
# Don't count the newline since the anchors only need to
512+
# go up until the last character of the line.
513+
end_offset = len(line.rstrip())
512514

513515
# show indicators if primary char doesn't span the frame line
514516
if end_offset - start_offset < len(stripped_line) or (
515517
anchors and anchors.right_start_offset - anchors.left_end_offset > 0):
518+
# When showing this on a terminal, some of the non-ASCII characters
519+
# might be rendered as double-width characters, so we need to take
520+
# that into account when calculating the length of the line.
521+
dp_start_offset = _display_width(line, start_offset) + 1
522+
dp_end_offset = _display_width(line, end_offset) + 1
523+
516524
row.append(' ')
517-
row.append(' ' * (start_offset - stripped_characters))
525+
row.append(' ' * (dp_start_offset - stripped_characters))
518526

519527
if anchors:
520-
row.append(anchors.primary_char * (anchors.left_end_offset))
521-
row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset))
522-
row.append(anchors.primary_char * (end_offset - start_offset - anchors.right_start_offset))
528+
dp_left_end_offset = _display_width(code_segment, anchors.left_end_offset)
529+
dp_right_start_offset = _display_width(code_segment, anchors.right_start_offset)
530+
row.append(anchors.primary_char * dp_left_end_offset)
531+
row.append(anchors.secondary_char * (dp_right_start_offset - dp_left_end_offset))
532+
row.append(anchors.primary_char * (dp_end_offset - dp_start_offset - dp_right_start_offset))
523533
else:
524-
row.append('^' * (end_offset - start_offset))
534+
row.append('^' * (dp_end_offset - dp_start_offset))
525535

526536
row.append('\n')
527537

@@ -642,6 +652,25 @@ def _extract_caret_anchors_from_line_segment(segment):
642652

643653
return None
644654

655+
_WIDE_CHAR_SPECIFIERS = "WF"
656+
657+
def _display_width(line, offset):
658+
"""Calculate the extra amount of width space the given source
659+
code segment might take if it were to be displayed on a fixed
660+
width output device. Supports wide unicode characters and emojis."""
661+
662+
# Fast track for ASCII-only strings
663+
if line.isascii():
664+
return offset
665+
666+
import unicodedata
667+
668+
return sum(
669+
2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1
670+
for char in line[:offset]
671+
)
672+
673+
645674

646675
class _ExceptionPrintContext:
647676
def __init__(self):

0 commit comments

Comments
 (0)