Skip to content

Commit 89a5e03

Browse files
authored
bpo-30003: Fix handling escape characters in HZ codec (#1556)
1 parent 15033d1 commit 89a5e03

File tree

3 files changed

+19
-13
lines changed

3 files changed

+19
-13
lines changed

Lib/test/test_codecencodings_cn.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
8686
(b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
8787
(b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
8888
(b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
89+
# issue 30003
90+
('ab~cd', 'strict', b'ab~~cd'), # escape ~
91+
(b'~{Dc~~:C~}', 'strict', None), # ~~ only in ASCII mode
92+
(b'~{Dc~\n:C~}', 'strict', None), # ~\n only in ASCII mode
8993
)
9094

9195
if __name__ == "__main__":

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,9 @@ Extension Modules
334334
Library
335335
-------
336336

337+
- bpo-30003: Fix handling of escape characters in the HZ codec. Based on a patch
338+
by Ma Lin.
339+
337340
- bpo-30149: inspect.signature() now supports callables with
338341
variable-argument parameters wrapped with partialmethod.
339342
Patch by Dong-hee Na.

Modules/cjkcodecs/_codecs_cn.c

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -350,15 +350,17 @@ ENCODER(hz)
350350
DBCHAR code;
351351

352352
if (c < 0x80) {
353-
if (state->i == 0) {
354-
WRITEBYTE1((unsigned char)c);
355-
NEXT(1, 1);
356-
}
357-
else {
358-
WRITEBYTE3('~', '}', (unsigned char)c);
359-
NEXT(1, 3);
353+
if (state->i) {
354+
WRITEBYTE2('~', '}');
355+
NEXT_OUT(2);
360356
state->i = 0;
361357
}
358+
WRITEBYTE1((unsigned char)c);
359+
NEXT(1, 1);
360+
if (c == '~') {
361+
WRITEBYTE1('~');
362+
NEXT_OUT(1);
363+
}
362364
continue;
363365
}
364366

@@ -409,17 +411,14 @@ DECODER(hz)
409411
unsigned char c2 = INBYTE2;
410412

411413
REQUIRE_INBUF(2);
412-
if (c2 == '~') {
414+
if (c2 == '~' && state->i == 0)
413415
OUTCHAR('~');
414-
NEXT_IN(2);
415-
continue;
416-
}
417416
else if (c2 == '{' && state->i == 0)
418417
state->i = 1; /* set GB */
418+
else if (c2 == '\n' && state->i == 0)
419+
; /* line-continuation */
419420
else if (c2 == '}' && state->i == 1)
420421
state->i = 0; /* set ASCII */
421-
else if (c2 == '\n')
422-
; /* line-continuation */
423422
else
424423
return 1;
425424
NEXT_IN(2);

0 commit comments

Comments
 (0)