Skip to content

Commit 88f32ae

Browse files
committed
Use urllib.parse.unquote
1 parent 9daca87 commit 88f32ae

File tree

1 file changed

+2
-41
lines changed

1 file changed

+2
-41
lines changed

w3lib/_url.py

Lines changed: 2 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
# https://url.spec.whatwg.org/
44

5-
from itertools import chain
65
from math import floor
76
from typing import List, Optional, Tuple, Union
7+
from urllib.parse import unquote
88

99
from . import _utr46
1010
from ._encoding import (
@@ -277,45 +277,6 @@ def _parse_opaque_host(input: str) -> str:
277277
return _utf_8_percent_encode(input, _C0_CONTROL_PERCENT_ENCODE_SET)
278278

279279

280-
_ASCII_HEX_BYTES = tuple(
281-
chain(
282-
range(0x30, 0x39 + 1),
283-
range(0x41, 0x46 + 1),
284-
range(0x61, 0x66 + 1),
285-
)
286-
)
287-
288-
289-
# https://url.spec.whatwg.org/commit-snapshots/a46cb9188a48c2c9d80ba32a9b1891652d6b4900/#percent-decode
290-
def _percent_decode(input: bytes) -> bytes:
291-
output = b""
292-
pointer = 0
293-
input_length = len(input)
294-
while pointer < input_length:
295-
byte = input[pointer]
296-
if byte != 0x25 or (
297-
byte == 0x25
298-
and (
299-
pointer + 2 >= input_length
300-
or input[pointer + 1] not in _ASCII_HEX_BYTES
301-
or input[pointer + 2] not in _ASCII_HEX_BYTES
302-
)
303-
):
304-
output += b"%c" % byte
305-
else:
306-
byte_hex = b"%c%c" % (input[pointer + 1], input[pointer + 2])
307-
byte_point = int(byte_hex, base=16)
308-
output += b"%c" % byte_point
309-
pointer += 2
310-
pointer += 1
311-
return output
312-
313-
314-
# https://url.spec.whatwg.org/commit-snapshots/a46cb9188a48c2c9d80ba32a9b1891652d6b4900/#string-percent-decode
315-
def _percent_decode_string(input: str) -> bytes:
    """Encode *input* with ``str.encode()`` (UTF-8) and percent-decode it.

    Returns the raw decoded bytes; decoding them back to text is left to
    the caller.
    """
    return _percent_decode(input.encode())
317-
318-
319280
# https://url.spec.whatwg.org/commit-snapshots/a46cb9188a48c2c9d80ba32a9b1891652d6b4900/#ipv4-number-parser
320281
def _parse_ipv4_number(input: str) -> Tuple[int, bool]:
321282
if not input:
@@ -407,7 +368,7 @@ def _parse_host(
407368
return _parse_ipv6(input[1:-1])
408369
if not is_special:
409370
return _parse_opaque_host(input)
410-
domain = _percent_decode_string(input).decode()
371+
domain = unquote(input)
411372
ascii_domain = _domain_to_ascii(domain)
412373
for code_point in ascii_domain:
413374
if code_point in _FORBIDDEN_DOMAIN_CODE_POINTS:

0 commit comments

Comments
 (0)