8
8
import posixpath
9
9
import re
10
10
import string
11
+ from inspect import getfullargspec
11
12
from typing import (
12
13
cast ,
13
14
Callable ,
20
21
Union ,
21
22
)
22
23
from urllib .parse import (
23
- parse_qs ,
24
- parse_qsl ,
24
+ parse_qs as _parse_qs ,
25
+ parse_qsl as _parse_qsl ,
25
26
ParseResult ,
26
27
quote ,
27
28
unquote_to_bytes ,
41
42
from ._url import _SPECIAL_SCHEMES
42
43
43
44
45
# True when the ``urllib.parse.parse_qs`` available at import time does not
# list ``separator`` among its named arguments, i.e. the keyword cannot be
# forwarded to it. The probe is done on ``parse_qs`` only.
# NOTE(review): presumably ``parse_qsl`` gained the argument in the same
# release as ``parse_qs`` -- confirm.
_REMOVE_SEPARATOR = 'separator' not in getfullargspec(_parse_qs).args


def _handle_separator(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)``, dropping ``separator`` if unsupported.

    When ``_REMOVE_SEPARATOR`` is true, any ``separator`` keyword argument is
    silently discarded before the call, so ``func`` falls back to its own
    built-in splitting rules. Otherwise every argument is forwarded as given.

    :param func: callable to invoke (in this module, ``_parse_qs`` or
        ``_parse_qsl``).
    :param args: positional arguments forwarded unchanged.
    :param kwargs: keyword arguments forwarded, minus ``separator`` when it is
        not supported.
    :return: whatever ``func`` returns.
    """
    if not _REMOVE_SEPARATOR:
        return func(*args, **kwargs)
    forwarded = {
        key: value for key, value in kwargs.items() if key != 'separator'
    }
    return func(*args, **forwarded)
52
+
53
+
54
def parse_qs(*args, **kwargs):
    """Wrapper around ``urllib.parse.parse_qs`` (imported as ``_parse_qs``).

    All arguments are routed through ``_handle_separator``, which forwards
    them to ``_parse_qs`` and returns its result.
    """
    parsed = _handle_separator(_parse_qs, *args, **kwargs)
    return parsed
56
+
57
+
58
def parse_qsl(*args, **kwargs):
    """Wrapper around ``urllib.parse.parse_qsl`` (imported as ``_parse_qsl``).

    All arguments are routed through ``_handle_separator``, which forwards
    them to ``_parse_qsl`` and returns its result.
    """
    pairs = _handle_separator(_parse_qsl, *args, **kwargs)
    return pairs
60
+
61
+
44
62
# error handling function for bytes-to-Unicode decoding errors with URLs
45
63
def _quote_byte (error : UnicodeError ) -> Tuple [str , int ]:
46
64
error = cast (AnyUnicodeError , error )
@@ -200,6 +218,8 @@ def url_query_parameter(
200
218
parameter : str ,
201
219
default : Optional [str ] = None ,
202
220
keep_blank_values : Union [bool , int ] = 0 ,
221
+ * ,
222
+ separator : str = '&' ,
203
223
) -> Optional [str ]:
204
224
"""Return the value of a url parameter, given the url and parameter name
205
225
@@ -230,7 +250,9 @@ def url_query_parameter(
230
250
"""
231
251
232
252
queryparams = parse_qs (
233
- urlsplit (str (url ))[3 ], keep_blank_values = bool (keep_blank_values )
253
+ urlsplit (str (url ))[3 ],
254
+ keep_blank_values = bool (keep_blank_values ),
255
+ separator = separator ,
234
256
)
235
257
if parameter in queryparams :
236
258
return queryparams [parameter ][0 ]
@@ -305,9 +327,13 @@ def url_query_cleaner(
305
327
return url
306
328
307
329
308
- def _add_or_replace_parameters (url : str , params : Dict [str , str ]) -> str :
330
+ def _add_or_replace_parameters (url : str , params : Dict [str , str ], * , separator : str = '&' ) -> str :
309
331
parsed = urlsplit (url )
310
- current_args = parse_qsl (parsed .query , keep_blank_values = True )
332
+ current_args = parse_qsl (
333
+ parsed .query ,
334
+ keep_blank_values = True ,
335
+ separator = separator ,
336
+ )
311
337
312
338
new_args = []
313
339
seen_params = set ()
@@ -327,7 +353,7 @@ def _add_or_replace_parameters(url: str, params: Dict[str, str]) -> str:
327
353
return urlunsplit (parsed ._replace (query = query ))
328
354
329
355
330
- def add_or_replace_parameter (url : str , name : str , new_value : str ) -> str :
356
+ def add_or_replace_parameter (url : str , name : str , new_value : str , * , separator : str = '&' ) -> str :
331
357
"""Add or remove a parameter to a given url
332
358
333
359
>>> import w3lib.url
@@ -340,10 +366,10 @@ def add_or_replace_parameter(url: str, name: str, new_value: str) -> str:
340
366
>>>
341
367
342
368
"""
343
- return _add_or_replace_parameters (url , {name : new_value })
369
+ return _add_or_replace_parameters (url , {name : new_value }, separator = separator )
344
370
345
371
346
- def add_or_replace_parameters (url : str , new_parameters : Dict [str , str ]) -> str :
372
+ def add_or_replace_parameters (url : str , new_parameters : Dict [str , str ], * , separator : str = '&' ) -> str :
347
373
"""Add or remove a parameters to a given url
348
374
349
375
>>> import w3lib.url
@@ -355,7 +381,7 @@ def add_or_replace_parameters(url: str, new_parameters: Dict[str, str]) -> str:
355
381
>>>
356
382
357
383
"""
358
- return _add_or_replace_parameters (url , new_parameters )
384
+ return _add_or_replace_parameters (url , new_parameters , separator = separator )
359
385
360
386
361
387
def path_to_file_uri (path : str ) -> str :
@@ -528,6 +554,8 @@ def canonicalize_url(
528
554
keep_blank_values : bool = True ,
529
555
keep_fragments : bool = False ,
530
556
encoding : Optional [str ] = None ,
557
+ * ,
558
+ query_separator : str = '&'
531
559
) -> str :
532
560
r"""Canonicalize the given url by applying the following procedures:
533
561
@@ -600,7 +628,11 @@ def canonicalize_url(
600
628
# Similar considerations apply to query parts. The functionality of
601
629
# IRIs (namely, to be able to include non-ASCII characters) can only be
602
630
# used if the query part is encoded in UTF-8.
603
- keyvals = parse_qsl_to_bytes (query , keep_blank_values )
631
+ keyvals = parse_qsl_to_bytes (
632
+ query ,
633
+ keep_blank_values ,
634
+ separator = query_separator ,
635
+ )
604
636
605
637
keyvals .sort ()
606
638
query = urlencode (keyvals )
@@ -642,7 +674,10 @@ def parse_url(
642
674
643
675
644
676
def parse_qsl_to_bytes (
645
- qs : str , keep_blank_values : bool = False
677
+ qs : str ,
678
+ keep_blank_values : bool = False ,
679
+ * ,
680
+ separator : str = '&' ,
646
681
) -> List [Tuple [bytes , bytes ]]:
647
682
"""Parse a query given as a string argument.
648
683
@@ -665,7 +700,7 @@ def parse_qsl_to_bytes(
665
700
# with unquote_to_bytes(s)
666
701
coerce_args = cast (Callable [..., Tuple [str , Callable [..., bytes ]]], _coerce_args )
667
702
qs , _coerce_result = coerce_args (qs )
668
- pairs = [ s2 for s1 in qs .split ("&" ) for s2 in s1 . split ( ";" )]
703
+ pairs = qs .split (separator )
669
704
r = []
670
705
for name_value in pairs :
671
706
if not name_value :
0 commit comments