From 27937581e7b551772823da2c3954ca2d07000fb3 Mon Sep 17 00:00:00 2001
From: Hugo van Kemenade <hugovk@users.noreply.github.com>
Date: Tue, 9 Nov 2021 15:28:07 +0200
Subject: [PATCH 1/5] Add python_requires to help pip

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 743abe43..538f6cb8 100644
--- a/setup.py
+++ b/setup.py
@@ -13,6 +13,7 @@
     include_package_data=True,
     zip_safe=False,
     platforms=["Any"],
+    python_requires=">=3.6",
     classifiers=[
         "Development Status :: 5 - Production/Stable",
         "License :: OSI Approved :: BSD License",

From 313a723ec01cb2023bda51b6cde72df8976a17b7 Mon Sep 17 00:00:00 2001
From: Hugo van Kemenade <hugovk@users.noreply.github.com>
Date: Tue, 9 Nov 2021 15:26:26 +0200
Subject: [PATCH 2/5] Remove redundant code for EOL Python 2.7 and 3.5

---
 setup.cfg         | 2 --
 tox.ini           | 2 +-
 w3lib/encoding.py | 5 +----
 3 files changed, 2 insertions(+), 7 deletions(-)
 delete mode 100644 setup.cfg

diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index 2a9acf13..00000000
--- a/setup.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-[bdist_wheel]
-universal = 1
diff --git a/tox.ini b/tox.ini
index 4e8e4767..20883f68 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,7 +4,7 @@
 # and then run "tox" from this directory.
 
 [tox]
-envlist = py27, pypy, py35, py36, py37, py38, pypy3, docs, security, flake8, pylint, black
+envlist = py36, py37, py38, pypy3, docs, security, flake8, pylint, black
 
 [testenv]
 deps =
diff --git a/w3lib/encoding.py b/w3lib/encoding.py
index 32252105..74034adf 100644
--- a/w3lib/encoding.py
+++ b/w3lib/encoding.py
@@ -2,7 +2,6 @@
 Functions for handling encoding of web pages
 """
 import re, codecs, encodings
-from sys import version_info
 from typing import Callable, Match, Optional, Tuple, Union, cast
 from w3lib._types import AnyUnicodeError, StrOrBytes
 from w3lib.util import to_native_str
@@ -208,9 +207,7 @@ def to_unicode(data_str: bytes, encoding: str) -> str:
     Characters that cannot be converted will be converted to ``\\ufffd`` (the
     unicode replacement character).
     """
-    return data_str.decode(
-        encoding, "replace" if version_info[0:2] >= (3, 3) else "w3lib_replace"
-    )
+    return data_str.decode(encoding, "replace")
 
 
 def html_to_unicode(

From e22a503a412088c46deb2e1838d6d284b01804c1 Mon Sep 17 00:00:00 2001
From: Hugo van Kemenade <hugovk@users.noreply.github.com>
Date: Tue, 9 Nov 2021 15:49:36 +0200
Subject: [PATCH 3/5] Fix DeprecationWarning: The w3lib.utils.to_native_str
 function is deprecated and will be removed in a future release. Please use
 w3lib.utils.to_unicode instead.

---
 w3lib/encoding.py | 4 ++--
 w3lib/http.py     | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/w3lib/encoding.py b/w3lib/encoding.py
index 74034adf..8a3adbfd 100644
--- a/w3lib/encoding.py
+++ b/w3lib/encoding.py
@@ -4,7 +4,7 @@
 import re, codecs, encodings
 from typing import Callable, Match, Optional, Tuple, Union, cast
 from w3lib._types import AnyUnicodeError, StrOrBytes
-from w3lib.util import to_native_str
+import w3lib.util
 
 _HEADER_ENCODING_RE = re.compile(r"charset=([\w-]+)", re.I)
 
@@ -92,7 +92,7 @@ def html_body_declared_encoding(html_body_str: StrOrBytes) -> Optional[str]:
             or match.group("xmlcharset")
         )
         if encoding:
-            return resolve_encoding(to_native_str(encoding))
+            return resolve_encoding(w3lib.util.to_unicode(encoding))
 
     return None
 
diff --git a/w3lib/http.py b/w3lib/http.py
index 4ea31fad..bf87330b 100644
--- a/w3lib/http.py
+++ b/w3lib/http.py
@@ -1,6 +1,6 @@
 from base64 import urlsafe_b64encode
 from typing import Any, List, MutableMapping, Optional, AnyStr, Sequence, Union, Mapping
-from w3lib.util import to_bytes, to_native_str
+from w3lib.util import to_bytes, to_unicode
 
 HeadersDictInput = Mapping[bytes, Union[Any, Sequence]]
 HeadersDictOutput = MutableMapping[bytes, List[bytes]]
@@ -97,7 +97,7 @@ def basic_auth_header(
 
     """
 
-    auth = "%s:%s" % (to_native_str(username), to_native_str(password))
+    auth = "%s:%s" % (to_unicode(username), to_unicode(password))
     # XXX: RFC 2617 doesn't define encoding, but ISO-8859-1
     # seems to be the most widely used encoding here. See also:
     # http://greenbytes.de/tech/webdav/draft-ietf-httpauth-basicauth-enc-latest.html

From 09d9ce249b4a3e43a5e3a718868c038bb8ef6a1f Mon Sep 17 00:00:00 2001
From: Hugo van Kemenade <hugovk@users.noreply.github.com>
Date: Tue, 9 Nov 2021 15:55:12 +0200
Subject: [PATCH 4/5] Upgrade Python syntax with pyupgrade --py36-plus

---
 tests/test_encoding.py | 2 +-
 tests/test_url.py      | 2 +-
 w3lib/encoding.py      | 2 +-
 w3lib/html.py          | 4 +---
 w3lib/http.py          | 2 +-
 5 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/tests/test_encoding.py b/tests/test_encoding.py
index 33d7f110..dfda2032 100644
--- a/tests/test_encoding.py
+++ b/tests/test_encoding.py
@@ -149,7 +149,7 @@ def _assert_encoding(self, content_type, body, expected_encoding, expected_unico
         else:
             self.assertTrue(
                 body_unicode in expected_unicode,
-                "%s is not in %s" % (body_unicode, expected_unicode),
+                f"{body_unicode} is not in {expected_unicode}",
             )
 
     def test_content_type_and_conversion(self):
diff --git a/tests/test_url.py b/tests/test_url.py
index fe9ee999..b1299dee 100644
--- a/tests/test_url.py
+++ b/tests/test_url.py
@@ -1033,7 +1033,7 @@ def test_bytes_uri(self):
 
     def test_unicode_uri(self):
         result = parse_data_uri("data:,é")
-        self.assertEqual(result.data, "é".encode("utf-8"))
+        self.assertEqual(result.data, "é".encode())
 
     def test_default_mediatype(self):
         result = parse_data_uri("data:;charset=iso-8859-7,%be%d3%be")
diff --git a/w3lib/encoding.py b/w3lib/encoding.py
index 8a3adbfd..84c0e28b 100644
--- a/w3lib/encoding.py
+++ b/w3lib/encoding.py
@@ -162,7 +162,7 @@ def resolve_encoding(encoding_alias: str) -> Optional[str]:
     (codecs.BOM_UTF16_LE, "utf-16-le"),
     (codecs.BOM_UTF8, "utf-8"),
 ]
-_FIRST_CHARS = set(c[0] for (c, _) in _BOM_TABLE)
+_FIRST_CHARS = {c[0] for (c, _) in _BOM_TABLE}
 
 
 def read_bom(data: bytes) -> Union[Tuple[None, None], Tuple[str, bytes]]:
diff --git a/w3lib/html.py b/w3lib/html.py
index 634d90f5..8c5c32de 100644
--- a/w3lib/html.py
+++ b/w3lib/html.py
@@ -228,9 +228,7 @@ def remove_tags_with_content(
 
     utext = to_unicode(text, encoding)
     if which_ones:
-        tags = "|".join(
-            [r"<%s\b.*?</%s>|<%s\s*/>" % (tag, tag, tag) for tag in which_ones]
-        )
+        tags = "|".join([fr"<{tag}\b.*?</{tag}>|<{tag}\s*/>" for tag in which_ones])
         retags = re.compile(tags, re.DOTALL | re.IGNORECASE)
         utext = retags.sub("", utext)
     return utext
diff --git a/w3lib/http.py b/w3lib/http.py
index bf87330b..e14e4345 100644
--- a/w3lib/http.py
+++ b/w3lib/http.py
@@ -97,7 +97,7 @@ def basic_auth_header(
 
     """
 
-    auth = "%s:%s" % (to_unicode(username), to_unicode(password))
+    auth = f"{to_unicode(username)}:{to_unicode(password)}"
     # XXX: RFC 2617 doesn't define encoding, but ISO-8859-1
     # seems to be the most widely used encoding here. See also:
     # http://greenbytes.de/tech/webdav/draft-ietf-httpauth-basicauth-enc-latest.html

From f4ab57a18c92f5395a8e9075ca58201cf8095dfb Mon Sep 17 00:00:00 2001
From: Hugo van Kemenade <hugovk@users.noreply.github.com>
Date: Tue, 9 Nov 2021 16:23:09 +0200
Subject: [PATCH 5/5] Fix pylint consider-using-f-string warnings

---
 tests/test_html.py |  2 +-
 tests/test_url.py  | 16 ++++------------
 w3lib/encoding.py  |  1 +
 w3lib/url.py       |  4 +++-
 4 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/tests/test_html.py b/tests/test_html.py
index f6ca90d2..d61a15ca 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -124,7 +124,7 @@ def test_missing_semicolon(self):
         ):
             self.assertEqual(replace_entities(entity, encoding="cp1252"), result)
             self.assertEqual(
-                replace_entities("x%sy" % entity, encoding="cp1252"), "x%sy" % result
+                replace_entities(f"x{entity}y", encoding="cp1252"), f"x{result}y"
             )
 
     def test_encoding(self):
diff --git a/tests/test_url.py b/tests/test_url.py
index b1299dee..f721bd62 100644
--- a/tests/test_url.py
+++ b/tests/test_url.py
@@ -266,12 +266,8 @@ def test_safe_url_idna_encoding_failure(self):
 
         # DNS label too long
         self.assertEqual(
-            safe_url_string(
-                "http://www.{label}.com/résumé?q=résumé".format(label="example" * 11)
-            ),
-            "http://www.{label}.com/r%C3%A9sum%C3%A9?q=r%C3%A9sum%C3%A9".format(
-                label="example" * 11
-            ),
+            safe_url_string(f"http://www.{'example' * 11}.com/résumé?q=résumé"),
+            f"http://www.{'example' * 11}.com/r%C3%A9sum%C3%A9?q=r%C3%A9sum%C3%A9",
         )
 
     def test_safe_url_port_number(self):
@@ -971,12 +967,8 @@ def test_canonicalize_url_idna_exceptions(self):
 
         # DNS label too long
         self.assertEqual(
-            canonicalize_url(
-                "http://www.{label}.com/résumé?q=résumé".format(label="example" * 11)
-            ),
-            "http://www.{label}.com/r%C3%A9sum%C3%A9?q=r%C3%A9sum%C3%A9".format(
-                label="example" * 11
-            ),
+            canonicalize_url(f"http://www.{'example' * 11}.com/résumé?q=résumé"),
+            f"http://www.{'example' * 11}.com/r%C3%A9sum%C3%A9?q=r%C3%A9sum%C3%A9",
         )
 
     def test_preserve_nonfragment_hash(self):
diff --git a/w3lib/encoding.py b/w3lib/encoding.py
index 84c0e28b..86b678be 100644
--- a/w3lib/encoding.py
+++ b/w3lib/encoding.py
@@ -45,6 +45,7 @@ def http_content_type_encoding(content_type: Optional[str]) -> Optional[str]:
 _XML_ENCODING_RE = _TEMPLATE % ("encoding", r"(?P<xmlcharset>[\w-]+)")
 
 # check for meta tags, or xml decl. and stop search if a body tag is encountered
+# pylint: disable=consider-using-f-string
 _BODY_ENCODING_PATTERN = (
     r"<\s*(?:meta%s(?:(?:\s+%s|\s+%s){2}|\s+%s)|\?xml\s[^>]+%s|body)"
     % (_SKIP_ATTRS, _HTTPEQUIV_RE, _CONTENT_RE, _CONTENT2_RE, _XML_ENCODING_RE)
diff --git a/w3lib/url.py b/w3lib/url.py
index 71398516..0592a8bf 100644
--- a/w3lib/url.py
+++ b/w3lib/url.py
@@ -319,7 +319,7 @@ def path_to_file_uri(path: str) -> str:
     x = pathname2url(os.path.abspath(path))
     if os.name == "nt":
         x = x.replace("|", ":")  # http://bugs.python.org/issue5861
-    return "file:///%s" % x.lstrip("/")
+    return f"file:///{x.lstrip('/')}"
 
 
 def file_uri_to_path(uri: str) -> str:
@@ -344,6 +344,7 @@ def any_to_uri(uri_or_path: str) -> str:
 _char = set(map(chr, range(127)))
 
 # RFC 2045 token.
+# pylint: disable=consider-using-f-string
 _token = r"[{}]+".format(
     re.escape(
         "".join(
@@ -359,6 +360,7 @@ def any_to_uri(uri_or_path: str) -> str:
 )
 
 # RFC 822 quoted-string, without surrounding quotation marks.
+# pylint: disable=consider-using-f-string
 _quoted_string = r"(?:[{}]|(?:\\[{}]))*".format(
     re.escape("".join(_char - {'"', "\\", "\r"})), re.escape("".join(_char))
 )