diff --git a/tests/test_html.py b/tests/test_html.py
index d4861ba7..1e637b0f 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -372,6 +372,30 @@ def test_get_base_url(self):
get_base_url(text, baseurl.encode("ascii")), "http://example.org/something"
)
+    def test_base_url_in_comment(self):
+        """A ``<base>`` tag inside an HTML comment must not set the base URL."""
+        # Only a commented-out <base> is present, so no base URL may be
+        # picked up and the result is the empty string.
+        self.assertEqual(
+            get_base_url("""<!-- <base href="http://example.com/"/> -->"""), ""
+        )
+        # A commented-out <base> followed by a live one: only the live
+        # tag may be used.
+        self.assertEqual(
+            get_base_url(
+                """<!-- <base href="http://example.com/"/> --> <base href="http://example_2.com/"/>"""
+            ),
+            "http://example_2.com/",
+        )
+
+        # Several commented-out <base> tags precede the live one; all of
+        # the commented ones must be skipped.
+        self.assertEqual(
+            get_base_url(
+                """<!-- <base href="http://example.com/"/> --> <!-- <base href="http://example_2.com/"/> --> <base href="http://example_3.com/"/>"""
+            ),
+            "http://example_3.com/",
+        )
+
def test_relative_url_with_absolute_path(self):
baseurl = "https://example.org"
text = """\
diff --git a/w3lib/html.py b/w3lib/html.py
index a4be0542..a31d42bd 100644
--- a/w3lib/html.py
+++ b/w3lib/html.py
@@ -311,7 +311,7 @@ def get_base_url(
"""
- utext = to_unicode(text, encoding)
+ utext: str = remove_comments(text, encoding=encoding)
m = _baseurl_re.search(utext)
if m:
return urljoin(