From 45ac2150a0993603023eb854b6c81a937fa870b6 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 25 Jul 2017 12:33:32 +0200 Subject: [PATCH 1/2] Update test_urlparse from Python 3.4 Keep test_main() and remove subTest() (introduced in Python 3.4). --- Lib/test/test_urlparse.py | 220 ++++++++++++++++++++++++++++++-------- 1 file changed, 173 insertions(+), 47 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index d67cf258b02123..e811123deb191a 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -664,6 +664,45 @@ def test_anyscheme(self): self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"), (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'')) + def test_default_scheme(self): + # Exercise the scheme parameter of urlparse() and urlsplit() + for func in (urllib.parse.urlparse, urllib.parse.urlsplit): + result = func("http://example.net/", "ftp") + self.assertEqual(result.scheme, "http") + result = func(b"http://example.net/", b"ftp") + self.assertEqual(result.scheme, b"http") + self.assertEqual(func("path", "ftp").scheme, "ftp") + self.assertEqual(func("path", scheme="ftp").scheme, "ftp") + self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp") + self.assertEqual(func("path").scheme, "") + self.assertEqual(func(b"path").scheme, b"") + self.assertEqual(func(b"path", "").scheme, b"") + + def test_parse_fragments(self): + # Exercise the allow_fragments parameter of urlparse() and urlsplit() + tests = ( + ("http:#frag", "path"), + ("//example.net#frag", "path"), + ("index.html#frag", "path"), + (";a=b#frag", "params"), + ("?a=b#frag", "query"), + ("#frag", "path"), + ) + for url, attr in tests: + for func in (urllib.parse.urlparse, urllib.parse.urlsplit): + if attr == "params" and func is urllib.parse.urlsplit: + attr = "path" + result = func(url, allow_fragments=False) + self.assertEqual(result.fragment, "") + self.assertTrue(getattr(result, attr).endswith("#frag")) + self.assertEqual(func(url, "", False).fragment, "") + + result = func(url, allow_fragments=True) + self.assertEqual(result.fragment, "frag") + self.assertFalse(getattr(result, attr).endswith("frag")) + self.assertEqual(func(url, "", True).fragment, "frag") + self.assertEqual(func(url).fragment, "frag") + def test_mixed_types_rejected(self): # Several functions that process either strings or ASCII encoded bytes # accept multiple arguments. Check they reject mixed type input @@ -749,52 +788,6 @@ def test_parse_qsl_encoding(self): errors="ignore") self.assertEqual(result, [('key', '\u0141-')]) - def test_splitport(self): - splitport = urllib.parse.splitport - self.assertEqual(splitport('parrot:88'), ('parrot', '88')) - self.assertEqual(splitport('parrot'), ('parrot', None)) - self.assertEqual(splitport('parrot:'), ('parrot', None)) - self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None)) - self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None)) - - def test_splitnport(self): - splitnport = urllib.parse.splitnport - self.assertEqual(splitnport('parrot:88'), ('parrot', 88)) - self.assertEqual(splitnport('parrot'), ('parrot', -1)) - self.assertEqual(splitnport('parrot', 55), ('parrot', 55)) - self.assertEqual(splitnport('parrot:'), ('parrot', -1)) - self.assertEqual(splitnport('parrot:', 55), ('parrot', 55)) - self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1)) - self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55)) - self.assertEqual(splitnport('parrot:cheese'), ('parrot', None)) - self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None)) - - def test_splitquery(self): - # Normal cases are exercised by other tests; ensure that we also - # catch cases with no port specified (testcase ensuring coverage) - result = urllib.parse.splitquery('http://python.org/fake?foo=bar') - self.assertEqual(result, ('http://python.org/fake', 'foo=bar')) - result = urllib.parse.splitquery('http://python.org/fake?foo=bar?') - self.assertEqual(result, ('http://python.org/fake?foo=bar', '')) - result = urllib.parse.splitquery('http://python.org/fake') - self.assertEqual(result, ('http://python.org/fake', None)) - - def test_splitvalue(self): - # Normal cases are exercised by other tests; test pathological cases - # with no key/value pairs. (testcase ensuring coverage) - result = urllib.parse.splitvalue('foo=bar') - self.assertEqual(result, ('foo', 'bar')) - result = urllib.parse.splitvalue('foo=') - self.assertEqual(result, ('foo', '')) - result = urllib.parse.splitvalue('foobar') - self.assertEqual(result, ('foobar', None)) - - def test_to_bytes(self): - result = urllib.parse.to_bytes('http://www.python.org') - self.assertEqual(result, 'http://www.python.org') - self.assertRaises(UnicodeError, urllib.parse.to_bytes, - 'http://www.python.org/medi\u00e6val') - def test_urlencode_sequences(self): # Other tests incidentally urlencode things; test non-covered cases: # Sequence and object values. @@ -863,9 +856,142 @@ def test_telurl_params(self): self.assertEqual(p1.path, '863-1234') self.assertEqual(p1.params, 'phone-context=+1-914-555') + def test_Quoter_repr(self): + quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE) + self.assertIn('Quoter', repr(quoter)) + + +class Utility_Tests(unittest.TestCase): + """Testcase to test the various utility functions in the urllib.""" + # In Python 2 this test class was in test_urllib. + + def test_splittype(self): + splittype = urllib.parse.splittype + self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring')) + self.assertEqual(splittype('opaquestring'), (None, 'opaquestring')) + self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring')) + self.assertEqual(splittype('type:'), ('type', '')) + self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string')) + + def test_splithost(self): + splithost = urllib.parse.splithost + self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'), + ('www.example.org:80', '/foo/bar/baz.html')) + self.assertEqual(splithost('//www.example.org:80'), + ('www.example.org:80', '')) + self.assertEqual(splithost('/foo/bar/baz.html'), + (None, '/foo/bar/baz.html')) + + def test_splituser(self): + splituser = urllib.parse.splituser + self.assertEqual(splituser('User:Pass@www.python.org:080'), + ('User:Pass', 'www.python.org:080')) + self.assertEqual(splituser('@www.python.org:080'), + ('', 'www.python.org:080')) + self.assertEqual(splituser('www.python.org:080'), + (None, 'www.python.org:080')) + self.assertEqual(splituser('User:Pass@'), + ('User:Pass', '')) + self.assertEqual(splituser('User@example.com:Pass@www.python.org:080'), + ('User@example.com:Pass', 'www.python.org:080')) + + def test_splitpasswd(self): + # Some of the password examples are not sensible, but it is added to + # confirming to RFC2617 and addressing issue4675. + splitpasswd = urllib.parse.splitpasswd + self.assertEqual(splitpasswd('user:ab'), ('user', 'ab')) + self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb')) + self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb')) + self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb')) + self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb')) + self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb')) + self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b')) + self.assertEqual(splitpasswd('user:a b'), ('user', 'a b')) + self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab')) + self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b')) + self.assertEqual(splitpasswd('user:'), ('user', '')) + self.assertEqual(splitpasswd('user'), ('user', None)) + self.assertEqual(splitpasswd(':ab'), ('', 'ab')) + + def test_splitport(self): + splitport = urllib.parse.splitport + self.assertEqual(splitport('parrot:88'), ('parrot', '88')) + self.assertEqual(splitport('parrot'), ('parrot', None)) + self.assertEqual(splitport('parrot:'), ('parrot', None)) + self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None)) + self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None)) + self.assertEqual(splitport('[::1]:88'), ('[::1]', '88')) + self.assertEqual(splitport('[::1]'), ('[::1]', None)) + self.assertEqual(splitport(':88'), ('', '88')) + + def test_splitnport(self): + splitnport = urllib.parse.splitnport + self.assertEqual(splitnport('parrot:88'), ('parrot', 88)) + self.assertEqual(splitnport('parrot'), ('parrot', -1)) + self.assertEqual(splitnport('parrot', 55), ('parrot', 55)) + self.assertEqual(splitnport('parrot:'), ('parrot', -1)) + self.assertEqual(splitnport('parrot:', 55), ('parrot', 55)) + self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1)) + self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55)) + self.assertEqual(splitnport('parrot:cheese'), ('parrot', None)) + self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None)) + + def test_splitquery(self): + # Normal cases are exercised by other tests; ensure that we also + # catch cases with no port specified (testcase ensuring coverage) + splitquery = urllib.parse.splitquery + self.assertEqual(splitquery('http://python.org/fake?foo=bar'), + ('http://python.org/fake', 'foo=bar')) + self.assertEqual(splitquery('http://python.org/fake?foo=bar?'), + ('http://python.org/fake?foo=bar', '')) + self.assertEqual(splitquery('http://python.org/fake'), + ('http://python.org/fake', None)) + self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar')) + + def test_splittag(self): + splittag = urllib.parse.splittag + self.assertEqual(splittag('http://example.com?foo=bar#baz'), + ('http://example.com?foo=bar', 'baz')) + self.assertEqual(splittag('http://example.com?foo=bar#'), + ('http://example.com?foo=bar', '')) + self.assertEqual(splittag('#baz'), ('', 'baz')) + self.assertEqual(splittag('http://example.com?foo=bar'), + ('http://example.com?foo=bar', None)) + self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'), + ('http://example.com?foo=bar#baz', 'boo')) + + def test_splitattr(self): + splitattr = urllib.parse.splitattr + self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'), + ('/path', ['attr1=value1', 'attr2=value2'])) + self.assertEqual(splitattr('/path;'), ('/path', [''])) + self.assertEqual(splitattr(';attr1=value1;attr2=value2'), + ('', ['attr1=value1', 'attr2=value2'])) + self.assertEqual(splitattr('/path'), ('/path', [])) + + def test_splitvalue(self): + # Normal cases are exercised by other tests; test pathological cases + # with no key/value pairs. (testcase ensuring coverage) + splitvalue = urllib.parse.splitvalue + self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar')) + self.assertEqual(splitvalue('foo='), ('foo', '')) + self.assertEqual(splitvalue('=bar'), ('', 'bar')) + self.assertEqual(splitvalue('foobar'), ('foobar', None)) + self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz')) + + def test_to_bytes(self): + result = urllib.parse.to_bytes('http://www.python.org') + self.assertEqual(result, 'http://www.python.org') + self.assertRaises(UnicodeError, urllib.parse.to_bytes, + 'http://www.python.org/medi\u00e6val') + + def test_unwrap(self): + url = urllib.parse.unwrap('') + self.assertEqual(url, 'type://host/path') + def test_main(): - support.run_unittest(UrlParseTestCase) + support.run_unittest(UrlParseTestCase, Utility_Tests) if __name__ == "__main__": test_main() From eebcbd815345809c10cd3a72c811a53e80ca0610 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 12 Jul 2017 14:51:46 +0200 Subject: [PATCH 2/2] bpo-30500: urllib: Simplify splithost by calling into urlparse. (#1849) (#2291) The current regex based splitting produces a wrong result. For example:: http://abc#@def Web browsers parse that URL as ``http://abc/#@def``, that is, the host is ``abc``, the path is ``/``, and the fragment is ``#@def``. (cherry picked from commit 90e01e50ef8a9e6c91f30d965563c378a4ad26de) (cherry picked from commit cc54c1c0d2d05fe7404ba64c53df4b1352ed2262) --- Lib/test/test_urlparse.py | 51 ++++++++++++++----- Lib/urllib/parse.py | 8 ++- Misc/ACKS | 1 + .../2017-07-11-22-02-51.bpo-30500.wXUrkQ.rst | 4 ++ 4 files changed, 47 insertions(+), 17 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2017-07-11-22-02-51.bpo-30500.wXUrkQ.rst diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index e811123deb191a..af637ffd78ab98 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -681,27 +681,34 @@ def test_default_scheme(self): def test_parse_fragments(self): # Exercise the allow_fragments parameter of urlparse() and urlsplit() tests = ( - ("http:#frag", "path"), - ("//example.net#frag", "path"), - ("index.html#frag", "path"), - (";a=b#frag", "params"), - ("?a=b#frag", "query"), - ("#frag", "path"), + ("http:#frag", "path", "frag"), + ("//example.net#frag", "path", "frag"), + ("index.html#frag", "path", "frag"), + (";a=b#frag", "params", "frag"), + ("?a=b#frag", "query", "frag"), + ("#frag", "path", "frag"), + ("abc#@frag", "path", "@frag"), + ("//abc#@frag", "path", "@frag"), + ("//abc:80#@frag", "path", "@frag"), + ("//abc#@frag:80", "path", "@frag:80"), ) - for url, attr in tests: + for url, attr, expected_frag in tests: for func in (urllib.parse.urlparse, urllib.parse.urlsplit): if attr == "params" and func is urllib.parse.urlsplit: attr = "path" result = func(url, allow_fragments=False) self.assertEqual(result.fragment, "") - self.assertTrue(getattr(result, attr).endswith("#frag")) + self.assertTrue( + getattr(result, attr).endswith("#" + expected_frag)) self.assertEqual(func(url, "", False).fragment, "") result = func(url, allow_fragments=True) - self.assertEqual(result.fragment, "frag") - self.assertFalse(getattr(result, attr).endswith("frag")) - self.assertEqual(func(url, "", True).fragment, "frag") - self.assertEqual(func(url).fragment, "frag") + self.assertEqual(result.fragment, expected_frag) + self.assertFalse( + getattr(result, attr).endswith(expected_frag)) + self.assertEqual(func(url, "", True).fragment, + expected_frag) + self.assertEqual(func(url).fragment, expected_frag) def test_mixed_types_rejected(self): # Several functions that process either strings or ASCII encoded bytes @@ -882,6 +889,26 @@ def test_splithost(self): self.assertEqual(splithost('/foo/bar/baz.html'), (None, '/foo/bar/baz.html')) + # bpo-30500: # starts a fragment. + self.assertEqual(splithost('//127.0.0.1#@host.com'), + ('127.0.0.1', '/#@host.com')) + self.assertEqual(splithost('//127.0.0.1#@host.com:80'), + ('127.0.0.1', '/#@host.com:80')) + self.assertEqual(splithost('//127.0.0.1:80#@host.com'), + ('127.0.0.1:80', '/#@host.com')) + + # Empty host is returned as empty string. + self.assertEqual(splithost("///file"), + ('', '/file')) + + # Trailing semicolon, question mark and hash symbol are kept. + self.assertEqual(splithost("//example.net/file;"), + ('example.net', '/file;')) + self.assertEqual(splithost("//example.net/file?"), + ('example.net', '/file?')) + self.assertEqual(splithost("//example.net/file#"), + ('example.net', '/file#')) + def test_splituser(self): splituser = urllib.parse.splituser self.assertEqual(splituser('User:Pass@www.python.org:080'), diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 975c6ffb9c15a3..4fcf0c0ead12c9 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -860,14 +860,12 @@ def splithost(url): """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" global _hostprog if _hostprog is None: - import re - _hostprog = re.compile('^//([^/?]*)(.*)$') + _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL) match = _hostprog.match(url) if match: - host_port = match.group(1) - path = match.group(2) - if path and not path.startswith('/'): + host_port, path = match.groups() + if path and path[0] != '/': path = '/' + path return host_port, path return None, url diff --git a/Misc/ACKS b/Misc/ACKS index cc194ab7adda03..e4c753839512a0 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -892,6 +892,7 @@ Chad Netzer Max Neunhöffer George Neville-Neil Hieu Nguyen +Nam Nguyen Johannes Nicolai Samuel Nicolary Jonathan Niehof diff --git a/Misc/NEWS.d/next/Security/2017-07-11-22-02-51.bpo-30500.wXUrkQ.rst b/Misc/NEWS.d/next/Security/2017-07-11-22-02-51.bpo-30500.wXUrkQ.rst new file mode 100644 index 00000000000000..6570e709d6bfac --- /dev/null +++ b/Misc/NEWS.d/next/Security/2017-07-11-22-02-51.bpo-30500.wXUrkQ.rst @@ -0,0 +1,4 @@ +Fix urllib.parse.splithost() to correctly parse fragments. For example, +``splithost('//127.0.0.1#@evil.com/')`` now correctly returns the +``127.0.0.1`` host, instead of treating ``@evil.com`` as the host in an +authentification (``login@host``).