From 11411bbcf451eb4c2e060ab7dfe64cd53899268e Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 16 Oct 2020 16:54:39 +0100 Subject: [PATCH 1/7] bpo-28660: make TextWrapper break long words on hyphens when break_long_words=True and break_on_hyphens=True --- Lib/test/test_textwrap.py | 53 +++++++++++++++++++++++++++++++++++++++ Lib/textwrap.py | 10 ++++++-- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py index ed97f70ba1fa40..05da11a1d94949 100644 --- a/Lib/test/test_textwrap.py +++ b/Lib/test/test_textwrap.py @@ -640,6 +640,59 @@ def test_max_lines_long(self): max_lines=4) +class LongWordWithHyphensTestCase(BaseTestCase): + def setUp(self): + self.wrapper = TextWrapper() + self.text = '''\ +We used enyzme 2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate synthase. +''' + + def test_break_long_words_on_hyphen(self): + expected = ['We used enyzme 2-succinyl-6-hydroxy-2,4-', + 'cyclohexadiene-1-carboxylate synthase.'] + self.check_wrap(self.text, 50, expected) + + expected = ['We used', 'enyzme 2-', 'succinyl-', '6-hydroxy-', '2,4-', + 'cyclohexad', 'iene-1-', 'carboxylat', 'e', 'synthase.'] + self.check_wrap(self.text, 10, expected) + + def test_break_long_words_not_on_hyphen(self): + expected = ['We used enyzme 2-succinyl-6-hydroxy-2,4-cyclohexad', + 'iene-1-carboxylate synthase.'] + self.check_wrap(self.text, 50, expected, break_on_hyphens=False) + + expected = ['We used', 'enyzme 2-s', 'uccinyl-6-', 'hydroxy-2,', + '4-cyclohex', 'adiene-1-c', 'arboxylate', 'synthase.'] + self.check_wrap(self.text, 10, expected, break_on_hyphens=False) + + def test_break_on_hyphen_but_not_long_words(self): + expected = ['We used enyzme', + '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate', + 'synthase.'] + + self.check_wrap(self.text, 50, expected, break_long_words=False) + + expected = ['We used', 'enyzme', + '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate', + 'synthase.'] + self.check_wrap(self.text, 10, expected, break_long_words=False) + + def test_do_not_break_long_words_or_on_hyphens(self): + expected = ['We used enyzme', + '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate', + 'synthase.'] + self.check_wrap(self.text, 50, expected, + break_long_words=False, + break_on_hyphens=False) + + expected = ['We used', 'enyzme', + '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate', + 'synthase.'] + self.check_wrap(self.text, 10, expected, + break_long_words=False, + break_on_hyphens=False) + + class IndentTestCases(BaseTestCase): # called before each test method diff --git a/Lib/textwrap.py b/Lib/textwrap.py index 30e693c8de0354..e627a2f0a2c07c 100644 --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -215,8 +215,14 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): # If we're allowed to break long words, then do so: put as much # of the next chunk onto the current line as will fit. if self.break_long_words: - cur_line.append(reversed_chunks[-1][:space_left]) - reversed_chunks[-1] = reversed_chunks[-1][space_left:] + end = space_left + chunk = reversed_chunks[-1] + if self.break_on_hyphens and len(chunk) > space_left: + hyphen = chunk.rfind('-', 0, space_left) + if hyphen != -1: + end = hyphen+1 + cur_line.append(chunk[:end]) + reversed_chunks[-1] = chunk[end:] # Otherwise, we have to preserve the long word intact. Only add # it to the current line if there's nothing already there -- From affeb9a8caf684dd2d57c103192416ac0c4e32f0 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 16 Oct 2020 16:08:05 +0000 Subject: [PATCH 2/7] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst diff --git a/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst b/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst new file mode 100644 index 00000000000000..a45c9aee3c96e7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst @@ -0,0 +1 @@ +textwrap.wrap() attempts to break long words after hyphens when break_long_words=True and break_on_hyphens=True. \ No newline at end of file From 6a224a66cf43a088035d07553e0f1d096a3ff26e Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Sat, 17 Oct 2020 16:42:44 +0100 Subject: [PATCH 3/7] Update 2020-10-16-16-08-04.bpo-28660.eX9pvD.rst --- .../next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst b/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst index a45c9aee3c96e7..daae8db12cef48 100644 --- a/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst +++ b/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst @@ -1 +1 @@ -textwrap.wrap() attempts to break long words after hyphens when break_long_words=True and break_on_hyphens=True. \ No newline at end of file +textwrap.wrap() now attempts to break long words after hyphens when break_long_words=True and break_on_hyphens=True. From f750e0b8701a2479172cfaa23b56d8f530f41e70 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Sun, 18 Oct 2020 14:32:28 +0100 Subject: [PATCH 4/7] Update Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst Co-authored-by: Serhiy Storchaka --- .../next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst b/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst index daae8db12cef48..d67993492f9ffe 100644 --- a/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst +++ b/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst @@ -1 +1 @@ -textwrap.wrap() now attempts to break long words after hyphens when break_long_words=True and break_on_hyphens=True. +:func:`textwrap.wrap` now attempts to break long words after hyphens when ``break_long_words=True`` and ``break_on_hyphens=True``. From 080b5a3af3b4464c7aca9f55ce7d27d92dc8119b Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Sun, 18 Oct 2020 14:32:38 +0100 Subject: [PATCH 5/7] Update Lib/textwrap.py Co-authored-by: Serhiy Storchaka --- Lib/textwrap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/textwrap.py b/Lib/textwrap.py index e627a2f0a2c07c..e8b298ce45cfe6 100644 --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -220,7 +220,7 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): if self.break_on_hyphens and len(chunk) > space_left: hyphen = chunk.rfind('-', 0, space_left) if hyphen != -1: - end = hyphen+1 + end = hyphen + 1 cur_line.append(chunk[:end]) reversed_chunks[-1] = chunk[end:] From 386540888a20db735c58defadb8dacfc3aa333ac Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Sun, 18 Oct 2020 16:43:42 +0100 Subject: [PATCH 6/7] do not break after a hyphen-only prefix --- Lib/test/test_textwrap.py | 37 ++++++++++++++++++++++++++++--------- Lib/textwrap.py | 4 +++- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py index 05da11a1d94949..dfbc2b93dfc0d6 100644 --- a/Lib/test/test_textwrap.py +++ b/Lib/test/test_textwrap.py @@ -643,55 +643,74 @@ def test_max_lines_long(self): class LongWordWithHyphensTestCase(BaseTestCase): def setUp(self): self.wrapper = TextWrapper() - self.text = '''\ + self.text1 = '''\ We used enyzme 2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate synthase. +''' + self.text2 = '''\ +1234567890-1234567890--this_is_a_very_long_option_indeed-good-bye" ''' def test_break_long_words_on_hyphen(self): expected = ['We used enyzme 2-succinyl-6-hydroxy-2,4-', 'cyclohexadiene-1-carboxylate synthase.'] - self.check_wrap(self.text, 50, expected) + self.check_wrap(self.text1, 50, expected) expected = ['We used', 'enyzme 2-', 'succinyl-', '6-hydroxy-', '2,4-', 'cyclohexad', 'iene-1-', 'carboxylat', 'e', 'synthase.'] - self.check_wrap(self.text, 10, expected) + self.check_wrap(self.text1, 10, expected) + + expected = ['1234567890', '-123456789', '0--this_is', '_a_very_lo', + 'ng_option_', 'indeed-', 'good-bye"'] + self.check_wrap(self.text2, 10, expected) def test_break_long_words_not_on_hyphen(self): expected = ['We used enyzme 2-succinyl-6-hydroxy-2,4-cyclohexad', 'iene-1-carboxylate synthase.'] - self.check_wrap(self.text, 50, expected, break_on_hyphens=False) + self.check_wrap(self.text1, 50, expected, break_on_hyphens=False) expected = ['We used', 'enyzme 2-s', 'uccinyl-6-', 'hydroxy-2,', '4-cyclohex', 'adiene-1-c', 'arboxylate', 'synthase.'] - self.check_wrap(self.text, 10, expected, break_on_hyphens=False) + self.check_wrap(self.text1, 10, expected, break_on_hyphens=False) + + expected = ['1234567890', '-123456789', '0--this_is', '_a_very_lo', + 'ng_option_', 'indeed-', 'good-bye"'] + self.check_wrap(self.text2, 10, expected) def test_break_on_hyphen_but_not_long_words(self): expected = ['We used enyzme', '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate', 'synthase.'] - self.check_wrap(self.text, 50, expected, break_long_words=False) + self.check_wrap(self.text1, 50, expected, break_long_words=False) expected = ['We used', 'enyzme', '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate', 'synthase.'] - self.check_wrap(self.text, 10, expected, break_long_words=False) + self.check_wrap(self.text1, 10, expected, break_long_words=False) + + expected = ['1234567890', '-123456789', '0--this_is', '_a_very_lo', + 'ng_option_', 'indeed-', 'good-bye"'] + self.check_wrap(self.text2, 10, expected) + def test_do_not_break_long_words_or_on_hyphens(self): expected = ['We used enyzme', '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate', 'synthase.'] - self.check_wrap(self.text, 50, expected, + self.check_wrap(self.text1, 50, expected, break_long_words=False, break_on_hyphens=False) expected = ['We used', 'enyzme', '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate', 'synthase.'] - self.check_wrap(self.text, 10, expected, + self.check_wrap(self.text1, 10, expected, break_long_words=False, break_on_hyphens=False) + expected = ['1234567890', '-123456789', '0--this_is', '_a_very_lo', + 'ng_option_', 'indeed-', 'good-bye"'] + self.check_wrap(self.text2, 10, expected) class IndentTestCases(BaseTestCase): diff --git a/Lib/textwrap.py b/Lib/textwrap.py index e8b298ce45cfe6..372ca834ae0524 100644 --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -218,8 +218,10 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): end = space_left chunk = reversed_chunks[-1] if self.break_on_hyphens and len(chunk) > space_left: + # break after last hyphen, but only if there are + # non-hyphens before it hyphen = chunk.rfind('-', 0, space_left) - if hyphen != -1: + if hyphen > 0 and any(c != '-' for c in chunk[0:hyphen]): end = hyphen + 1 cur_line.append(chunk[:end]) reversed_chunks[-1] = chunk[end:] From 9f06183a3700c00c17e88b2d022bc3b782d75009 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Sun, 18 Oct 2020 17:08:15 +0100 Subject: [PATCH 7/7] chunk[0:hyphen] --> chunk[:hyphen] --- Lib/textwrap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/textwrap.py b/Lib/textwrap.py index 372ca834ae0524..841de9baecf5d8 100644 --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -221,7 +221,7 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): # break after last hyphen, but only if there are # non-hyphens before it hyphen = chunk.rfind('-', 0, space_left) - if hyphen > 0 and any(c != '-' for c in chunk[0:hyphen]): + if hyphen > 0 and any(c != '-' for c in chunk[:hyphen]): end = hyphen + 1 cur_line.append(chunk[:end]) reversed_chunks[-1] = chunk[end:]