diff --git a/README.md b/README.md index b7f31c1..66c23e9 100644 --- a/README.md +++ b/README.md @@ -165,15 +165,18 @@ All parameters are optional. | Option | Type | Default | Description | | :-----------------: | :-------------------: | :-------------------: | :-------------------------------------------------------------------------------: | -| `header` | `List[Any]` | `None` | First table row seperated by header row separator. Values should support `str()`. | -| `body` | `List[List[Any]]` | `None` | List of rows for the main section of the table. Values should support `str()`. | -| `footer` | `List[Any]` | `None` | Last table row seperated by header row separator. Values should support `str()`. | +| `header` | `List[Any]` | `None` | First table row separated by header row separator. Values should support `str()` | +| `body` | `List[List[Any]]` | `None` | List of rows for the main section of the table. Values should support `str()` | +| `footer` | `List[Any]` | `None` | Last table row separated by header row separator. Values should support `str()` | | `column_widths` | `List[Optional[int]]` | `None` (automatic) | List of column widths in characters for each column | | `alignments` | `List[Alignment]` | `None` (all centered) | Column alignments
(ex. `[Alignment.LEFT, Alignment.CENTER, Alignment.RIGHT]`) | | `style` | `TableStyle` | `double_thin_compact` | Table style to use for the table\* | | `first_col_heading` | `bool` | `False` | Whether to add a heading column separator after the first column | | `last_col_heading` | `bool` | `False` | Whether to add a heading column separator before the last column | -| `cell_padding` | `int` | `1` | The minimum number of spaces to add between the cell content and the cell border. | +| `cell_padding` | `int` | `1` | The minimum number of spaces to add between the cell content and the cell border | +| `use_wcwidth` | `bool` | `True` | Whether to use [wcwidth][wcwidth] instead of `len()` to calculate cell width | + +[wcwidth]: https://pypi.org/project/wcwidth/ \*See a list of all preset styles [here](https://table2ascii.readthedocs.io/en/latest/styles.html). diff --git a/pyproject.toml b/pyproject.toml index 1c5a9d5..ae84208 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,5 +77,6 @@ namespace_packages = true [[tool.mypy.overrides]] module = [ "setuptools.*", + "wcwidth" ] ignore_missing_imports = true diff --git a/requirements.txt b/requirements.txt index 4037efa..b6ab4e4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -typing-extensions>=3.7.4; python_version<'3.8' \ No newline at end of file +typing-extensions>=3.7.4; python_version<'3.8' +wcwidth<1 \ No newline at end of file diff --git a/table2ascii/options.py b/table2ascii/options.py index 998edbd..9f73a16 100644 --- a/table2ascii/options.py +++ b/table2ascii/options.py @@ -8,7 +8,12 @@ @dataclass class Options: - """Class for storing options that the user sets""" + """Class for storing options that the user sets + + .. 
versionchanged:: 1.0.0 + + Added ``use_wcwidth`` option + """ first_col_heading: bool last_col_heading: bool @@ -16,3 +21,4 @@ class Options: alignments: list[Alignment] | None cell_padding: int style: TableStyle + use_wcwidth: bool diff --git a/table2ascii/table_to_ascii.py b/table2ascii/table_to_ascii.py index a8ff2c6..fcf8c51 100644 --- a/table2ascii/table_to_ascii.py +++ b/table2ascii/table_to_ascii.py @@ -2,6 +2,8 @@ from math import ceil, floor +from wcwidth import wcswidth + from .alignment import Alignment from .annotations import SupportsStr from .exceptions import ( @@ -44,6 +46,7 @@ def __init__( self.__first_col_heading = options.first_col_heading self.__last_col_heading = options.last_col_heading self.__cell_padding = options.cell_padding + self.__use_wcwidth = options.use_wcwidth # calculate number of columns self.__columns = self.__count_columns() @@ -93,7 +96,7 @@ def __auto_column_widths(self) -> list[int]: def widest_line(value: SupportsStr) -> int: """Returns the width of the longest line in a multi-line string""" text = str(value) - return max(len(line) for line in text.splitlines()) if len(text) else 0 + return max(self.__str_width(line) for line in text.splitlines()) if len(text) else 0 column_widths = [] # get the width necessary for each column @@ -145,17 +148,18 @@ def __pad(self, cell_value: SupportsStr, width: int, alignment: Alignment) -> st text = str(cell_value) padding = " " * self.__cell_padding padded_text = f"{padding}{text}{padding}" + text_width = self.__str_width(padded_text) if alignment == Alignment.LEFT: # pad with spaces on the end - return padded_text + (" " * (width - len(padded_text))) + return padded_text + (" " * (width - text_width)) if alignment == Alignment.CENTER: # pad with spaces, half on each side - before = " " * floor((width - len(padded_text)) / 2) - after = " " * ceil((width - len(padded_text)) / 2) + before = " " * floor((width - text_width) / 2) + after = " " * ceil((width - text_width) / 2) return before + 
padded_text + after if alignment == Alignment.RIGHT: # pad with spaces at the beginning - return (" " * (width - len(padded_text))) + padded_text + return (" " * (width - text_width)) + padded_text raise InvalidAlignmentError(alignment) def __row_to_ascii( @@ -344,6 +348,23 @@ def __body_to_ascii(self, body: list[list[SupportsStr]]) -> str: for row in body ) + def __str_width(self, text: str) -> int: + """ + Returns the width of the string in characters for the purposes of monospace + formatting. This is usually the same as the length of the string, but can be + different for double-width characters (East Asian Wide and East Asian Fullwidth) + or zero-width characters (combining characters, zero-width space, etc.) + + Args: + text: The text to measure + + Returns: + The width of the string in characters + """ + width = wcswidth(text) if self.__use_wcwidth else -1 + # if use_wcwidth is False or wcswidth fails, fall back to len + return width if width >= 0 else len(text) + def to_ascii(self) -> str: """Generates a formatted ASCII table @@ -380,9 +401,13 @@ def table2ascii( alignments: list[Alignment] | None = None, cell_padding: int = 1, style: TableStyle = PresetStyle.double_thin_compact, + use_wcwidth: bool = True, ) -> str: """Convert a 2D Python table to ASCII text + .. versionchanged:: 1.0.0 + Added the ``use_wcwidth`` parameter defaulting to :py:obj:`True`. + Args: header: List of column values in the table's header row. All values should be :class:`str` or support :class:`str` conversion. If not specified, the table will not have a header row. @@ -396,7 +421,7 @@ def table2ascii( Defaults to :py:obj:`False`. column_widths: List of widths in characters for each column. Any value of :py:obj:`None` indicates that the column width should be determined automatically. If :py:obj:`None` - is passed instead of a :py:obj:`~typing.List`, all columns will be automatically sized. + is passed instead of a :class:`list`, all columns will be automatically sized. 
Defaults to :py:obj:`None`. alignments: List of alignments for each column (ex. ``[Alignment.LEFT, Alignment.CENTER, Alignment.RIGHT]``). If not specified or set to @@ -406,6 +431,11 @@ def table2ascii( Defaults to ``1``. style: Table style to use for styling (preset styles can be imported). Defaults to :ref:`PresetStyle.double_thin_compact `. + use_wcwidth: Whether to use :func:`wcwidth.wcswidth` to determine the width of each cell instead of + :func:`len`. The :func:`~wcwidth.wcswidth` function takes into account double-width characters + (East Asian Wide and East Asian Fullwidth) and zero-width characters (combining characters, + zero-width space, etc.), whereas :func:`len` determines the width solely based on the number of + characters in the string. Defaults to :py:obj:`True`. Returns: The generated ASCII table @@ -421,5 +451,6 @@ def table2ascii( alignments=alignments, cell_padding=cell_padding, style=style, + use_wcwidth=use_wcwidth, ), ).to_ascii() diff --git a/tests/test_convert.py b/tests/test_convert.py index 414636f..60e696a 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -247,3 +247,52 @@ def test_multiline_cells(): "╚═══════════════════════════════════════════╝" ) assert text == expected + + +def test_east_asian_wide_characters_and_zero_width_wcwidth(): + # using wcwidth.wcswidth() to count the number of characters + text = t2a( + header=["#\u200b", "🦁", "🦡", "🦅", "🐍"], + body=[["💻", "✅", "✅", "❌", "❌"]], + footer=["🥞", "日", "月", "火", "水"], + first_col_heading=True, + ) + text2 = t2a( + header=["#\u200b", "🦁", "🦡", "🦅", "🐍"], + body=[["💻", "✅", "✅", "❌", "❌"]], + footer=["🥞", "日", "月", "火", "水"], + first_col_heading=True, + use_wcwidth=True, + ) + expected = ( + "╔════╦═══════════════════╗\n" + "║ #​ ║ 🦁 🦡 🦅 🐍 ║\n" + "╟────╫───────────────────╢\n" + "║ 💻 ║ ✅ ✅ ❌ ❌ ║\n" + "╟────╫───────────────────╢\n" + "║ 🥞 ║ 日 月 火 水 ║\n" + "╚════╩═══════════════════╝" + ) + assert text == expected + assert text2 == expected + + +def 
test_east_asian_wide_characters_and_zero_width_no_wcwidth(): + # using len() to count the number of characters + text = t2a( + header=["#\u200b", "🦁", "🦡", "🦅", "🐍"], + body=[["💻", "✅", "✅", "❌", "❌"]], + footer=["🥞", "日", "月", "火", "水"], + first_col_heading=True, + use_wcwidth=False, + ) + expected = ( + "╔════╦═══════════════╗\n" + "║ #​ ║ 🦁 🦡 🦅 🐍 ║\n" + "╟────╫───────────────╢\n" + "║ 💻 ║ ✅ ✅ ❌ ❌ ║\n" + "╟────╫───────────────╢\n" + "║ 🥞 ║ 日 月 火 水 ║\n" + "╚════╩═══════════════╝" + ) + assert text == expected