From 057eb29153b0be0f12a3b33a16f996e440de4f03 Mon Sep 17 00:00:00 2001 From: rdmontgomery Date: Thu, 18 Oct 2018 18:53:34 -0400 Subject: [PATCH 01/14] ENH: set infer_nrows for read_fwf (GH15138) Previously, read_fwf would infer the width of the columns from the first 100 rows of data. It now accepts a parameter infer_nrows that lets the user choose how many rows to use in the inference. --- pandas/io/parsers.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 1edc6f6e14442..b3b4a51a57798 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -527,6 +527,7 @@ def _read(filepath_or_buffer, kwds): _fwf_defaults = { 'colspecs': 'infer', + 'infer_nrows': 100, 'widths': None, } @@ -716,7 +717,8 @@ def parser_f(filepath_or_buffer, @Appender(_read_fwf_doc) -def read_fwf(filepath_or_buffer, colspecs='infer', widths=None, **kwds): +def read_fwf(filepath_or_buffer, colspecs='infer', infer_nrows=100, + widths=None, **kwds): # Check input arguments. if colspecs is None and widths is None: raise ValueError("Must specify either colspecs or widths") @@ -732,6 +734,7 @@ def read_fwf(filepath_or_buffer, colspecs='infer', widths=None, **kwds): col += w kwds['colspecs'] = colspecs + kwds['infer_nrows'] = infer_nrows kwds['engine'] = 'python-fwf' return _read(filepath_or_buffer, kwds) @@ -3361,13 +3364,15 @@ class FixedWidthReader(BaseIterator): A reader of fixed-width lines. """ - def __init__(self, f, colspecs, delimiter, comment, skiprows=None): + def __init__(self, f, colspecs, delimiter, comment, infer_nrows, + skiprows=None): self.f = f self.buffer = None self.delimiter = '\r\n' + delimiter if delimiter else '\n\r\t ' self.comment = comment if colspecs == 'infer': - self.colspecs = self.detect_colspecs(skiprows=skiprows) + self.colspecs = self.detect_colspecs(infer_nrows=infer_nrows, + skiprows=skiprows) else: self.colspecs = colspecs @@ -3420,11 +3425,11 @@ def get_rows(self, n, skiprows=None): self.buffer = iter(buffer_rows) return detect_rows - def detect_colspecs(self, n=100, skiprows=None): + def detect_colspecs(self, infer_nrows, skiprows=None): # Regex escape the delimiters delimiters = ''.join(r'\%s' % x for x in self.delimiter) pattern = re.compile('([^%s]+)' % delimiters) - rows = self.get_rows(n, skiprows) + rows = self.get_rows(infer_nrows, skiprows) if not rows: raise EmptyDataError("No rows from which to infer column width") max_len = max(map(len, rows)) @@ -3463,8 +3468,10 @@ class FixedWidthFieldParser(PythonParser): def __init__(self, f, **kwds): # Support iterators, convert to a list. self.colspecs = kwds.pop('colspecs') + self.infer_nrows = kwds.pop('infer_nrows') PythonParser.__init__(self, f, **kwds) def _make_reader(self, f): self.data = FixedWidthReader(f, self.colspecs, self.delimiter, - self.comment, self.skiprows) + self.comment, self.infer_nrows, + self.skiprows) From 9a9609d92e13ed26f69eddc08a3312154fe95dd7 Mon Sep 17 00:00:00 2001 From: rdmontgomery Date: Thu, 18 Oct 2018 19:25:40 -0400 Subject: [PATCH 02/14] Add documentation for infer_nrows Fixed reference in colspecs. Switched half-open interval representation from non-standard [from, to[ to standard [from, to). --- pandas/io/parsers.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index b3b4a51a57798..13c0f98c5373f 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -342,14 +342,17 @@ _engine_doc)) _fwf_widths = """\ -colspecs : list of pairs (int, int) or 'infer'. optional +colspecs : list of pairs (int, int) or 'infer', default 'infer' A list of pairs (tuples) giving the extents of the fixed-width - fields of each line as half-open intervals (i.e., [from, to[ ). + fields of each line as half-open intervals (i.e., [from, to) ). String value 'infer' can be used to instruct the parser to try - detecting the column specifications from the first 100 rows of - the data which are not being skipped via skiprows (default='infer'). -widths : list of ints. optional - A list of field widths which can be used instead of 'colspecs' if + detecting the column specifications using the ``infer_nrows`` + number of rows of the data which are not being skipped via skiprows. +infer_nrows : int or 'all', default 100 + The number of rows to consider when letting the parser determine the + ``colspecs``. If 'all', then all the rows will be used. +widths : list of ints, optional + A list of field widths which can be used instead of ``colspecs`` if the intervals are contiguous. delimiter : str, default ``'\t' + ' '`` Characters to consider as filler characters in the fixed-width file. From 68d83c92c77073fefd39baa8ca971144f3d6a0f2 Mon Sep 17 00:00:00 2001 From: rdmontgomery Date: Thu, 18 Oct 2018 23:02:35 -0400 Subject: [PATCH 03/14] Remove documentation for infer_nrows='all' --- pandas/io/parsers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 13c0f98c5373f..a7855a6d39d77 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -348,9 +348,9 @@ String value 'infer' can be used to instruct the parser to try detecting the column specifications using the ``infer_nrows`` number of rows of the data which are not being skipped via skiprows. -infer_nrows : int or 'all', default 100 +infer_nrows : int, default 100 The number of rows to consider when letting the parser determine the - ``colspecs``. If 'all', then all the rows will be used. + ``colspecs``. widths : list of ints, optional A list of field widths which can be used instead of ``colspecs`` if the intervals are contiguous. From da081a868c403ed42ad754adc32333e36bdf06cd Mon Sep 17 00:00:00 2001 From: rdmontgomery Date: Fri, 19 Oct 2018 08:28:41 -0400 Subject: [PATCH 04/14] Add test to explicitly set infer_nrows --- pandas/tests/io/parser/test_read_fwf.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index a60f2b5a4c946..43b80697ef0af 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -141,6 +141,17 @@ def test_fwf_colspecs_None(self): expected = DataFrame([[123456, 456], [456789, 789]]) tm.assert_frame_equal(result, expected) + def test_fwf_colspecs_infer_nrows(self): + # GH 15138 + # infer_nrows = 1 should have colspec == [(2, 3), (5, 6)] + data = """\ + 1 2 +123 98 +""" + df = read_fwf(StringIO(data), header=None, infer_nrows=1) + expected = pd.DataFrame([[1, 2], [3, 8]]) + tm.assert_frame_equal(df, expected) + def test_fwf_regression(self): # GH 3594 # turns out 'T060' is parsable as a datetime slice! From 3f69510f42f3fa7b1df35ac9187c9e5a65970b36 Mon Sep 17 00:00:00 2001 From: rdmontgomery Date: Tue, 23 Oct 2018 08:34:54 -0400 Subject: [PATCH 05/14] Move new keyword to end of positional arguments --- pandas/io/parsers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a7855a6d39d77..b1c7aa559666c 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -720,8 +720,8 @@ def parser_f(filepath_or_buffer, @Appender(_read_fwf_doc) -def read_fwf(filepath_or_buffer, colspecs='infer', infer_nrows=100, - widths=None, **kwds): +def read_fwf(filepath_or_buffer, colspecs='infer', widths=None, + infer_nrows=100, **kwds): # Check input arguments. if colspecs is None and widths is None: raise ValueError("Must specify either colspecs or widths") From f3e715d0d0d8a0ca7a78eafb3f01f9170a740cbd Mon Sep 17 00:00:00 2001 From: rdmontgomery Date: Tue, 23 Oct 2018 08:36:03 -0400 Subject: [PATCH 06/14] Make infer_nrows optional to retain behavior --- pandas/io/parsers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index b1c7aa559666c..ceb76cca3e63c 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -3428,7 +3428,7 @@ def get_rows(self, n, skiprows=None): self.buffer = iter(buffer_rows) return detect_rows - def detect_colspecs(self, infer_nrows, skiprows=None): + def detect_colspecs(self, infer_nrows=100, skiprows=None): # Regex escape the delimiters delimiters = ''.join(r'\%s' % x for x in self.delimiter) pattern = re.compile('([^%s]+)' % delimiters) From 1df0493214c2dd6c82baac017fa716f5c34a281b Mon Sep 17 00:00:00 2001 From: rdmontgomery Date: Tue, 23 Oct 2018 09:28:39 -0400 Subject: [PATCH 07/14] Make infer_nrows optional when invoking object --- pandas/io/parsers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index ceb76cca3e63c..87f589617c95d 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -3367,7 +3367,7 @@ class FixedWidthReader(BaseIterator): A reader of fixed-width lines. """ - def __init__(self, f, colspecs, delimiter, comment, infer_nrows, + def __init__(self, f, colspecs, delimiter, comment, infer_nrows=100, skiprows=None): self.f = f self.buffer = None From a35abd8900e9bff3ceb2a81cdd3aecc76375dd35 Mon Sep 17 00:00:00 2001 From: rdmontgomery Date: Wed, 24 Oct 2018 14:26:40 -0400 Subject: [PATCH 08/14] Let infer_nrows default to None to keep behavior Rework so that if infer_nrows is not set, detect_colspecs still uses n=100, preserving previous behavior. If infer_nrows comes down the pipe, the default n is overwritten. Also make sure that the new parameter infer_nrows is the trailing keyword in all function calls. Fix the documentation to reflect these changes. --- pandas/io/parsers.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 87f589617c95d..a2af2243a3abb 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -346,13 +346,15 @@ A list of pairs (tuples) giving the extents of the fixed-width fields of each line as half-open intervals (i.e., [from, to) ). String value 'infer' can be used to instruct the parser to try - detecting the column specifications using the ``infer_nrows`` - number of rows of the data which are not being skipped via skiprows. -infer_nrows : int, default 100 + detecting the column specifications from the first 100 rows of + the data which are not being skipped via skiprows (default='infer'), + or by using the `infer_nrows` parameter. +infer_nrows : int, default None The number of rows to consider when letting the parser determine the - ``colspecs``. + ``colspecs``. If not set (or set to `None`), default behavior of 100 + rows is used. widths : list of ints, optional - A list of field widths which can be used instead of ``colspecs`` if + A list of field widths which can be used instead of `colspecs` if the intervals are contiguous. delimiter : str, default ``'\t' + ' '`` Characters to consider as filler characters in the fixed-width file. @@ -530,7 +532,7 @@ def _read(filepath_or_buffer, kwds): _fwf_defaults = { 'colspecs': 'infer', - 'infer_nrows': 100, + 'infer_nrows': None, 'widths': None, } @@ -721,7 +723,7 @@ def parser_f(filepath_or_buffer, @Appender(_read_fwf_doc) def read_fwf(filepath_or_buffer, colspecs='infer', widths=None, - infer_nrows=100, **kwds): + infer_nrows=None, **kwds): # Check input arguments. if colspecs is None and widths is None: raise ValueError("Must specify either colspecs or widths") @@ -3367,15 +3369,15 @@ class FixedWidthReader(BaseIterator): A reader of fixed-width lines. """ - def __init__(self, f, colspecs, delimiter, comment, infer_nrows=100, - skiprows=None): + def __init__(self, f, colspecs, delimiter, comment, skiprows=None, + infer_nrows=None): self.f = f self.buffer = None self.delimiter = '\r\n' + delimiter if delimiter else '\n\r\t ' self.comment = comment if colspecs == 'infer': - self.colspecs = self.detect_colspecs(infer_nrows=infer_nrows, - skiprows=skiprows) + self.colspecs = self.detect_colspecs(skiprows=skiprows, + infer_nrows=infer_nrows) else: self.colspecs = colspecs @@ -3428,11 +3430,13 @@ def get_rows(self, n, skiprows=None): self.buffer = iter(buffer_rows) return detect_rows - def detect_colspecs(self, infer_nrows=100, skiprows=None): + def detect_colspecs(self, n=100, skiprows=None, infer_nrows=None): # Regex escape the delimiters delimiters = ''.join(r'\%s' % x for x in self.delimiter) pattern = re.compile('([^%s]+)' % delimiters) - rows = self.get_rows(infer_nrows, skiprows) + if infer_nrows: + n = infer_nrows + rows = self.get_rows(n, skiprows) if not rows: raise EmptyDataError("No rows from which to infer column width") max_len = max(map(len, rows)) @@ -3476,5 +3480,5 @@ def __init__(self, f, **kwds): def _make_reader(self, f): self.data = FixedWidthReader(f, self.colspecs, self.delimiter, - self.comment, self.infer_nrows, - self.skiprows) + self.comment, self.skiprows, + self.infer_nrows) From d5de9d98eecf9af34739ad5121440b01eb4278e1 Mon Sep 17 00:00:00 2001 From: rdmontgomery Date: Fri, 26 Oct 2018 14:18:31 -0400 Subject: [PATCH 09/14] Add a whatsnew note --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index f17ea8941a57c..cfe3e05579596 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -201,6 +201,7 @@ Other Enhancements - :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`). - New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`). - Compatibility with Matplotlib 3.0 (:issue:`22790`). +- :func:`read_fwf` now accepts keyword `infer_nrows` (:issue:`15138`). .. _whatsnew_0240.api_breaking: From 49d55945d2dc837ca7959f4a556dde98dc1a13fe Mon Sep 17 00:00:00 2001 From: rdmontgomery Date: Fri, 26 Oct 2018 14:28:15 -0400 Subject: [PATCH 10/14] Add test for infer_nrows > number of rows --- pandas/tests/io/parser/test_read_fwf.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 43b80697ef0af..f002ba4ff7768 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -143,15 +143,20 @@ def test_fwf_colspecs_None(self): def test_fwf_colspecs_infer_nrows(self): # GH 15138 - # infer_nrows = 1 should have colspec == [(2, 3), (5, 6)] data = """\ 1 2 123 98 """ + # infer_nrows == 1 should have colspec == [(2, 3), (5, 6)] df = read_fwf(StringIO(data), header=None, infer_nrows=1) expected = pd.DataFrame([[1, 2], [3, 8]]) tm.assert_frame_equal(df, expected) + # test for infer_nrows > number of rows + df = read_fwf(StringIO(data), header=None, infer_nrows=10) + expected = pd.DataFrame([[1, 2], [123, 98]]) + tm.assert_frame_equal(df, expected) + def test_fwf_regression(self): # GH 3594 # turns out 'T060' is parsable as a datetime slice! From 6675dacfd92b58f99093c77147938551d807d3e1 Mon Sep 17 00:00:00 2001 From: rdmontgomery Date: Fri, 26 Oct 2018 14:48:32 -0400 Subject: [PATCH 11/14] Switch places and add versionadded tag --- pandas/io/parsers.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a2af2243a3abb..60f97bb91f122 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -349,13 +349,16 @@ detecting the column specifications from the first 100 rows of the data which are not being skipped via skiprows (default='infer'), or by using the `infer_nrows` parameter. +widths : list of ints, optional + A list of field widths which can be used instead of `colspecs` if + the intervals are contiguous. infer_nrows : int, default None The number of rows to consider when letting the parser determine the ``colspecs``. If not set (or set to `None`), default behavior of 100 rows is used. -widths : list of ints, optional - A list of field widths which can be used instead of `colspecs` if - the intervals are contiguous. + + .. versionadded:: 0.24.0 + delimiter : str, default ``'\t' + ' '`` Characters to consider as filler characters in the fixed-width file. Can be used to specify the filler character of the fields From de29edf337f6bd7df876c67de2691a829e163a27 Mon Sep 17 00:00:00 2001 From: rdmontgomery Date: Sun, 28 Oct 2018 14:51:02 -0400 Subject: [PATCH 12/14] Use infer_nrows unless n is set, default to 100 Preserve backwards compatibility by allowing n to be set, but the new default will be to use infer_nrows. --- pandas/io/parsers.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 60f97bb91f122..bf9413409b836 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -352,10 +352,9 @@ widths : list of ints, optional A list of field widths which can be used instead of `colspecs` if the intervals are contiguous. -infer_nrows : int, default None +infer_nrows : int, default 100 The number of rows to consider when letting the parser determine the - ``colspecs``. If not set (or set to `None`), default behavior of 100 - rows is used. + `colspecs`. .. versionadded:: 0.24.0 @@ -535,7 +534,7 @@ def _read(filepath_or_buffer, kwds): _fwf_defaults = { 'colspecs': 'infer', - 'infer_nrows': None, + 'infer_nrows': 100, 'widths': None, } @@ -726,7 +725,7 @@ def parser_f(filepath_or_buffer, @Appender(_read_fwf_doc) def read_fwf(filepath_or_buffer, colspecs='infer', widths=None, - infer_nrows=None, **kwds): + infer_nrows=100, **kwds): # Check input arguments. if colspecs is None and widths is None: raise ValueError("Must specify either colspecs or widths") @@ -3373,7 +3372,7 @@ class FixedWidthReader(BaseIterator): """ def __init__(self, f, colspecs, delimiter, comment, skiprows=None, - infer_nrows=None): + infer_nrows=100): self.f = f self.buffer = None self.delimiter = '\r\n' + delimiter if delimiter else '\n\r\t ' @@ -3433,13 +3432,14 @@ def get_rows(self, n, skiprows=None): self.buffer = iter(buffer_rows) return detect_rows - def detect_colspecs(self, n=100, skiprows=None, infer_nrows=None): + def detect_colspecs(self, n=None, skiprows=None, infer_nrows=100): + # infer_nrows replaces n, see GH15138 # Regex escape the delimiters delimiters = ''.join(r'\%s' % x for x in self.delimiter) pattern = re.compile('([^%s]+)' % delimiters) - if infer_nrows: - n = infer_nrows - rows = self.get_rows(n, skiprows) + if n: + infer_nrows = n + rows = self.get_rows(infer_nrows, skiprows) if not rows: raise EmptyDataError("No rows from which to infer column width") max_len = max(map(len, rows)) From cc1ce143fb6036900e169e77cf0d0f9d764b3314 Mon Sep 17 00:00:00 2001 From: rdmontgomery Date: Sun, 25 Nov 2018 14:39:43 -0500 Subject: [PATCH 13/14] Replace internal param with infer_nrows Let infer_nrows replace the internal parameter n and reorder the keywords. --- pandas/io/parsers.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index f883134ea6230..22b506d22debb 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -3451,8 +3451,8 @@ def __init__(self, f, colspecs, delimiter, comment, skiprows=None, self.delimiter = '\r\n' + delimiter if delimiter else '\n\r\t ' self.comment = comment if colspecs == 'infer': - self.colspecs = self.detect_colspecs(skiprows=skiprows, - infer_nrows=infer_nrows) + self.colspecs = self.detect_colspecs(infer_nrows=infer_nrows, + skiprows=skiprows) else: self.colspecs = colspecs @@ -3468,19 +3468,20 @@ def __init__(self, f, colspecs, delimiter, comment, skiprows=None, raise TypeError('Each column specification must be ' '2 element tuple or list of integers') - def get_rows(self, n, skiprows=None): + def get_rows(self, infer_nrows, skiprows=None): """ Read rows from self.f, skipping as specified. - We distinguish buffer_rows (the first <= n lines) - from the rows returned to detect_colspecs because - it's simpler to leave the other locations with - skiprows logic alone than to modify them to deal - with the fact we skipped some rows here as well. + We distinguish buffer_rows (the first <= infer_nrows + lines) from the rows returned to detect_colspecs + because it's simpler to leave the other locations + with skiprows logic alone than to modify them to + deal with the fact we skipped some rows here as + well. Parameters ---------- - n : int + infer_nrows : int Number of rows to read from self.f, not counting rows that are skipped. skiprows: set, optional @@ -3500,18 +3501,15 @@ def get_rows(self, n, skiprows=None): if i not in skiprows: detect_rows.append(row) buffer_rows.append(row) - if len(detect_rows) >= n: + if len(detect_rows) >= infer_nrows: break self.buffer = iter(buffer_rows) return detect_rows - def detect_colspecs(self, n=None, skiprows=None, infer_nrows=100): - # infer_nrows replaces n, see GH15138 + def detect_colspecs(self, infer_nrows=100, skiprows=None): # Regex escape the delimiters delimiters = ''.join(r'\%s' % x for x in self.delimiter) pattern = re.compile('([^%s]+)' % delimiters) - if n: - infer_nrows = n rows = self.get_rows(infer_nrows, skiprows) if not rows: raise EmptyDataError("No rows from which to infer column width") From ecf35d6cf4f918005830488be242208d9d79fd37 Mon Sep 17 00:00:00 2001 From: rdmontgomery Date: Sun, 25 Nov 2018 16:44:16 -0500 Subject: [PATCH 14/14] Add keyword back to function definition Fix bug introduced when master was merged back in and the function definition was changed along with the docstring. --- pandas/io/parsers.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 22b506d22debb..4bbd69cb5c8ad 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -719,8 +719,8 @@ def parser_f(filepath_or_buffer, )(read_table) -def read_fwf(filepath_or_buffer, colspecs='infer', - widths=None, **kwds): +def read_fwf(filepath_or_buffer, colspecs='infer', widths=None, + infer_nrows=100, **kwds): r""" Read a table of fixed-width formatted lines into DataFrame. @@ -753,6 +753,11 @@ def read_fwf(filepath_or_buffer, colspecs='infer', widths : list of int, optional A list of field widths which can be used instead of 'colspecs' if the intervals are contiguous. + infer_nrows : int, default 100 + The number of rows to consider when letting the parser determine the + `colspecs`. + + .. versionadded:: 0.24.0 **kwds : optional Optional keyword arguments can be passed to ``TextFileReader``.