From 3efb86e98ef7eaaea6fa18105ffe07908f13efa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agust=C3=ADn=20Herranz?= Date: Sat, 21 Sep 2013 00:39:28 +0200 Subject: [PATCH] BUG/TST: Allow generators in DataFrame.from_records. - The nrows implementation doesn't allow iterators of unknown size, such as generators; if nrows is None, it ends with a TypeError. - To allow generators when nrows=None, consume the generator into a list. - Add two tests with generator input. - Add release notes for #4910 --- doc/source/release.rst | 1 + pandas/core/frame.py | 17 +++++++++++------ pandas/tests/test_frame.py | 30 ++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 78236bbf821dd..0026e8c27d176 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -169,6 +169,7 @@ Improvements to existing features high-dimensional arrays). - :func:`~pandas.read_html` now supports the ``parse_dates``, ``tupleize_cols`` and ``thousands`` parameters (:issue:`4770`). 
+ - ``DataFrame.from_records()`` accept generators (:issue:`4910`) API Changes ~~~~~~~~~~~ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d778fa096f589..1bbaeffff77bc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -724,12 +724,17 @@ def from_records(cls, data, index=None, exclude=None, columns=None, values = [first_row] - i = 1 - for row in data: - values.append(row) - i += 1 - if i >= nrows: - break + #if unknown length iterable (generator) + if nrows == None: + #consume whole generator + values += list(data) + else: + i = 1 + for row in data: + values.append(row) + i += 1 + if i >= nrows: + break if dtype is not None: data = np.array(values, dtype=dtype) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 1e4e988431f43..4c31961cbf8fb 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3739,6 +3739,36 @@ def test_from_records_iterator(self): nrows=2) assert_frame_equal(df, xp.reindex(columns=['x','y']), check_dtype=False) + def test_from_records_tuples_generator(self): + def tuple_generator(length): + for i in range(length): + letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + yield (i, letters[i % len(letters)], i/length) + + columns_names = ['Integer', 'String', 'Float'] + columns = [[i[j] for i in tuple_generator(10)] for j in range(len(columns_names))] + data = {'Integer': columns[0], 'String': columns[1], 'Float': columns[2]} + expected = DataFrame(data, columns=columns_names) + + generator = tuple_generator(10) + result = DataFrame.from_records(generator, columns=columns_names) + assert_frame_equal(result, expected) + + def test_from_records_lists_generator(self): + def list_generator(length): + for i in range(length): + letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + yield [i, letters[i % len(letters)], i/length] + + columns_names = ['Integer', 'String', 'Float'] + columns = [[i[j] for i in list_generator(10)] for j in range(len(columns_names))] + data = {'Integer': columns[0], 'String': 
columns[1], 'Float': columns[2]} + expected = DataFrame(data, columns=columns_names) + + generator = list_generator(10) + result = DataFrame.from_records(generator, columns=columns_names) + assert_frame_equal(result, expected) + def test_from_records_columns_not_modified(self): tuples = [(1, 2, 3), (1, 2, 3),