Commit 3466934

Merge branch 'master' of https://github.com/pandas-dev/pandas into concat-empty-ea
2 parents bd316c3 + df5eee6

303 files changed: +7915 additions, -4865 deletions


asv_bench/benchmarks/arithmetic.py

Lines changed: 7 additions & 3 deletions
@@ -67,7 +67,7 @@ def time_series_op_with_fill_value_no_nas(self):
         self.ser.add(self.ser, fill_value=4)


-class MixedFrameWithSeriesAxis0:
+class MixedFrameWithSeriesAxis:
     params = [
         [
             "eq",
@@ -78,7 +78,7 @@ class MixedFrameWithSeriesAxis0:
             "gt",
             "add",
             "sub",
-            "div",
+            "truediv",
             "floordiv",
             "mul",
             "pow",
@@ -87,15 +87,19 @@ class MixedFrameWithSeriesAxis0:
     param_names = ["opname"]

     def setup(self, opname):
-        arr = np.arange(10 ** 6).reshape(100, -1)
+        arr = np.arange(10 ** 6).reshape(1000, -1)
         df = DataFrame(arr)
         df["C"] = 1.0
         self.df = df
         self.ser = df[0]
+        self.row = df.iloc[0]

     def time_frame_op_with_series_axis0(self, opname):
         getattr(self.df, opname)(self.ser, axis=0)

+    def time_frame_op_with_series_axis1(self, opname):
+        getattr(operator, opname)(self.df, self.ser)
+

 class Ops:
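
For context on what the renamed class now measures, a minimal sketch (not part of the commit, toy-sized frame) of the two alignment paths exercised by the axis0 and axis1 cases:

    import operator

    import numpy as np
    from pandas import DataFrame

    df = DataFrame(np.arange(12).reshape(4, 3))
    ser = df[0]

    # axis=0: align the Series against the row index, broadcast across columns
    df.add(ser, axis=0)

    # plain operator form: align the Series against the columns (axis=1 semantics),
    # which is what time_frame_op_with_series_axis1 times
    operator.add(df, ser)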

asv_bench/benchmarks/frame_methods.py

Lines changed: 1 addition & 1 deletion
@@ -564,7 +564,7 @@ def setup(self):

     def time_frame_get_dtype_counts(self):
         with warnings.catch_warnings(record=True):
-            self.df._data.get_dtype_counts()
+            self.df.dtypes.value_counts()

     def time_info(self):
         self.df.info()
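
The benchmark now goes through the public API instead of the private block manager; a small illustration of the replacement call on a made-up frame:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": [1.0, 2.0], "c": ["x", "y"]})
    # counts of columns per dtype, the public equivalent of the removed
    # private df._data.get_dtype_counts()
    df.dtypes.value_counts()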

asv_bench/benchmarks/groupby.py

Lines changed: 92 additions & 0 deletions
@@ -626,4 +626,96 @@ def time_first(self):
         self.df_nans.groupby("key").transform("first")


+class TransformEngine:
+    def setup(self):
+        N = 10 ** 3
+        data = DataFrame(
+            {0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
+            columns=[0, 1],
+        )
+        self.grouper = data.groupby(0)
+
+    def time_series_numba(self):
+        def function(values, index):
+            return values * 5
+
+        self.grouper[1].transform(function, engine="numba")
+
+    def time_series_cython(self):
+        def function(values):
+            return values * 5
+
+        self.grouper[1].transform(function, engine="cython")
+
+    def time_dataframe_numba(self):
+        def function(values, index):
+            return values * 5
+
+        self.grouper.transform(function, engine="numba")
+
+    def time_dataframe_cython(self):
+        def function(values):
+            return values * 5
+
+        self.grouper.transform(function, engine="cython")
+
+
+class AggEngine:
+    def setup(self):
+        N = 10 ** 3
+        data = DataFrame(
+            {0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
+            columns=[0, 1],
+        )
+        self.grouper = data.groupby(0)
+
+    def time_series_numba(self):
+        def function(values, index):
+            total = 0
+            for i, value in enumerate(values):
+                if i % 2:
+                    total += value + 5
+                else:
+                    total += value * 2
+            return total
+
+        self.grouper[1].agg(function, engine="numba")
+
+    def time_series_cython(self):
+        def function(values):
+            total = 0
+            for i, value in enumerate(values):
+                if i % 2:
+                    total += value + 5
+                else:
+                    total += value * 2
+            return total
+
+        self.grouper[1].agg(function, engine="cython")
+
+    def time_dataframe_numba(self):
+        def function(values, index):
+            total = 0
+            for i, value in enumerate(values):
+                if i % 2:
+                    total += value + 5
+                else:
+                    total += value * 2
+            return total
+
+        self.grouper.agg(function, engine="numba")
+
+    def time_dataframe_cython(self):
+        def function(values):
+            total = 0
+            for i, value in enumerate(values):
+                if i % 2:
+                    total += value + 5
+                else:
+                    total += value * 2
+            return total
+
+        self.grouper.agg(function, engine="cython")
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
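
The new TransformEngine and AggEngine benchmarks compare the default cython engine against the numba engine on GroupBy.transform and GroupBy.agg. A minimal usage sketch, not taken from the commit (toy data; the engine="numba" path requires numba to be installed, and numba-engine UDFs take values and index as their first two arguments):

    import pandas as pd

    df = pd.DataFrame({"key": ["a", "b"] * 3, "val": range(6)})
    grouped = df.groupby("key")["val"]

    def scale_cython(values):
        return values * 5

    def scale_numba(values, index):  # numba-engine signature: (values, index)
        return values * 5

    grouped.transform(scale_cython, engine="cython")
    grouped.transform(scale_numba, engine="numba")  # needs numba installed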

asv_bench/benchmarks/rolling.py

Lines changed: 12 additions & 13 deletions
@@ -150,19 +150,18 @@ def time_quantile(self, constructor, window, dtype, percentile, interpolation):
         self.roll.quantile(percentile, interpolation=interpolation)


-class PeakMemFixed:
-    def setup(self):
-        N = 10
-        arr = 100 * np.random.random(N)
-        self.roll = pd.Series(arr).rolling(10)
-
-    def peakmem_fixed(self):
-        # GH 25926
-        # This is to detect memory leaks in rolling operations.
-        # To save time this is only ran on one method.
-        # 6000 iterations is enough for most types of leaks to be detected
-        for x in range(6000):
-            self.roll.max()
+class PeakMemFixedWindowMinMax:
+
+    params = ["min", "max"]
+
+    def setup(self, operation):
+        N = int(1e6)
+        arr = np.random.random(N)
+        self.roll = pd.Series(arr).rolling(2)
+
+    def peakmem_fixed(self, operation):
+        for x in range(5):
+            getattr(self.roll, operation)()


 class ForwardWindowMethods:
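
The renamed benchmark targets memory use of fixed-window min/max (in asv, the peakmem_ prefix reports peak memory rather than runtime). A sketch of the underlying calls outside the harness, with arbitrary sizes:

    import numpy as np
    import pandas as pd

    ser = pd.Series(np.random.random(1_000_000))
    roll = ser.rolling(2)  # fixed window of size 2
    roll.min()
    roll.max()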

asv_bench/benchmarks/stat_ops.py

Lines changed: 2 additions & 2 deletions
@@ -11,8 +11,8 @@ class FrameOps:
     param_names = ["op", "dtype", "axis"]

     def setup(self, op, dtype, axis):
-        if op == "mad" and dtype == "Int64" and axis == 1:
-            # GH-33036
+        if op == "mad" and dtype == "Int64":
+            # GH-33036, GH#33600
             raise NotImplementedError
         values = np.random.randn(100000, 4)
         if dtype == "Int64":
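
For context, the skipped op is DataFrame.mad, the mean absolute deviation reduction (removed from pandas in 2.0, so this applies to the pandas of this era). A tiny illustration with made-up data:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(5, 3))
    df.mad(axis=0)  # mean absolute deviation per column
    df.mad(axis=1)  # mean absolute deviation per row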

ci/code_checks.sh

Lines changed: 7 additions & 1 deletion
@@ -150,7 +150,13 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     # Check for imports from pandas._testing instead of `import pandas._testing as tm`
     invgrep -R --include="*.py*" -E "from pandas._testing import" pandas/tests
     RET=$(($RET + $?)) ; echo $MSG "DONE"
-    invgrep -R --include="*.py*" -E "from pandas.util import testing as tm" pandas/tests
+    invgrep -R --include="*.py*" -E "from pandas import _testing as tm" pandas/tests
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    # No direct imports from conftest
+    invgrep -R --include="*.py*" -E "conftest import" pandas/tests
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+    invgrep -R --include="*.py*" -E "import conftest" pandas/tests
     RET=$(($RET + $?)) ; echo $MSG "DONE"

     MSG='Check for use of exec' ; echo $MSG
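
These pattern checks enforce the import convention for the testing helpers inside pandas/tests. A hypothetical test-file snippet contrasting the flagged imports with the expected form:

    # Flagged by the checks above (rejected inside pandas/tests):
    #     from pandas._testing import assert_frame_equal
    #     from pandas import _testing as tm
    # Expected form:
    import pandas as pd
    import pandas._testing as tm

    tm.assert_frame_equal(pd.DataFrame({"a": [1]}), pd.DataFrame({"a": [1]}))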

ci/deps/azure-36-minimum_versions.yaml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ dependencies:
   - numexpr=2.6.2
   - numpy=1.13.3
   - openpyxl=2.5.7
-  - pytables=3.4.2
+  - pytables=3.4.3
   - python-dateutil=2.7.3
   - pytz=2017.2
   - scipy=0.19.0

doc/source/getting_started/index.rst

Lines changed: 1 addition & 1 deletion
@@ -398,7 +398,7 @@ data set, a sliding window of the data or grouped by categories. The latter is a
         <div class="card-body">

 Change the structure of your data table in multiple ways. You can :func:`~pandas.melt` your data table from wide to long/tidy form or :func:`~pandas.pivot`
-from long to wide format. With aggregations built-in, a pivot table is created with a sinlge command.
+from long to wide format. With aggregations built-in, a pivot table is created with a single command.

 .. image:: ../_static/schemas/07_melt.svg
    :align: center
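
To make the corrected sentence concrete, a sketch (toy data, not from the docs page) of a pivot table with built-in aggregation in a single call:

    import pandas as pd

    df = pd.DataFrame(
        {"city": ["A", "A", "B"], "year": [2019, 2020, 2020], "sales": [1, 2, 3]}
    )
    pd.pivot_table(df, values="sales", index="city", columns="year", aggfunc="sum")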

doc/source/getting_started/install.rst

Lines changed: 2 additions & 2 deletions
@@ -262,7 +262,7 @@ BeautifulSoup4 4.6.0 HTML parser for read_html (see :ref
 Jinja2                    Conditional formatting with DataFrame.style
 PyQt4                     Clipboard I/O
 PyQt5                     Clipboard I/O
-PyTables         3.4.2    HDF5-based reading / writing
+PyTables         3.4.3    HDF5-based reading / writing
 SQLAlchemy       1.1.4    SQL support for databases other than sqlite
 SciPy            0.19.0   Miscellaneous statistical functions
 XLsxWriter       0.9.8    Excel writing
@@ -279,7 +279,7 @@ psycopg2 PostgreSQL engine for sqlalchemy
 pyarrow          0.12.0   Parquet, ORC (requires 0.13.0), and feather reading / writing
 pymysql          0.7.11   MySQL engine for sqlalchemy
 pyreadstat                SPSS files (.sav) reading
-pytables         3.4.2    HDF5 reading / writing
+pytables         3.4.3    HDF5 reading / writing
 pyxlsb           1.0.6    Reading for xlsb files
 qtpy                      Clipboard I/O
 s3fs             0.3.0    Amazon S3 access
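
When checking a local environment against the minimum versions above, pandas provides a helper that reports installed optional dependencies and their versions:

    import pandas as pd

    pd.show_versions()  # prints pandas, Python and optional-dependency versions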

doc/source/getting_started/intro_tutorials/03_subset_data.rst

Lines changed: 5 additions & 5 deletions
@@ -23,7 +23,7 @@
         <div class="card-body">
         <p class="card-text">

-This tutorial uses the titanic data set, stored as CSV. The data
+This tutorial uses the Titanic data set, stored as CSV. The data
 consists of the following data columns:

 - PassengerId: Id of every passenger.
@@ -72,7 +72,7 @@ How do I select specific columns from a ``DataFrame``?
     <ul class="task-bullet">
         <li>

-I’m interested in the age of the titanic passengers.
+I’m interested in the age of the Titanic passengers.

 .. ipython:: python

@@ -111,7 +111,7 @@ the number of rows is returned.
     <ul class="task-bullet">
         <li>

-I’m interested in the age and sex of the titanic passengers.
+I’m interested in the age and sex of the Titanic passengers.

 .. ipython:: python

@@ -198,7 +198,7 @@ can be used to filter the ``DataFrame`` by putting it in between the
 selection brackets ``[]``. Only rows for which the value is ``True``
 will be selected.

-We now from before that the original titanic ``DataFrame`` consists of
+We know from before that the original Titanic ``DataFrame`` consists of
 891 rows. Let’s have a look at the amount of rows which satisfy the
 condition by checking the ``shape`` attribute of the resulting
 ``DataFrame`` ``above_35``:
@@ -212,7 +212,7 @@ condition by checking the ``shape`` attribute of the resulting
     <ul class="task-bullet">
         <li>

-I’m interested in the titanic passengers from cabin class 2 and 3.
+I’m interested in the Titanic passengers from cabin class 2 and 3.

 .. ipython:: python
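
The prose touched here describes boolean filtering on the Titanic data; a sketch of those steps (the CSV path and column names are assumed from the tutorial, not shown in this diff):

    import pandas as pd

    titanic = pd.read_csv("data/titanic.csv")           # path as used in the tutorial
    above_35 = titanic[titanic["Age"] > 35]             # boolean mask keeps matching rows
    class_23 = titanic[titanic["Pclass"].isin([2, 3])]  # passengers from cabin class 2 and 3
    above_35.shape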
