-
-
Notifications
You must be signed in to change notification settings - Fork 18.7k
Description
`pd.qcut` doesn't seem to support the `ndarray` type. However, the docstring states `x : ndarray or Series`.
In [46]: d = range(5)
In [47]: d
Out[47]: [0, 1, 2, 3, 4]
In [48]: pd.qcut(d, [0, 1])
Out[48]:
[(-0.001, 4.0], (-0.001, 4.0], (-0.001, 4.0], (-0.001, 4.0], (-0.001, 4.0]]
Categories (1, interval[float64]): [(-0.001, 4.0]]
In [49]: d = np.array(range(5))
In [50]: d
Out[50]: array([0, 1, 2, 3, 4])
In [51]: pd.qcut(d, [0, 1])
Out[51]:
[(-0.001, 4.0], (-0.001, 4.0], (-0.001, 4.0], (-0.001, 4.0], (-0.001, 4.0]]
Categories (1, interval[float64]): [(-0.001, 4.0]]
In [52]: d = np.array([[x] for x in range(5)])
In [53]: type(d)
Out[53]: numpy.ndarray
In [54]: d
Out[54]:
array([[0],
       [1],
       [2],
       [3],
       [4]])
In [55]: pd.qcut(d, [0, 1])
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-55-b172e74b0ffd> in <module>()
----> 1 pd.qcut(d, [0, 1])
e:\github\pandas\pandas\core\reshape\tile.pyc in qcut(x, q, labels, retbins, precision, duplicates)
206 fac, bins = _bins_to_cuts(x, bins, labels=labels,
207 precision=precision, include_lowest=True,
--> 208 dtype=dtype, duplicates=duplicates)
209
210 return _postprocess_for_cut(fac, bins, retbins, x_is_series,
e:\github\pandas\pandas\core\reshape\tile.pyc in _bins_to_cuts(x, bins, right, labels, precision, include_lowest, dtype, duplicates)
258
259 np.putmask(ids, na_mask, 0)
--> 260 result = algos.take_nd(labels, ids - 1)
261
262 else:
e:\github\pandas\pandas\core\algorithms.pyc in take_nd(arr, indexer, axis, out, fill_value, mask_info, allow_fill)
1318 if is_categorical(arr):
1319 return arr.take_nd(indexer, fill_value=fill_value,
-> 1320 allow_fill=allow_fill)
1321 elif is_datetimetz(arr):
1322 return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
e:\github\pandas\pandas\core\categorical.pyc in take_nd(self, indexer, allow_fill, fill_value)
1703 assert isna(fill_value)
1704
-> 1705 codes = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1)
1706 result = self._constructor(codes, categories=self.categories,
1707 ordered=self.ordered, fastpath=True)
e:\github\pandas\pandas\core\algorithms.pyc in take_nd(arr, indexer, axis, out, fill_value, mask_info, allow_fill)
1381 func = _get_take_nd_function(arr.ndim, arr.dtype, out.dtype, axis=axis,
1382 mask_info=mask_info)
-> 1383 func(arr, indexer, out, fill_value)
1384
1385 if flip_order:
e:\github\pandas\pandas\_libs\algos_take_helper.pxi in pandas._libs.algos.take_1d_int8_int8 (pandas\_libs\algos.c:75951)()
560 @cython.boundscheck(False)
561 def take_1d_int8_int8(ndarray[int8_t, ndim=1] values,
--> 562 int64_t[:] indexer,
563 int8_t[:] out,
564 fill_value=np.nan):
ValueError: Buffer has wrong number of dimensions (expected 1, got 2)
Problem description
`pd.qcut` doesn't seem to support the `ndarray` type. However, the docstring states `x : ndarray or Series`.
Is this expected behavior, or does `ndarray` in the docstring refer only to arrays of shape `(n,)`?
I can confirm this was working in earlier versions.
Output of pd.show_versions()
pandas: 0.22.0.dev0+16.gd70526b
pytest: 3.2.0
pip: 9.0.1
setuptools: 36.2.7
Cython: 0.24.1
numpy: 1.12.1
scipy: 0.19.1
pyarrow: None
xarray: None
IPython: 5.1.0
sphinx: 1.4.6
patsy: 0.4.1
dateutil: 2.5.3
pytz: 2017.2
blosc: None
bottleneck: 1.2.0
tables: 3.2.2
numexpr: 2.6.2
feather: None
matplotlib: 2.0.2
openpyxl: 2.3.2
xlrd: 1.0.0
xlwt: 1.1.2
xlsxwriter: 0.9.3
lxml: 3.6.4
bs4: 4.5.1
html5lib: 0.999999999
sqlalchemy: 1.0.13
pymysql: 0.7.9.None
psycopg2: 2.7.3.1 (dt dec pq3 ext lo64)
jinja2: 2.8
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None