TYP: parts of c parser #44677
Changes from 5 commits
@@ -1,12 +1,19 @@
 from __future__ import annotations

+from typing import (
+    Hashable,
+    Mapping,
+    Sequence,
+)
 import warnings

 import numpy as np

 import pandas._libs.parsers as parsers
 from pandas._typing import (
     ArrayLike,
+    DtypeArg,
+    DtypeObj,
     FilePath,
     ReadCsvBuffer,
 )
@@ -20,6 +27,10 @@
 from pandas.core.dtypes.concat import union_categoricals
 from pandas.core.dtypes.dtypes import ExtensionDtype

+from pandas import (
+    Index,
+    MultiIndex,
+)
 from pandas.core.indexes.api import ensure_index_from_sequences

 from pandas.io.parsers.base_parser import (
@@ -34,7 +45,7 @@ class CParserWrapper(ParserBase):

     def __init__(
         self, src: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], **kwds
-    ):
+    ) -> None:
         self.kwds = kwds
         kwds = kwds.copy()
         ParserBase.__init__(self, kwds)

Review discussion on the added `-> None`:

- Reviewer: "Personally, I still like to add it. If mypy sees […]"
- Reviewer: "I prefer adding it to […]"
- Reviewer: "We have some (now outdated) style guidelines for the typing in https://pandas.pydata.org/pandas-docs/dev/development/contributing_codebase.html#style-guidelines; not including the return type of `__init__` is part of them. If this is to be changed, we should be consistent and add it elsewhere (and update the style guidelines). I had no strong preference originally, but now that we have not been adding the redundant annotation for a while […]"
- Author: "Ok, removed it."
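The thread above is truncated, but the argument it gestures at is most likely mypy's rule that it only checks the bodies of functions carrying at least one annotation. A minimal sketch of that behavior (illustrative class names, not from the PR):

```python
# Sketch of default mypy behavior: a function with no annotations at all
# is "untyped" and its body is skipped (unless --check-untyped-defs is on).

class Unchecked:
    def __init__(self):
        self.n = "oops" + 1  # type error, but mypy skips this body

class Checked:
    def __init__(self) -> None:
        self.n = "oops" + 1  # error: unsupported operand types for +

class AlsoChecked:
    def __init__(self, src: str):
        self.n = "oops" + 1  # also flagged: the annotated parameter
                             # already makes the function "typed"
```

Since this `__init__` already annotates `src`, the explicit `-> None` does not change what mypy checks, which is presumably why the guideline treats it as redundant.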
@@ -193,7 +204,7 @@ def close(self) -> None:
         except ValueError:
             pass

-    def _set_noconvert_columns(self):
+    def _set_noconvert_columns(self) -> None:
         """
         Set the columns that should not undergo dtype conversions.
@@ -214,7 +225,14 @@ def _set_noconvert_columns(self):
         for col in noconvert_columns:
             self._reader.set_noconvert(col)

-    def read(self, nrows=None):
+    def read(
+        self,
+        nrows: int | None = None,
+    ) -> tuple[
+        Index | MultiIndex | None,
+        Sequence[Hashable] | MultiIndex,
+        Mapping[Hashable, ArrayLike],
+    ]:
         try:
             if self.low_memory:
                 chunks = self._reader.read_low_memory(nrows)

Review discussion on the `Sequence`/`Mapping` return types:

- Reviewer: "Return types should be as concrete as possible. If you know it is a list/dict, it probably shouldn't be Sequence/Mapping."
- Author: "The basic issue here is that if we type some function parameters as Sequence, we also have to type the return types as Sequence, because most of the time there is one code branch that just passes the inputs along (for example `_do_date_conversion`). If we want to use lists, we get into a bunch of other issues. Not 100% sure what to do there; went with Sequence for now."
- Reviewer: "FWIW, I usually use list/tuple/whatever on the theory that 'well, it's accurate and more specific than Sequence!' and then @simonjayhawkins tells me to use Sequence anyway."
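A sketch of the pass-through issue described in that exchange (hypothetical helper names, not the PR's actual functions):

```python
from typing import Hashable, Sequence

def passthrough(names: Sequence[Hashable]) -> Sequence[Hashable]:
    # One code branch may hand the input straight back; since callers can
    # pass a list, a tuple, or an Index, the return type can be no more
    # concrete than the parameter type.
    return names

def concrete(names: Sequence[Hashable]) -> list[Hashable]:
    # Promising a concrete list means materializing one on every path.
    return list(names)
```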
@@ -306,11 +324,11 @@ def read(self, nrows=None):
         index, names = self._make_index(date_data, alldata, names)

         # maybe create a mi on the columns
-        names = self._maybe_make_multi_index_columns(names, self.col_names)
+        conv_names = self._maybe_make_multi_index_columns(names, self.col_names)

-        return index, names, date_data
+        return index, conv_names, date_data

-    def _filter_usecols(self, names):
+    def _filter_usecols(self, names: Sequence[Hashable]) -> Sequence[Hashable]:
         # hackish
         usecols = self._evaluate_usecols(self.usecols, names)
         if usecols is not None and len(names) != len(usecols):
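The `names` → `conv_names` rename above is plausibly the usual workaround for mypy treating a variable's first assignment as its declared type; rebinding it to a value of a broader type (here, possibly a `MultiIndex`) would not type-check. A generic sketch (hypothetical function, not pandas code):

```python
def demo(names: list[str]) -> None:
    # Rebinding the same variable to a value of a different type is
    # rejected by mypy:
    #   names = tuple(names)  # error: incompatible types in assignment
    # Binding a fresh name keeps both variables precisely typed:
    conv_names = tuple(names)
    print(conv_names)
```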
@@ -395,13 +413,15 @@ def _concatenate_chunks(chunks: list[dict[int, ArrayLike]]) -> dict:
     return result


-def ensure_dtype_objs(dtype):
+def ensure_dtype_objs(
+    dtype: DtypeArg | Mapping[Hashable, DtypeArg] | None
+) -> DtypeObj | Mapping[Hashable, DtypeObj] | None:
""" | ||
Ensure we have either None, a dtype object, or a dictionary mapping to | ||
dtype objects. | ||
""" | ||
if isinstance(dtype, dict): | ||
dtype = {k: pandas_dtype(dtype[k]) for k in dtype} | ||
return {k: pandas_dtype(dtype[k]) for k in dtype} | ||
elif dtype is not None: | ||
dtype = pandas_dtype(dtype) | ||
return pandas_dtype(dtype) | ||
return dtype |
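Rewriting the reassignments as early returns lets mypy check each branch against the return annotation using the narrowed type of `dtype`, rather than tracking a variable whose type changes across branches. A simplified standalone sketch of the same pattern (hypothetical function and types, not the pandas API):

```python
def normalize(value: str | dict[str, str] | None) -> int | dict[str, int] | None:
    # Mirrors the shape of ensure_dtype_objs: each branch returns directly,
    # so the value being returned has a single, narrowed type at every site.
    if isinstance(value, dict):
        return {k: len(v) for k, v in value.items()}
    elif value is not None:
        return len(value)  # value narrowed to str here
    return value           # value narrowed to None here
```

With the original reassignment style (`value = len(value)`), the assignment itself would be rejected, since `int` is not part of the parameter's declared type.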