diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index b80a127be970d..4dc5e7516db7e 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -791,7 +791,8 @@ cdef class StringHashTable(HashTable): raise KeyError(key) @cython.boundscheck(False) - def get_indexer(self, ndarray[object] values): + def get_indexer(self, ndarray[object] values) -> ndarray: + # -> np.ndarray[np.intp] cdef: Py_ssize_t i, n = len(values) ndarray[intp_t] labels = np.empty(n, dtype=np.intp) diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 477c9fd655a4a..4c647056641f5 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -4,6 +4,7 @@ from typing import ( Any, Callable, + Generator, ) import numpy as np @@ -52,8 +53,7 @@ def is_bool_array(values: np.ndarray, skipna: bool = False): ... def fast_multiget(mapping: dict, keys: np.ndarray, default=np.nan) -> ArrayLike: ... -# TODO: gen: Generator? -def fast_unique_multiple_list_gen(gen: object, sort: bool = True) -> list: ... +def fast_unique_multiple_list_gen(gen: Generator, sort: bool = True) -> list: ... def fast_unique_multiple_list(lists: list, sort: bool = True) -> list: ... def fast_unique_multiple(arrays: list, sort: bool = True) -> list: ... @@ -90,10 +90,9 @@ def infer_datetimelike_array( arr: np.ndarray # np.ndarray[object] ) -> str: ... -# TODO: new_dtype -> np.dtype? def astype_intsafe( arr: np.ndarray, # np.ndarray[object] - new_dtype, + new_dtype: np.dtype, ) -> np.ndarray: ... def fast_zip(ndarrays: list) -> np.ndarray: ... # np.ndarray[object] @@ -134,15 +133,13 @@ def memory_usage_of_objects( ) -> int: ... # np.int64 -# TODO: f: Callable? -# TODO: dtype -> DtypeObj? def map_infer_mask( arr: np.ndarray, f: Callable[[Any], Any], mask: np.ndarray, # const uint8_t[:] convert: bool = ..., na_value: Any = ..., - dtype: Any = ..., + dtype: np.dtype = ..., ) -> ArrayLike: ... def indices_fast( diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e816bd4cd4026..a5ed650d72911 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -633,7 +633,7 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: @cython.wraparound(False) @cython.boundscheck(False) -def astype_intsafe(ndarray[object] arr, new_dtype) -> ndarray: +def astype_intsafe(ndarray[object] arr, cnp.dtype new_dtype) -> ndarray: cdef: Py_ssize_t i, n = len(arr) object val @@ -661,7 +661,8 @@ cpdef ndarray[object] ensure_string_array( bint copy=True, bint skipna=True, ): - """Returns a new numpy array with object dtype and only strings and na values. + """ + Returns a new numpy array with object dtype and only strings and na values. Parameters ---------- @@ -679,7 +680,7 @@ cpdef ndarray[object] ensure_string_array( Returns ------- - ndarray + np.ndarray[object] An array with the input array's elements casted to str or nan-like. """ cdef: @@ -2452,7 +2453,8 @@ no_default = NoDefault.no_default # Sentinel indicating the default value. @cython.boundscheck(False) @cython.wraparound(False) def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=True, - object na_value=no_default, object dtype=object) -> "ArrayLike": + object na_value=no_default, cnp.dtype dtype=np.dtype(object) + ) -> "ArrayLike": """ Substitute for np.vectorize with pandas-friendly dtype inference. @@ -2472,7 +2474,7 @@ def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=Tr Returns ------- - ndarray + np.ndarray or ExtensionArray """ cdef: Py_ssize_t i, n diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index d6ca38e57d2d8..4d55967c1e135 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -93,7 +93,7 @@ def build_field_sarray(const int64_t[:] dtindex): return out -def month_position_check(fields, weekdays): +def month_position_check(fields, weekdays) -> str | None: cdef: int32_t daysinmonth, y, m, d bint calendar_end = True @@ -755,7 +755,7 @@ cdef inline ndarray[int64_t] _roundup_int64(values, int64_t unit): return _floor_int64(values + unit // 2, unit) -def round_nsint64(values: np.ndarray, mode: RoundTo, nanos) -> np.ndarray: +def round_nsint64(values: np.ndarray, mode: RoundTo, nanos: int) -> np.ndarray: """ Applies rounding mode at given frequency diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 02731bd4fbbc1..5a2643dd531ed 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -609,7 +609,7 @@ def argsort( Returns ------- - ndarray + np.ndarray[np.intp] Array of indices that sort ``self``. If NaN values are contained, NaN values are placed at the end. diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f2b5ad447a0cf..272cf19be559c 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1599,7 +1599,7 @@ def argsort(self, ascending=True, kind="quicksort", **kwargs): Returns ------- - numpy.array + np.ndarray[np.intp] See Also -------- diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index ee68f5558a651..087ce415cc4ba 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -135,10 +135,10 @@ class TimedeltaArray(dtl.TimelikeOps): # define my properties & methods for delegation _other_ops: list[str] = [] _bool_ops: list[str] = [] - _object_ops = ["freq"] - _field_ops = ["days", "seconds", "microseconds", "nanoseconds"] - _datetimelike_ops = _field_ops + _object_ops + _bool_ops - _datetimelike_methods = [ + _object_ops: list[str] = ["freq"] + _field_ops: list[str] = ["days", "seconds", "microseconds", "nanoseconds"] + _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops + _datetimelike_methods: list[str] = [ "to_pytimedelta", "total_seconds", "round", diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 5656323b82fb7..f56e13775460b 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -164,7 +164,7 @@ def _get_combined_index( return index -def union_indexes(indexes, sort=True) -> Index: +def union_indexes(indexes, sort: bool = True) -> Index: """ Return the union of indexes. @@ -273,7 +273,7 @@ def _sanitize_and_check(indexes): return indexes, "array" -def all_indexes_same(indexes): +def all_indexes_same(indexes) -> bool: """ Determine if all indexes contain the same elements. diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c79518702169a..310ee4c3a63e3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -215,7 +215,7 @@ def join( return cast(F, join) -def disallow_kwargs(kwargs: dict[str, Any]): +def disallow_kwargs(kwargs: dict[str, Any]) -> None: if kwargs: raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}") @@ -626,7 +626,7 @@ def _maybe_check_unique(self) -> None: raise DuplicateLabelError(msg) @final - def _format_duplicate_message(self): + def _format_duplicate_message(self) -> DataFrame: """ Construct the DataFrame for a DuplicateLabelError. @@ -789,7 +789,7 @@ def __array_wrap__(self, result, context=None): return Index(result, **attrs) @cache_readonly - def dtype(self): + def dtype(self) -> DtypeObj: """ Return the dtype object of the underlying data. """ @@ -1064,11 +1064,11 @@ def copy( return new_index @final - def __copy__(self, **kwargs): + def __copy__(self: _IndexT, **kwargs) -> _IndexT: return self.copy(**kwargs) @final - def __deepcopy__(self, memo=None): + def __deepcopy__(self: _IndexT, memo=None) -> _IndexT: """ Parameters ---------- @@ -1354,7 +1354,7 @@ def to_series(self, index=None, name: Hashable = None) -> Series: return Series(self._values.copy(), index=index, name=name) - def to_frame(self, index: bool = True, name=None) -> DataFrame: + def to_frame(self, index: bool = True, name: Hashable = None) -> DataFrame: """ Create a DataFrame with a column containing the Index. @@ -1426,7 +1426,7 @@ def name(self): return self._name @name.setter - def name(self, value): + def name(self, value: Hashable): if self._no_setting_name: # Used in MultiIndex.levels to avoid silently ignoring name updates. raise RuntimeError( @@ -2367,7 +2367,7 @@ def _is_all_dates(self) -> bool: @cache_readonly @final - def is_all_dates(self): + def is_all_dates(self) -> bool: """ Whether or not the index values only consist of dates. """ @@ -3380,7 +3380,7 @@ def get_loc(self, key, method=None, tolerance=None): Returns ------- - indexer : ndarray of int + indexer : np.ndarray[np.intp] Integers from 0 to n - 1 indicating that the index at these positions matches the corresponding target values. Missing values in the target are marked by -1. @@ -4610,7 +4610,7 @@ def _can_hold_identifiers_and_holds_name(self, name) -> bool: return name in self return False - def append(self, other) -> Index: + def append(self, other: Index | Sequence[Index]) -> Index: """ Append a collection of Index options together. @@ -4627,7 +4627,9 @@ def append(self, other) -> Index: if isinstance(other, (list, tuple)): to_concat += list(other) else: - to_concat.append(other) + # error: Argument 1 to "append" of "list" has incompatible type + # "Union[Index, Sequence[Index]]"; expected "Index" + to_concat.append(other) # type: ignore[arg-type] for obj in to_concat: if not isinstance(obj, Index): @@ -5181,11 +5183,11 @@ def set_value(self, arr, key, value): Returns ------- - indexer : ndarray of int + indexer : np.ndarray[np.intp] Integers from 0 to n - 1 indicating that the index at these positions matches the corresponding target values. Missing values in the target are marked by -1. - missing : ndarray of int + missing : np.ndarray[np.intp] An indexer into the target of the values not found. These correspond to the -1 in the indexer array. """ @@ -5227,7 +5229,7 @@ def get_indexer_for(self, target, **kwargs) -> np.ndarray: Returns ------- - numpy.ndarray + np.ndarray[np.intp] List of indices. """ if self._index_as_unique: diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 724caebd69c23..5b98b956e33e6 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -457,8 +457,8 @@ def reindex( # in which case we are going to conform to the passed Categorical new_target = np.asarray(new_target) if is_categorical_dtype(target): - new_target = Categorical(new_target, dtype=target.dtype) - new_target = type(self)._simple_new(new_target, name=self.name) + cat = Categorical(new_target, dtype=target.dtype) + new_target = type(self)._simple_new(cat, name=self.name) else: new_target = Index(new_target, name=self.name) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 9f02196466ebf..f77f28deecf57 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -391,7 +391,7 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: # -------------------------------------------------------------------- # Rendering Methods - def _mpl_repr(self): + def _mpl_repr(self) -> np.ndarray: # how to represent ourselves to matplotlib return ints_to_pydatetime(self.asi8, self.tz) @@ -448,7 +448,7 @@ def _maybe_utc_convert(self, other: Index) -> tuple[DatetimeIndex, Index]: # -------------------------------------------------------------------- - def _get_time_micros(self): + def _get_time_micros(self) -> np.ndarray: """ Return the number of microseconds since midnight. @@ -541,7 +541,7 @@ def to_series(self, keep_tz=lib.no_default, index=None, name=None): return Series(values, index=index, name=name) - def snap(self, freq="S"): + def snap(self, freq="S") -> DatetimeIndex: """ Snap time stamps to nearest occurring frequency. @@ -891,7 +891,7 @@ def indexer_at_time(self, time, asof: bool = False) -> np.ndarray: else: time_micros = self._get_time_micros() micros = _time_to_micros(time) - return (micros == time_micros).nonzero()[0] + return (time_micros == micros).nonzero()[0] def indexer_between_time( self, start_time, end_time, include_start: bool = True, include_end: bool = True diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index f7ab09e4f176f..171ab57264f85 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -390,7 +390,7 @@ def from_tuples( # -------------------------------------------------------------------- @cache_readonly - def _engine(self): + def _engine(self) -> IntervalTree: left = self._maybe_convert_i8(self.left) right = self._maybe_convert_i8(self.right) return IntervalTree(left, right, closed=self.closed) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 5b4f3e1bb9e09..59ff128713aca 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2673,6 +2673,7 @@ def _get_indexer( limit: int | None = None, tolerance=None, ) -> np.ndarray: + # returned ndarray is np.intp # empty indexer if not len(target): diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 8e8c67927c20f..1e974063bd839 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -249,7 +249,7 @@ def _format_with_header(self, header: list[str], na_rep: str = "NaN") -> list[st ) @property - def start(self): + def start(self) -> int: """ The value of the `start` parameter (``0`` if this was not supplied). """ @@ -257,7 +257,7 @@ def start(self): return self._range.start @property - def _start(self): + def _start(self) -> int: """ The value of the `start` parameter (``0`` if this was not supplied). @@ -272,14 +272,14 @@ def _start(self): return self.start @property - def stop(self): + def stop(self) -> int: """ The value of the `stop` parameter. """ return self._range.stop @property - def _stop(self): + def _stop(self) -> int: """ The value of the `stop` parameter. @@ -295,7 +295,7 @@ def _stop(self): return self.stop @property - def step(self): + def step(self) -> int: """ The value of the `step` parameter (``1`` if this was not supplied). """ @@ -303,7 +303,7 @@ def step(self): return self._range.step @property - def _step(self): + def _step(self) -> int: """ The value of the `step` parameter (``1`` if this was not supplied). @@ -405,6 +405,7 @@ def _get_indexer( limit: int | None = None, tolerance=None, ) -> np.ndarray: + # -> np.ndarray[np.intp] if com.any_not_none(method, tolerance, limit): return super()._get_indexer( target, method=method, tolerance=tolerance, limit=limit @@ -522,7 +523,7 @@ def argsort(self, *args, **kwargs) -> np.ndarray: Returns ------- - argsorted : numpy array + np.ndarray[np.intp] See Also -------- @@ -532,9 +533,9 @@ def argsort(self, *args, **kwargs) -> np.ndarray: nv.validate_argsort(args, kwargs) if self._range.step > 0: - result = np.arange(len(self)) + result = np.arange(len(self), dtype=np.intp) else: - result = np.arange(len(self) - 1, -1, -1) + result = np.arange(len(self) - 1, -1, -1, dtype=np.intp) if not ascending: result = result[::-1] @@ -759,7 +760,7 @@ def symmetric_difference(self, other, result_name: Hashable = None, sort=None): # -------------------------------------------------------------------- - def _concat(self, indexes, name: Hashable): + def _concat(self, indexes: list[Index], name: Hashable): """ Overriding parent method for the case of all RangeIndex instances. @@ -780,7 +781,8 @@ def _concat(self, indexes, name: Hashable): non_empty_indexes = [obj for obj in indexes if len(obj)] for obj in non_empty_indexes: - rng: range = obj._range + # error: "Index" has no attribute "_range" + rng: range = obj._range # type: ignore[attr-defined] if start is None: # This is set by the first non-empty index @@ -808,7 +810,12 @@ def _concat(self, indexes, name: Hashable): if non_empty_indexes: # Get the stop value from "next" or alternatively # from the last non-empty index - stop = non_empty_indexes[-1].stop if next_ is None else next_ + # error: "Index" has no attribute "stop" + stop = ( + non_empty_indexes[-1].stop # type: ignore[attr-defined] + if next_ is None + else next_ + ) return RangeIndex(start, stop, step).rename(name) # Here all "indexes" had 0 length, i.e. were empty. diff --git a/pandas/core/series.py b/pandas/core/series.py index 5c605a6b441c6..fac87515c7d96 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2756,13 +2756,15 @@ def __rmatmul__(self, other): return self.dot(np.transpose(other)) @doc(base.IndexOpsMixin.searchsorted, klass="Series") - def searchsorted(self, value, side="left", sorter=None): + def searchsorted(self, value, side="left", sorter=None) -> np.ndarray: return algorithms.searchsorted(self._values, value, side=side, sorter=sorter) # ------------------------------------------------------------------- # Combination - def append(self, to_append, ignore_index=False, verify_integrity=False): + def append( + self, to_append, ignore_index: bool = False, verify_integrity: bool = False + ): """ Concatenate two or more Series. @@ -2846,7 +2848,7 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity ) - def _binop(self, other, func, level=None, fill_value=None): + def _binop(self, other: Series, func, level=None, fill_value=None): """ Perform generic binary operation with optional fill value. @@ -3609,7 +3611,7 @@ def argsort(self, axis=0, kind="quicksort", order=None) -> Series: Returns ------- - Series + Series[np.intp] Positions of values within the sort order with -1 indicating nan values. @@ -3730,7 +3732,7 @@ def nlargest(self, n=5, keep="first") -> Series: """ return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest() - def nsmallest(self, n=5, keep="first") -> Series: + def nsmallest(self, n: int = 5, keep: str = "first") -> Series: """ Return the smallest `n` elements. @@ -3942,7 +3944,7 @@ def explode(self, ignore_index: bool = False) -> Series: return self._constructor(values, index=index, name=self.name) - def unstack(self, level=-1, fill_value=None): + def unstack(self, level=-1, fill_value=None) -> DataFrame: """ Unstack, also known as pivot, Series with MultiIndex to produce DataFrame. @@ -4294,7 +4296,11 @@ def _reduce( with np.errstate(all="ignore"): return op(delegate, skipna=skipna, **kwds) - def _reindex_indexer(self, new_index, indexer, copy): + def _reindex_indexer( + self, new_index: Index | None, indexer: np.ndarray | None, copy: bool + ) -> Series: + # Note: new_index is None iff indexer is None + # if not None, indexer is np.intp if indexer is None: if copy: return self.copy()