Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,10 @@ def _hash_pandas_object(
values, encoding=encoding, hash_key=hash_key, categorize=categorize
)

def _cast_pointwise_result(self, values: ArrayLike) -> ArrayLike:
values = np.asarray(values, dtype=object)
return lib.maybe_convert_objects(values, convert_non_numeric=True)

# Signature of "argmin" incompatible with supertype "ExtensionArray"
def argmin(self, axis: AxisInt = 0, skipna: bool = True): # type: ignore[override]
# override base class by adding axis keyword
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,8 @@ def _cast_pointwise_result(self, values) -> ArrayLike:
# e.g. test_by_column_values_with_same_starting_value with nested
# values, one entry of which is an ArrowStringArray
# or test_agg_lambda_complex128_dtype_conversion for complex values
return super()._cast_pointwise_result(values)
values = np.asarray(values, dtype=object)
return lib.maybe_convert_objects(values, convert_non_numeric=True)

if pa.types.is_null(arr.type):
if lib.infer_dtype(values) == "decimal":
Expand Down Expand Up @@ -498,7 +499,8 @@ def _cast_pointwise_result(self, values) -> ArrayLike:
if self.dtype.na_value is np.nan:
# ArrowEA has different semantics, so we return numpy-based
# result instead
return super()._cast_pointwise_result(values)
values = np.asarray(values, dtype=object)
return lib.maybe_convert_objects(values, convert_non_numeric=True)
return ArrowExtensionArray(arr)
return self._from_pyarrow_array(arr)

Expand Down
65 changes: 61 additions & 4 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
cast,
overload,
)
import warnings

import numpy as np

Expand All @@ -33,6 +34,7 @@
cache_readonly,
set_module,
)
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import (
validate_bool_kwarg,
validate_insert_loc,
Expand Down Expand Up @@ -86,6 +88,7 @@
AstypeArg,
AxisInt,
Dtype,
DtypeObj,
FillnaOptions,
InterpolateOptions,
NumpySorter,
Expand Down Expand Up @@ -383,13 +386,67 @@ def _from_factorized(cls, values, original):
"""
raise AbstractMethodError(cls)

def _cast_pointwise_result(self, values) -> ArrayLike:
@classmethod
def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
"""
Strict analogue to _from_sequence, allowing only sequences of scalars
that should be specifically inferred to the given dtype.

Parameters
----------
scalars : sequence
dtype : ExtensionDtype

Raises
------
TypeError or ValueError

Notes
-----
This is called in a try/except block when casting the result of a
pointwise operation in ExtensionArray._cast_pointwise_result.
"""
try:
return cls._from_sequence(scalars, dtype=dtype, copy=False)
except (ValueError, TypeError):
raise
except Exception:
warnings.warn(
"_from_scalars should only raise ValueError or TypeError. "
"Consider overriding _from_scalars where appropriate.",
stacklevel=find_stack_level(),
)
raise

def _cast_pointwise_result(self, values, **kwargs) -> ArrayLike:
"""
Construct an ExtensionArray after a pointwise operation.

Cast the result of a pointwise operation (e.g. Series.map) to an
array, preserve dtype_backend if possible.
array. This is not required to return an ExtensionArray of the same
type as self or of the same dtype. It can also return another
ExtensionArray of the same "family" if you implement multiple
ExtensionArrays/Dtypes that are interoperable (e.g. if you have float
array with units, this method can return an int array with units).

If converting to your own ExtensionArray is not possible, this method
falls back to returning an array with the default type inference.
If you only need to cast to `self.dtype`, it is recommended to override
`_from_scalars` instead of this method.

Parameters
----------
values : sequence

Returns
-------
ExtensionArray or ndarray
"""
values = np.asarray(values, dtype=object)
return lib.maybe_convert_objects(values, convert_non_numeric=True)
try:
return type(self)._from_scalars(values, dtype=self.dtype)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand correctly, this change is because older geopandas implements `_from_scalars`? Can we catch and deprecate this so that we can eventually re-simplify this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, see my note about that in the top post (first bullet point)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I'm asking you to catch+deprecate

except (ValueError, TypeError):
values = np.asarray(values, dtype=object)
return lib.maybe_convert_objects(values, convert_non_numeric=True)

# ------------------------------------------------------------------------
# Must be a Sequence
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,8 @@ def _from_factorized(cls, values, original) -> Self:
return cls(values, dtype=original.dtype)

def _cast_pointwise_result(self, values):
result = super()._cast_pointwise_result(values)
values = np.asarray(values, dtype=object)
result = lib.maybe_convert_objects(values, convert_non_numeric=True)
if result.dtype.kind == self.dtype.kind:
try:
# e.g. test_groupby_agg_extension
Expand Down
18 changes: 9 additions & 9 deletions pandas/tests/extension/decimal/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,15 @@ def _from_sequence_of_strings(cls, strings, *, dtype: ExtensionDtype, copy=False
def _from_factorized(cls, values, original):
return cls(values)

def _cast_pointwise_result(self, values):
result = super()._cast_pointwise_result(values)
try:
# If this were ever made a non-test EA, special-casing could
# be avoided by handling Decimal in maybe_convert_objects
res = type(self)._from_sequence(result, dtype=self.dtype)
except (ValueError, TypeError):
return result
return res
# test to ensure that the base class _cast_pointwise_result works as expected
# def _cast_pointwise_result(self, values):
# try:
# # If this were ever made a non-test EA, special-casing could
# # be avoided by handling Decimal in maybe_convert_objects
# res = type(self)._from_sequence(values, dtype=self.dtype)
# except (ValueError, TypeError):
# return values
# return res

_HANDLED_TYPES = (decimal.Decimal, numbers.Number, np.ndarray)

Expand Down
9 changes: 6 additions & 3 deletions pandas/tests/extension/json/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,14 @@ def _from_factorized(cls, values, original):
return cls([UserDict(x) for x in values if x != ()])

def _cast_pointwise_result(self, values):
result = super()._cast_pointwise_result(values)
try:
return type(self)._from_sequence(result, dtype=self.dtype)
return type(self)._from_sequence(values, dtype=self.dtype)
except (ValueError, TypeError):
return result
# TODO replace with public function
from pandas._libs import lib

values = np.asarray(values, dtype=object)
return lib.maybe_convert_objects(values, convert_non_numeric=True)

def __getitem__(self, item):
if isinstance(item, tuple):
Expand Down
Loading