-
-
Notifications
You must be signed in to change notification settings - Fork 19.4k
API: Copy inputs in Index subclass constructors by default (GH#63388) #63398
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
0537d03
3b7b85b
67eaf05
f59ab9e
9221e5a
32ee4dc
b55f4a0
91e78da
1252782
923c949
122f554
66a37fe
5a211cc
9373333
bc3c54a
6731252
9acc8d1
180fa7e
f7a00a4
ca90c29
75a3aac
e689f7b
65d8012
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -5196,6 +5196,19 @@ def _raise_scalar_data_error(cls, data): | |||||
| "was passed" | ||||||
| ) | ||||||
|
|
||||||
| @classmethod | ||||||
| def _maybe_copy_input(cls, data, copy, dtype): | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| """ | ||||||
| Ensure that the input data is copied if necessary. | ||||||
| GH#63388 | ||||||
| """ | ||||||
| if isinstance(data, (ExtensionArray, np.ndarray)): | ||||||
| if copy is not False: | ||||||
| if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): | ||||||
| data = data.copy() | ||||||
| copy = False | ||||||
| return data, copy | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Then you can remove the |
||||||
|
|
||||||
| def _validate_fill_value(self, value): | ||||||
| """ | ||||||
| Check if the value can be inserted into our array without casting, | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,7 +27,9 @@ | |
| set_module, | ||
| ) | ||
|
|
||
| from pandas.core.dtypes.common import is_integer | ||
| from pandas.core.dtypes.common import ( | ||
| is_integer, | ||
| ) | ||
|
Comment on lines
+30
to
+32
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you undo these import changes if the contents haven't changed in your PR? |
||
| from pandas.core.dtypes.dtypes import PeriodDtype | ||
| from pandas.core.dtypes.generic import ABCSeries | ||
| from pandas.core.dtypes.missing import is_valid_na_for_dtype | ||
|
|
@@ -101,8 +103,13 @@ class PeriodIndex(DatetimeIndexOpsMixin): | |
| One of pandas period strings or corresponding objects. | ||
| dtype : str or PeriodDtype, default None | ||
| A dtype from which to extract a freq. | ||
| copy : bool | ||
| Make a copy of input ndarray. | ||
| copy : bool, default None | ||
| Whether to copy input data, only relevant for array, Series, and Index | ||
| inputs (for other input, e.g. a list, a new array is created anyway). | ||
| Defaults to True for array input and False for Index/Series. | ||
| Set to False to avoid copying array input at your own risk (if you | ||
| know the input data won't be modified elsewhere). | ||
| Set to True to force copying Series/Index input up front. | ||
| name : str, default None | ||
| Name of the resulting PeriodIndex. | ||
|
|
@@ -220,7 +227,7 @@ def __new__( | |
| data=None, | ||
| freq=None, | ||
| dtype: Dtype | None = None, | ||
| copy: bool = False, | ||
| copy: bool | None = None, | ||
| name: Hashable | None = None, | ||
| ) -> Self: | ||
| refs = None | ||
|
|
@@ -231,6 +238,9 @@ def __new__( | |
|
|
||
| freq = validate_dtype_freq(dtype, freq) | ||
|
|
||
| # GH#63388 | ||
| data, copy = cls._maybe_copy_input(data, copy, dtype) | ||
|
|
||
| # PeriodIndex allow PeriodIndex(period_index, freq=different) | ||
| # Let's not encourage that kind of behavior in PeriodArray. | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| import numpy as np | ||
|
|
||
| from pandas import ( | ||
| Interval, | ||
| IntervalIndex, | ||
| Series, | ||
| array, | ||
| ) | ||
| import pandas._testing as tm | ||
| from pandas.tests.copy_view.util import get_array | ||
|
|
||
|
|
||
| def test_constructor_copy_input_interval_ea_default(): | ||
| # GH 63388 | ||
| arr = array([Interval(0, 1), Interval(1, 2)]) | ||
| idx = IntervalIndex(arr) | ||
| assert not tm.shares_memory(arr, idx.array) | ||
|
|
||
|
|
||
| def test_series_from_temporary_intervalindex_readonly_data(): | ||
| # GH 63388 | ||
| arr = array([Interval(0, 1), Interval(1, 2)]) | ||
| arr._left.flags.writeable = False | ||
| arr._right.flags.writeable = False | ||
| ser = Series(IntervalIndex(arr)) | ||
| assert not np.shares_memory(arr._left, get_array(ser)._left) | ||
| ser.iloc[0] = Interval(5, 6) | ||
| expected = Series([Interval(5, 6), Interval(1, 2)], dtype="interval[int64, right]") | ||
| tm.assert_series_equal(ser, expected) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you remove this note (I'll probably be adding something related to this in the copy on write guide)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it is probably still worth mentioning it here explicitly as well? But I think we can simplify this to just say that all the Index constructors now copy array input by default, to be consistent with Series
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK sure sounds good. @zacharym-collins could you apply that suggestion. Also could you clarify that only numpy arrays and pandas ExtensionArrays are copied by default?