diff --git a/arkouda/pdarrayclass.py b/arkouda/pdarrayclass.py
index 7086335903..feb74cb5e3 100644
--- a/arkouda/pdarrayclass.py
+++ b/arkouda/pdarrayclass.py
@@ -450,6 +450,27 @@ def max_bits(self, max_bits):
             generic_msg(cmd="set_max_bits", args={"array": self, "max_bits": max_bits})
             self._max_bits = max_bits
 
+    def copy(self) -> pdarray:
+        """
+        Return a deep copy of this pdarray.
+
+        Returns
+        -------
+        pdarray
+            A deep copy of the pdarray.
+
+        Raises
+        ------
+        RuntimeError
+            Raised if the copy produced by ak.array is not a pdarray.
+        """
+        from arkouda.pdarraycreation import array
+
+        ret = array(self, copy=True)
+        if not isinstance(ret, pdarray):
+            raise RuntimeError("Could not copy pdarray.")
+        return ret
+
     def equals(self, other) -> bool_scalars:
         """
         Whether pdarrays are the same size and all entries are equal.
diff --git a/arkouda/pdarraycreation.py b/arkouda/pdarraycreation.py
index 5031094ed0..c172ec949a 100644
--- a/arkouda/pdarraycreation.py
+++ b/arkouda/pdarraycreation.py
@@ -139,8 +139,9 @@ def from_series(series: pd.Series, dtype: Optional[Union[type, str]] = None) ->
 
 
 def array(
-    a: Union[pdarray, np.ndarray, Iterable],
+    a: Union[pdarray, np.ndarray, Iterable, Strings],
     dtype: Union[np.dtype, type, str, None] = None,
+    copy: bool = False,
     max_bits: int = -1,
 ) -> Union[pdarray, Strings]:
     """
@@ -153,6 +154,10 @@ def array(
         Rank-1 array of a supported dtype
     dtype: np.dtype, type, or str
         The target dtype to cast values to
+    copy: bool, default=False
+        If True, the array data is copied; any such copy is deep, unlike numpy.
+        If False, a pdarray or Strings passed without a dtype is returned as is.
+        The False default also differs from numpy.
     max_bits: int
         Specifies the maximum number of bits; only used for bigint pdarrays
 
@@ -167,6 +172,8 @@ def array(
     TypeError
         Raised if a is not a pdarray, np.ndarray, or Python Iterable such as a
         list, array, tuple, or deque
+
+        Raised if a is a Strings and dtype is anything other than ak.str_
     RuntimeError
         Raised if a is not one-dimensional, nbytes > maxTransferBytes, a.dtype is
         not supported (not in DTypes), or if the product of a size and
@@ -192,6 +199,7 @@ def array(
     to create the Strings object and the two corresponding pdarrays for string
     bytes and offsets, respectively.
 
+
     Examples
     --------
     >>> ak.array(np.arange(1,10))
@@ -206,9 +214,22 @@ def array(
     """
     from arkouda.numpy import cast as akcast
 
-    # If a is already a pdarray, do nothing
+    if isinstance(a, Strings):
+        # Validate dtype before any early return so an unsupported cast is
+        # rejected whether or not a copy was requested.
+        # NOTE(review): a dtype given as a type object rather than the string
+        # "str_" is also rejected by this comparison; confirm that is intended.
+        if dtype and dtype != "str_":
+            raise TypeError(f"Cannot cast Strings to dtype {dtype} in ak.array")
+        return a[:] if copy else a
+
+    # If a is already a pdarray, only copy and/or cast it as requested
     if isinstance(a, pdarray):
-        casted = a if dtype is None else akcast(a, dtype)
+        if dtype is None:
+            casted = a[:] if copy else a
+        else:
+            # A requested dtype is honored even when copy is False.
+            casted = akcast(a, dtype)
         if dtype == bigint and max_bits != -1:
             casted.max_bits = max_bits
         return casted
diff --git a/arkouda/strings.py b/arkouda/strings.py
index 48ffa943d3..cb5bfb8dbd 100644
--- a/arkouda/strings.py
+++ b/arkouda/strings.py
@@ -339,6 +339,27 @@ def inferred_type(self) -> str:
         """
         return "string"
 
+    def copy(self) -> Strings:
+        """
+        Return a deep copy of this Strings object.
+
+        Returns
+        -------
+        Strings
+            A deep copy of the Strings.
+
+        Raises
+        ------
+        RuntimeError
+            Raised if the copy produced by ak.array is not a Strings.
+        """
+        from arkouda.pdarraycreation import array
+
+        ret = array(self, copy=True)
+        if not isinstance(ret, Strings):
+            raise RuntimeError("Could not copy Strings object.")
+        return ret
+
     def equals(self, other) -> bool_scalars:
         """
         Whether Strings are the same size and all entries are equal.
diff --git a/arkouda/util.py b/arkouda/util.py
index e84d7dfa5f..a9a3af80ed 100644
--- a/arkouda/util.py
+++ b/arkouda/util.py
@@ -2,12 +2,12 @@
 
 import builtins
 import json
-from typing import TYPE_CHECKING, Sequence, Tuple, Union, cast
+from typing import TYPE_CHECKING, Iterable, Sequence, Tuple, TypeVar, Union, cast
 from warnings import warn
 
+import numpy as np
 from typeguard import typechecked
 
-from arkouda.categorical import Categorical
 from arkouda.client import generic_msg, get_config, get_mem_used
 from arkouda.client_dtypes import BitVector, BitVectorizer, IPv4
 from arkouda.groupbyclass import GroupBy, broadcast
@@ -24,12 +24,18 @@
 from arkouda.pdarraysetops import unique
 from arkouda.segarray import SegArray
 from arkouda.sorting import coargsort
-from arkouda.strings import Strings
 from arkouda.timeclass import Datetime, Timedelta
 
 if TYPE_CHECKING:
     from arkouda.index import Index
     from arkouda.series import Series
+    from arkouda.strings import Strings
+    from arkouda.categorical import Categorical
+else:
+    Strings = TypeVar("Strings")
+    Series = TypeVar("Series")
+    Categorical = TypeVar("Categorical")
+    Index = TypeVar("Index")
 
 
 def identity(x):
@@ -172,7 +178,7 @@ def convert_if_categorical(values):
     """
     Convert a Categorical array to Strings for display
     """
-
+    from arkouda.categorical import Categorical
     if isinstance(values, Categorical):
         values = values.categories[values.codes]
     return values
@@ -192,6 +198,8 @@ def attach(name: str):
     from arkouda.index import Index, MultiIndex
     from arkouda.pdarrayclass import pdarray
     from arkouda.series import Series
+    from arkouda.categorical import Categorical
+    from arkouda.strings import Strings
 
     rep_msg = json.loads(cast(str, generic_msg(cmd="attach", args={"name": name})))
     rtn_obj = None
@@ -425,7 +433,7 @@ def convert_bytes(nbytes, unit="B"):
 
 
 def is_numeric(
-    arry: Union[pdarray, Strings, Categorical, "Series", "Index"]  # noqa: F821
+    arry: Union[pdarray, Strings, "Categorical", "Series", "Index"]  # noqa: F821
 ) -> builtins.bool:
     """
     Check if the dtype of the given array is numeric.
@@ -460,7 +468,7 @@ def is_numeric(
         return False
 
 
-def is_float(arry: Union[pdarray, Strings, Categorical, "Series", "Index"]):  # noqa: F821
+def is_float(arry: Union[pdarray, Strings, "Categorical", "Series", "Index"]):  # noqa: F821
     """
     Check if the dtype of the given array is float.
 
@@ -494,7 +502,7 @@ def is_float(arry: Union[pdarray, Strings, Categorical, "Series", "Index"]):  #
         return False
 
 
-def is_int(arry: Union[pdarray, Strings, Categorical, "Series", "Index"]):  # noqa: F821
+def is_int(arry: Union[pdarray, Strings, "Categorical", "Series", "Index"]):  # noqa: F821
     """
     Check if the dtype of the given array is int.
 
@@ -529,9 +537,10 @@ def is_int(arry: Union[pdarray, Strings, Categorical, "Series", "Index"]):  # no
         return False
 
 
+@typechecked
 def map(
-    values: Union[pdarray, Strings, Categorical], mapping: Union[dict, "Series"]
-) -> Union[pdarray, Strings]:
+    values: Union[pdarray, "Strings", "Categorical"], mapping: Union[dict, "Series"]
+) -> Union[pdarray, "Strings"]:
     """
     Map values of an array according to an input mapping.
 
@@ -573,7 +582,9 @@ def map(
     import numpy as np
 
     from arkouda import Series, array, broadcast, full
+    from arkouda.categorical import Categorical
     from arkouda.pdarraysetops import in1d
+    from arkouda.strings import Strings
 
     keys = values
     gb = GroupBy(keys, dropna=False)
@@ -623,3 +634,28 @@ def _infer_shape_from_size(size):
         shape = full_size
         ndim = 1
     return shape, ndim, full_size
+
+
+@typechecked
+def copy(a: Union[pdarray, np.ndarray, Iterable, "Strings"]) -> Union[pdarray, "Strings"]:
+    """
+    Return a deep copy of the given object as an arkouda array.
+
+    Parameters
+    ----------
+    a : pdarray, np.ndarray, Iterable, or Strings
+        The object to copy.
+
+    Returns
+    -------
+    pdarray or Strings
+        A Strings when a is a Strings; otherwise the result of
+        ``ak.array(a, copy=True)``.
+    """
+    from arkouda.pdarraycreation import array
+    from arkouda.strings import Strings
+
+    if isinstance(a, Strings):
+        return a[:]
+
+    return array(a, copy=True)
diff --git a/tests/pdarray_creation_test.py b/tests/pdarray_creation_test.py
index f3878eb069..48b716b2c3 100644
--- a/tests/pdarray_creation_test.py
+++ b/tests/pdarray_creation_test.py
@@ -8,7 +8,9 @@
 import pytest
 
 import arkouda as ak
-from arkouda.testing import assert_arkouda_array_equal, assert_equivalent
+from arkouda.testing import assert_arkouda_array_equal
+from arkouda.testing import assert_equal as ak_assert_equal
+from arkouda.testing import assert_equivalent
 
 INT_SCALARS = list(ak.dtypes.int_scalars.__args__)
 NUMERIC_SCALARS = list(ak.dtypes.numeric_scalars.__args__)
@@ -43,6 +45,15 @@ def test_array_creation(self, dtype):
         assert len(pda) == fixed_size
         assert dtype == pda.dtype
 
+    def test_array_creation_strings(self):
+        fixed_size = 100
+        pda = ak.array(ak.arange(fixed_size, dtype=ak.str_))
+        assert isinstance(pda, ak.Strings)
+        assert len(pda) == fixed_size
+
+        with pytest.raises(TypeError):
+            ak.array(ak.arange(fixed_size, dtype=ak.str_), dtype=ak.int64)
+
     @pytest.mark.skip_if_max_rank_less_than(3)
     @pytest.mark.parametrize("size", pytest.prob_size)
     @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_])
@@ -105,6 +116,25 @@ def test_array_creation_misc(self):
         with pytest.raises(TypeError):
             ak.array(list(list(0)))
 
+    @pytest.mark.parametrize("dtype", [ak.int64, ak.float64, ak.bool_, ak.bigint])
+    def test_array_copy(self, dtype):
+        a = ak.arange(100, dtype=dtype)
+
+        b = ak.array(a, copy=True)
+        assert a is not b
+        ak_assert_equal(a, b)
+
+        c = ak.array(a, copy=False)
+        assert a is c
+        ak_assert_equal(a, c)
+
+    def test_array_no_copy_still_casts(self):
+        # copy=False must not short-circuit an explicit dtype conversion.
+        a = ak.arange(100, dtype=ak.int64)
+        b = ak.array(a, dtype=ak.float64, copy=False)
+        assert a is not b
+        assert b.dtype == ak.float64
+
     @pytest.mark.skip_if_max_rank_less_than(2)
     def test_array_creation_transpose_bug_reproducer(self):
 
diff --git a/tests/pdarrayclass_test.py b/tests/pdarrayclass_test.py
index 33a4a20e97..9c40fe4ced 100644
--- a/tests/pdarrayclass_test.py
+++ b/tests/pdarrayclass_test.py
@@ -219,3 +219,21 @@ def test_reductions_match_numpy_1D_TF(self, op, axis):
     def test_reductions_match_numpy_3D_TF(self, op, axis):
         pda = ak.array([True, True, False, True, True, True, True, True]).reshape((2, 2, 2))
         self.assert_reduction_ops_match(op, pda, axis=axis)
+
+    @pytest.mark.parametrize("dtype", DTYPES)
+    def test_copy(self, dtype):
+        fixed_size = 100
+        a = ak.arange(fixed_size, dtype=dtype)
+        a_cpy = a.copy()
+
+        assert a_cpy is not a
+        ak_assert_equal(a, a_cpy)
+
+    @pytest.mark.skip_if_max_rank_less_than(3)
+    @pytest.mark.parametrize("dtype", DTYPES)
+    def test_copy_3D(self, dtype):
+        a = ak.arange(1000, dtype=dtype).reshape((10, 10, 10))
+        a_cpy = a.copy()
+
+        assert a_cpy is not a
+        ak_assert_equal(a, a_cpy)
diff --git a/tests/string_test.py b/tests/string_test.py
index 05bd1c151e..120b7af555 100644
--- a/tests/string_test.py
+++ b/tests/string_test.py
@@ -918,3 +918,11 @@ def test_flatten(self, size):
         strings = self.get_strings(size, base_words)
 
         ak_assert_equal(strings.flatten(), strings)
+
+    def test_copy(self):
+        fixed_size = 100
+        a = ak.arange(fixed_size, dtype=ak.str_)
+        a_cpy = a.copy()
+
+        assert a_cpy is not a
+        ak_assert_equal(a, a_cpy)
diff --git a/tests/util_test.py b/tests/util_test.py
index 5482bb7a67..a255be921a 100644
--- a/tests/util_test.py
+++ b/tests/util_test.py
@@ -2,6 +2,7 @@
 
 import arkouda as ak
 from arkouda.util import is_float, is_int, is_numeric, map
+import pytest
 
 
 class TestUtil:
@@ -119,3 +120,13 @@ def test_map(self):
 
         result = map(d, {"1": 7.0})
         assert np.allclose(result.to_list(), [7.0, 7.0, np.nan, np.nan, np.nan], equal_nan=True)
+
+    @pytest.mark.parametrize("dtype", [ak.int64, ak.float64, ak.bool_, ak.bigint, ak.str_])
+    def test_copy(self, dtype):
+        from arkouda.testing import assert_equal as ak_assert_equal
+
+        a = ak.arange(10, dtype=dtype)
+        b = ak.util.copy(a)
+
+        assert a is not b
+        ak_assert_equal(a, b)