# Source code for arkouda.pandas.extension._arkouda_array

from __future__ import annotations

from types import NotImplementedType
from typing import TYPE_CHECKING, Any, Callable, Sequence, TypeVar, Union, overload
from typing import cast as type_cast

import numpy as np
import pandas as pd

from numpy import ndarray
from numpy.typing import NDArray
from pandas.api.extensions import ExtensionArray
from pandas.core.dtypes.dtypes import ExtensionDtype

from arkouda.numpy.dtypes import dtype as ak_dtype

from ._arkouda_extension_array import ArkoudaExtensionArray
from ._dtypes import (
    ArkoudaBigintDtype,
    ArkoudaBoolDtype,
    ArkoudaFloat64Dtype,
    ArkoudaInt64Dtype,
    ArkoudaUint8Dtype,
    ArkoudaUint64Dtype,
)


# ``pdarray`` is imported for static type checkers only; presumably this avoids a
# circular import at runtime -- TODO confirm. At runtime the name is bound to a
# placeholder ``TypeVar`` so module-level references to ``pdarray`` still resolve;
# the methods below import the real class locally where they need it.
if TYPE_CHECKING:
    from arkouda.numpy.pdarrayclass import pdarray
else:
    pdarray = TypeVar("pdarray")

# Names exported by ``from <this module> import *``.
__all__ = ["ArkoudaArray"]



[docs]
class ArkoudaArray(ArkoudaExtensionArray, ExtensionArray):
    """
    Arkouda-backed numeric/bool pandas ExtensionArray.

    Wraps or converts supported inputs into an Arkouda ``pdarray`` to serve as the
    backing store. Ensures the underlying array is 1-D and lives on the Arkouda server.

    Parameters
    ----------
    data : pdarray | ndarray | Sequence[Any] | ArkoudaArray
        Input to wrap or convert.
        - If an Arkouda ``pdarray``, it is used directly unless ``dtype`` is given
          or ``copy=True``, in which case a new array is created via ``ak.array``.
        - If a NumPy array, it is transferred to Arkouda via ``ak.array``.
        - If a Python sequence, it is converted to NumPy then to Arkouda.
        - If another ``ArkoudaArray``, its underlying ``pdarray`` is reused.
    dtype : Any, optional
        Desired dtype to cast to (NumPy dtype or Arkouda dtype string). If omitted,
        dtype is inferred from ``data``.
    copy : bool
        If True, attempt to copy the underlying data when converting/wrapping.
        Default is False.

    Raises
    ------
    TypeError
        If ``data`` cannot be interpreted as an Arkouda array-like object.
    ValueError
        If the resulting array is not one-dimensional.

    Attributes
    ----------
    default_fill_value : int
        Sentinel used when filling missing values (default: -1).

    Examples
    --------
    >>> import arkouda as ak
    >>> from arkouda.pandas.extension import ArkoudaArray
    >>> ArkoudaArray(ak.arange(5))
    ArkoudaArray([0 1 2 3 4])
    >>> ArkoudaArray([10, 20, 30])
    ArkoudaArray([10 20 30])
    """

    default_fill_value: int = -1

    def __init__(
        self,
        data: pdarray | ndarray | Sequence[Any] | ArkoudaArray,
        dtype: Any = None,
        copy: bool = False,
    ):
        """
        Wrap or convert ``data`` into a 1-D Arkouda ``pdarray`` stored on ``self._data``.

        Parameters
        ----------
        data : pdarray | ndarray | Sequence[Any] | ArkoudaArray
            Input to wrap. Lists and tuples go through ``np.asarray`` and then
            ``ak.array``; NumPy arrays go straight to ``ak.array``; an
            ``ArkoudaArray`` is unwrapped to its backing ``pdarray`` and then
            handled exactly like a ``pdarray``.
        dtype : Any, optional
            Target dtype forwarded to ``ak.array``. When given for a ``pdarray``
            (or ``ArkoudaArray``) input, a new array is created.
        copy : bool
            Forwarded to ``ak.array`` for NumPy and ``pdarray`` inputs.
            Default is False.

        Raises
        ------
        TypeError
            If ``data`` is not a list, tuple, ``ndarray``, ``pdarray`` or
            ``ArkoudaArray``.
        ValueError
            If the resulting array reports ``ndim != 1``.
        """
        from arkouda.numpy.pdarrayclass import pdarray
        from arkouda.numpy.pdarraycreation import array as ak_array

        # Unwrap first so that ``dtype`` / ``copy`` are honoured for ArkoudaArray
        # inputs too. Previously they were silently ignored, which meant
        # ``ArkoudaArray(arr, copy=True)`` aliased ``arr``'s buffer and in-place
        # ``__setitem__`` on one mutated the other.
        if isinstance(data, ArkoudaArray):
            data = data._data

        if isinstance(data, (list, tuple)):
            data = type_cast(pdarray, ak_array(np.asarray(data), dtype=dtype))
        elif isinstance(data, np.ndarray):
            data = type_cast(pdarray, ak_array(data, dtype=dtype, copy=copy))
        elif not isinstance(data, pdarray):
            raise TypeError(
                f"Expected arkouda.pdarray, ndarray, or ArkoudaArray, got {type(data).__name__}"
            )
        elif dtype is not None or copy:
            # Existing pdarray: only rebuild when a cast or a copy was requested.
            data = type_cast(pdarray, ak_array(data, dtype=dtype, copy=copy))

        # Objects without an ``ndim`` attribute are treated as 1-D.
        if getattr(data, "ndim", 1) != 1:
            raise ValueError(
                f"ArkoudaArray must be 1-dimensional, got shape {getattr(data, 'shape', None)}"
            )

        self._data = data

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        """
        Build an instance from ``scalars``.

        ``dtype`` is normalized first: any spelling of bigint (the strings
        ``"bigint"`` / ``"ak.bigint"``, an object whose ``name`` or ``str()`` is
        one of those, or the ``ArkoudaBigintDtype`` class or an instance of it)
        becomes the string ``"bigint"``; otherwise an object carrying a
        ``numpy_dtype`` attribute is replaced by that attribute.

        A ``Categorical`` input contributes its ``codes`` (cast to ``dtype`` when
        one is given); every other input is passed to ``ak.array``.

        Raises
        ------
        TypeError
            If ``scalars`` is a ``Categorical`` whose codes are not a ``pdarray``.
        """
        from arkouda.numpy.numeric import cast as ak_cast
        from arkouda.numpy.pdarrayclass import pdarray
        from arkouda.numpy.pdarraycreation import array as ak_array
        from arkouda.pandas.categorical import Categorical

        from ._dtypes import ArkoudaBigintDtype

        bigint_spellings = {"bigint", "ak.bigint"}
        spelled_as_bigint = dtype is not None and (
            getattr(dtype, "name", None) in bigint_spellings or str(dtype) in bigint_spellings
        )
        if (
            spelled_as_bigint
            or dtype is ArkoudaBigintDtype
            or isinstance(dtype, ArkoudaBigintDtype)
        ):
            dtype = "bigint"

        if dtype is not None and hasattr(dtype, "numpy_dtype"):
            dtype = dtype.numpy_dtype

        # Common case: anything that is not a Categorical goes straight to ak.array.
        if not isinstance(scalars, Categorical):
            return cls(ak_array(scalars, dtype=dtype, copy=copy))

        codes = scalars.codes

        # Some implementations might return an ArkoudaArray here
        if isinstance(codes, ArkoudaArray):
            codes = codes._data

        if not isinstance(codes, pdarray):
            raise TypeError(f"Categorical.codes expected pdarray, got {type(codes).__name__}")

        return cls(codes if dtype is None else ak_cast(codes, dtype))

    def __getitem__(self, key: Any) -> Any:
        """
        Retrieve one or more values using a pandas/NumPy-style indexer.

        Parameters
        ----------
        key : Any
            A valid indexer for 1D array-like data. This may be:
            - A scalar integer position (e.g. ``1``)
            - A Python ``slice`` (e.g. ``1:3``)
            - A list of integer positions or of booleans
            - A NumPy array of booleans, signed integers (sent as ``int64``) or
              unsigned integers (sent as ``uint64``)
            - Anything else the underlying ``pdarray`` accepts, which is passed
              through unchanged.

        Returns
        -------
        Any
            A scalar value for scalar indexers, or an ``ArkoudaArray`` for
            sequence-like indexers.

        Raises
        ------
        TypeError
            If a NumPy array or list indexer has an unsupported dtype.
        NotImplementedError
            If a list indexer contains mixed element dtypes (e.g. a mixture
            of booleans and integers), which is not supported.

        Examples
        --------
        >>> import arkouda as ak
        >>> from arkouda.pandas.extension import ArkoudaArray
        >>> data = ak.arange(5)
        >>> arr = ArkoudaArray(data)

        Scalar integer index returns a scalar:

        >>> arr[1]
        np.int64(1)

        Slicing returns another ArkoudaArray:

        >>> arr[1:4]
        ArkoudaArray([1 2 3])

        List-like integer positions:

        >>> arr[[0, 2, 4]]
        ArkoudaArray([0 2 4])

        Boolean mask (NumPy array):

        >>> import numpy as np
        >>> mask = np.array([True, False, True, False, True])
        >>> arr[mask]
        ArkoudaArray([0 2 4])
        """
        from arkouda.numpy.pdarrayclass import pdarray
        from arkouda.numpy.pdarraycreation import array as ak_array

        # Normalize NumPy ndarray indexers
        if isinstance(key, np.ndarray):
            if key.dtype == np.bool_:
                key = ak_array(key, dtype=bool)
            elif np.issubdtype(key.dtype, np.unsignedinteger):
                # Must be tested before ``np.integer``: unsigned types are also
                # sub-dtypes of ``np.integer``, so the reverse order made this
                # branch unreachable and forced unsigned indexers to int64.
                key = ak_array(key, dtype="uint64")
            elif np.issubdtype(key.dtype, np.integer):
                key = ak_array(key, dtype="int64")
            else:
                raise TypeError(f"Unsupported NumPy index type {key.dtype}")

        # Normalize Python lists
        elif isinstance(key, list):
            if len(key) == 0:
                # Empty selection -> empty ArkoudaArray of same dtype
                empty = ak_array([], dtype=self._data.dtype)
                return self.__class__(empty)

            first = key[0]
            first_dtype = ak_dtype(first)
            for item in key:
                item_dtype = ak_dtype(item)
                if first_dtype != item_dtype:
                    raise NotImplementedError(
                        f"Mixed dtypes are not supported: {item_dtype} vs {first_dtype}"
                    )

            # bool is checked before int because ``bool`` is a subclass of ``int``.
            if isinstance(first, (bool, np.bool_)):
                key = ak_array(np.array(key, dtype=bool))
            elif isinstance(first, (int, np.integer)):
                key = ak_array(np.array(key, dtype=np.int64))
            else:
                raise TypeError(f"Unsupported list index type: {type(first)}")

        # Perform the indexing operation
        result = self._data[key]

        # Scalar key → return a scalar
        if np.isscalar(key):
            # If server returned a pdarray of length 1, extract scalar
            if isinstance(result, pdarray) and result.size == 1:
                return result[0]
            return result

        # All other cases → wrap result in same class
        return self.__class__(result)

    def __setitem__(self, key: Any, value: Any) -> None:
        """
        Assign one or more values to the underlying Arkouda array in-place.

        Parameters
        ----------
        key : Any
            A positional indexer selecting the locations to modify. Supported forms include:

            - Scalar integer position (e.g. ``arr[3] = ...``)
            - Slice (e.g. ``arr[1:4] = ...``)
            - Boolean mask (NumPy ``ndarray`` of bools, or Python ``list`` of bools)
            - Integer indexer (NumPy ``ndarray`` of integers, Python ``list`` of ints)
            - Arkouda ``pdarray`` indexer (integer index array or boolean mask)

            For Python ``list`` indexers, all elements must be of a single supported type:
            all-bool or all-int. Mixed bool/int lists are rejected. Boolean-mask lists
            must have length equal to ``len(self)``.

        value : Any
            The value(s) to assign.

            - If a scalar (NumPy scalar or Python scalar), it is broadcast to all selected
              positions.
            - If an ``ArkoudaArray`` or Arkouda ``pdarray``, it is assigned directly.
            - Otherwise, array-like inputs (e.g. Python lists, NumPy arrays) are converted
              to an Arkouda ``pdarray`` and must be aligned with ``key``.

        Raises
        ------
        TypeError
            If a Python list indexer contains unsupported element types.
        NotImplementedError
            If a Python list indexer mixes boolean and integer elements.
        IndexError
            If a Python list boolean mask has length different from ``len(self)``.

        Notes
        -----
        This operation mutates the underlying server-side array in-place.

        Empty indexers (e.g. an empty Python list, or an empty NumPy integer indexer
        after normalization) are treated as a no-op.

        Examples
        --------
        Basic scalar assignment by position:

        >>> import arkouda as ak
        >>> import numpy as np
        >>> from arkouda.pandas.extension import ArkoudaArray
        >>> arr = ArkoudaArray(ak.arange(5))
        >>> arr[0] = 42
        >>> arr
        ArkoudaArray([42 1 2 3 4])

        Assigning with a Python list of integer positions:

        >>> arr = ArkoudaArray(ak.arange(5))
        >>> arr[[1, 3]] = 99
        >>> arr
        ArkoudaArray([0 99 2 99 4])

        Assigning with a NumPy boolean mask:

        >>> arr = ArkoudaArray(ak.arange(5))
        >>> mask = arr.to_ndarray() % 2 == 0
        >>> arr[mask] = -1
        >>> arr
        ArkoudaArray([-1 1 -1 3 -1])

        Assigning with a NumPy integer indexer:

        >>> arr = ArkoudaArray(ak.arange(5))
        >>> idx = np.array([1, 3], dtype=np.int64)
        >>> arr[idx] = 7
        >>> arr
        ArkoudaArray([0 7 2 7 4])

        Assigning from another ArkoudaArray:

        >>> arr = ArkoudaArray(ak.arange(5))
        >>> other = ArkoudaArray(ak.arange(10, 15))
        >>> idx = [1, 3, 4]
        >>> arr[idx] = other[idx]
        >>> arr
        ArkoudaArray([0 11 2 13 14])

        Python list boolean masks must match the array length:

        >>> arr = ArkoudaArray(ak.arange(5))
        >>> arr[[True, False, True]] = 0
        Traceback (most recent call last):
        ...
        IndexError: Boolean indexer has wrong length: 3 instead of 5
        """
        from arkouda.numpy.pdarrayclass import pdarray
        from arkouda.numpy.pdarraycreation import array as ak_array

        # Step 1: validate a Python list indexer and turn it into a NumPy array.
        if isinstance(key, list):
            if len(key) == 0:
                return  # empty list => noop

            has_bool = False
            has_int = False
            for k in key:
                # bool must be tested first: ``bool`` is a subclass of ``int``.
                if isinstance(k, (bool, np.bool_)):
                    has_bool = True
                elif isinstance(k, (int, np.integer)):
                    has_int = True
                else:
                    raise TypeError(
                        "Only lists of ints or bools are supported for __setitem__ indexers."
                    )

                if has_bool and has_int:
                    raise NotImplementedError("Mixed index list dtypes (bool + int) are not supported.")

            if has_bool:
                # boolean mask must match array length
                if len(key) != len(self):
                    raise IndexError(
                        f"Boolean indexer has wrong length: {len(key)} instead of {len(self)}"
                    )
                key = np.array(key, dtype=bool)
            else:
                key = np.array(key, dtype=np.int64)

        # Step 2: ship NumPy bool/integer indexers to the server. Lists never
        # reach this point as lists (step 1 converted or returned), so no
        # separate list branch is needed here.
        if isinstance(key, np.ndarray) and (
            key.dtype == np.bool_ or np.issubdtype(key.dtype, np.integer)
        ):
            key = ak_array(key)

        if _is_empty_indexer(key):
            # Setting nothing is a no-op, consistent with numpy/pandas
            return

        # Step 3: normalize the value. pdarrays and scalars are assigned as-is;
        # an ArkoudaArray is unwrapped; any other array-like is converted.
        if isinstance(value, ArkoudaArray):
            value = value._data
        elif not isinstance(value, pdarray) and not np.isscalar(value):
            value = ak_array(value)

        self._data[key] = value

    # -------------------------------------------------------------------------
    # pandas comparison protocol hook
    # -------------------------------------------------------------------------

    def _cmp_method(
        self,
        other: Any,
        op: Callable[[Any, Any], Any],
    ) -> ArkoudaArray | NotImplementedType:
        """
        Apply the comparison ``op`` elementwise between this array and ``other``.

        This is the pandas ``ExtensionArray`` comparison hook; the work is
        delegated to ``_binary_op``.

        Parameters
        ----------
        other : Any
            Right-hand operand. Whatever ``_binary_op`` accepts is supported.
        op : Callable[[Any, Any], Any]
            Binary comparison called as ``op(lhs, rhs)``, e.g. ``operator.eq``
            or ``operator.lt``.

        Returns
        -------
        ArkoudaArray | NotImplementedType
            The wrapped result of ``op``, or ``NotImplemented`` when
            ``_binary_op`` cannot handle ``other``.
        """
        # ``_binary_op`` already yields NotImplemented for unsupported operands,
        # so its result is returned unchanged.
        return self._binary_op(other, op)

    def _coerce_other_for_binop(self, other: Any) -> tuple[Any, str]:
        """
        Classify and normalize the right-hand operand of a binary operation.

        Parameters
        ----------
        other : Any
            Operand to normalize.

        Returns
        -------
        tuple[Any, str]
            ``(operand, kind)`` where ``kind`` is:

            - ``"pdarray"``: ``operand`` is an Arkouda ``pdarray`` (taken from an
              ``ArkoudaArray``, passed through, or built from a list, tuple or
              NumPy array via ``ak.array``);
            - ``"scalar"``: ``operand`` is ``other`` itself, as recognized by
              ``np.isscalar``;
            - ``"notimpl"``: ``operand`` is ``None`` because the type is unsupported.
        """
        from arkouda.numpy.pdarrayclass import pdarray
        from arkouda.numpy.pdarraycreation import array as ak_array

        if isinstance(other, ArkoudaArray):
            operand, kind = other._data, "pdarray"
        elif isinstance(other, pdarray):
            operand, kind = other, "pdarray"
        elif np.isscalar(other):
            operand, kind = other, "scalar"
        elif isinstance(other, (list, tuple, np.ndarray)):
            operand, kind = ak_array(other), "pdarray"
        else:
            operand, kind = None, "notimpl"

        return operand, kind

    def _check_compatible_lengths(self, other_pdarray) -> None:
        """
        Raise ``ValueError`` unless ``other_pdarray`` can be combined elementwise.

        The operand's ``size`` must equal ``len(self)`` or be 1 (broadcast).
        An operand without a ``size`` attribute is rejected.
        """
        operand_size = getattr(other_pdarray, "size", None)
        accepted_sizes = (1, len(self))
        if operand_size not in accepted_sizes:
            raise ValueError("Lengths must match for elementwise operation")

    def _binary_op(self, other: Any, op, *, require_bool: bool = False):
        """
        Evaluate ``op(self._data, other)`` and wrap the result in ``type(self)``.

        Parameters
        ----------
        other : Any
            Right-hand operand, normalized by ``_coerce_other_for_binop``.
        op : callable
            Called as ``op(lhs, rhs)``.
        require_bool : bool
            When True, both this array and the operand must be boolean
            (dtype ``"bool"`` for arrays, ``bool``/``np.bool_`` for scalars);
            otherwise ``NotImplemented`` is returned.

        Returns
        -------
        The wrapped result, or ``NotImplemented`` for unsupported operands.

        Raises
        ------
        ValueError
            If an array operand fails ``_check_compatible_lengths``.
        """
        rhs, kind = self._coerce_other_for_binop(other)
        if kind == "notimpl":
            return NotImplemented

        rhs_is_array = kind == "pdarray"

        if require_bool:
            if self._data.dtype != "bool":
                return NotImplemented
            if rhs_is_array:
                if getattr(rhs, "dtype", None) != "bool":
                    return NotImplemented
            elif not isinstance(rhs, (bool, np.bool_)):
                return NotImplemented

        if rhs_is_array:
            # Scalars broadcast freely; arrays must match in length (or be size 1).
            self._check_compatible_lengths(rhs)

        return type(self)(op(self._data, rhs))

    def _rbinary_op(self, other: Any, op, *, require_bool: bool = False):
        """
        Evaluate the reflected operation ``op(other, self._data)`` and wrap the result.

        Same operand normalization, boolean gating (``require_bool``) and length
        checking as the forward binary operator; only the argument order passed
        to ``op`` is swapped. Returns ``NotImplemented`` for unsupported operands.
        """
        lhs, kind = self._coerce_other_for_binop(other)
        if kind == "notimpl":
            return NotImplemented

        lhs_is_array = kind == "pdarray"

        if require_bool:
            if self._data.dtype != "bool":
                return NotImplemented
            if lhs_is_array:
                if getattr(lhs, "dtype", None) != "bool":
                    return NotImplemented
            elif not isinstance(lhs, (bool, np.bool_)):
                return NotImplemented

        if lhs_is_array:
            self._check_compatible_lengths(lhs)

        return type(self)(op(lhs, self._data))

    def _unary_op(self, op):
        """
        Apply ``op`` to the backing array and wrap the result in ``type(self)``.

        Any exception raised by ``op`` or by the wrapping constructor is
        swallowed and ``NotImplemented`` is returned instead.
        """
        try:
            transformed = op(self._data)
            wrapped = type(self)(transformed)
        except Exception:
            return NotImplemented
        return wrapped

    # -------------------------------------------------------------------------
    # Arithmetic dunders
    # -------------------------------------------------------------------------
    def __add__(self, other: Any):
        """Compute ``self + other`` elementwise through ``_binary_op``."""

        def _add(lhs, rhs):
            return lhs + rhs

        return self._binary_op(other, _add)

    def __radd__(self, other: Any):
        """Compute ``other + self`` elementwise through ``_rbinary_op``."""

        def _add(lhs, rhs):
            return lhs + rhs

        return self._rbinary_op(other, _add)

    def __sub__(self, other: Any):
        """Compute ``self - other`` elementwise through ``_binary_op``."""

        def _subtract(lhs, rhs):
            return lhs - rhs

        return self._binary_op(other, _subtract)

    def __rsub__(self, other: Any):
        """Compute ``other - self`` elementwise through ``_rbinary_op``."""

        def _subtract(lhs, rhs):
            return lhs - rhs

        return self._rbinary_op(other, _subtract)

    def __mul__(self, other: Any):
        """Compute ``self * other`` elementwise through ``_binary_op``."""

        def _multiply(lhs, rhs):
            return lhs * rhs

        return self._binary_op(other, _multiply)

    def __rmul__(self, other: Any):
        """Compute ``other * self`` elementwise through ``_rbinary_op``."""

        def _multiply(lhs, rhs):
            return lhs * rhs

        return self._rbinary_op(other, _multiply)

    def __truediv__(self, other: Any):
        """Compute ``self / other`` elementwise through ``_binary_op``."""

        def _divide(lhs, rhs):
            return lhs / rhs

        return self._binary_op(other, _divide)

    def __rtruediv__(self, other: Any):
        """Compute ``other / self`` elementwise through ``_rbinary_op``."""

        def _divide(lhs, rhs):
            return lhs / rhs

        return self._rbinary_op(other, _divide)

    def __floordiv__(self, other: Any):
        """Compute ``self // other`` elementwise through ``_binary_op``."""

        def _floor_divide(lhs, rhs):
            return lhs // rhs

        return self._binary_op(other, _floor_divide)

    def __rfloordiv__(self, other: Any):
        """Compute ``other // self`` elementwise through ``_rbinary_op``."""

        def _floor_divide(lhs, rhs):
            return lhs // rhs

        return self._rbinary_op(other, _floor_divide)

    def __mod__(self, other: Any):
        """Compute ``self % other`` elementwise through ``_binary_op``."""

        def _modulo(lhs, rhs):
            return lhs % rhs

        return self._binary_op(other, _modulo)

    def __rmod__(self, other: Any):
        """Compute ``other % self`` elementwise through ``_rbinary_op``."""

        def _modulo(lhs, rhs):
            return lhs % rhs

        return self._rbinary_op(other, _modulo)

    def __pow__(self, other: Any):
        """Compute ``self ** other`` elementwise through ``_binary_op``."""

        def _power(base, exponent):
            return base**exponent

        return self._binary_op(other, _power)

    def __rpow__(self, other: Any):
        """Compute ``other ** self`` elementwise through ``_rbinary_op``."""

        def _power(base, exponent):
            return base**exponent

        return self._rbinary_op(other, _power)

    # Unary arithmetic
    def __neg__(self):
        """Compute ``-self`` elementwise through ``_unary_op``."""

        def _negate(values):
            return -values

        return self._unary_op(_negate)

    def __pos__(self):
        """Compute ``+self`` elementwise through ``_unary_op``."""

        def _positive(values):
            return +values

        return self._unary_op(_positive)

    def __abs__(self):
        """Compute ``abs(self)`` elementwise through ``_unary_op``."""

        def _absolute(values):
            return abs(values)

        return self._unary_op(_absolute)

    # -------------------------------------------------------------------------
    # Comparison dunders (elementwise, return ArkoudaArray[bool])
    # -------------------------------------------------------------------------

    def __eq__(self, other: Any):
        """Compare ``self == other`` elementwise through ``_binary_op``."""

        def _equal(lhs, rhs):
            return lhs == rhs

        return self._binary_op(other, _equal)

    def __ne__(self, other: Any):
        """Compare ``self != other`` elementwise through ``_binary_op``."""

        def _not_equal(lhs, rhs):
            return lhs != rhs

        return self._binary_op(other, _not_equal)

    def __lt__(self, other: Any):
        """Compare ``self < other`` elementwise through ``_binary_op``."""

        def _less(lhs, rhs):
            return lhs < rhs

        return self._binary_op(other, _less)

    def __le__(self, other: Any):
        """Compare ``self <= other`` elementwise through ``_binary_op``."""

        def _less_equal(lhs, rhs):
            return lhs <= rhs

        return self._binary_op(other, _less_equal)

    def __gt__(self, other: Any):
        """Compare ``self > other`` elementwise through ``_binary_op``."""

        def _greater(lhs, rhs):
            return lhs > rhs

        return self._binary_op(other, _greater)

    def __ge__(self, other: Any):
        """Compare ``self >= other`` elementwise through ``_binary_op``."""

        def _greater_equal(lhs, rhs):
            return lhs >= rhs

        return self._binary_op(other, _greater_equal)

    # -------------------------------------------------------------------------
    # Bitwise / logical dunders (only for bool dtype)
    # -------------------------------------------------------------------------
    def __and__(self, other: Any):
        """Compute ``self & other`` through ``_binary_op`` with ``require_bool=True``."""

        def _and(lhs, rhs):
            return lhs & rhs

        return self._binary_op(other, _and, require_bool=True)

    def __rand__(self, other: Any):
        """Compute ``other & self`` through ``_rbinary_op`` with ``require_bool=True``."""

        def _and(lhs, rhs):
            return lhs & rhs

        return self._rbinary_op(other, _and, require_bool=True)

    def __or__(self, other: Any):
        """Compute ``self | other`` through ``_binary_op`` with ``require_bool=True``."""

        def _or(lhs, rhs):
            return lhs | rhs

        return self._binary_op(other, _or, require_bool=True)

    def __ror__(self, other: Any):
        """Compute ``other | self`` through ``_rbinary_op`` with ``require_bool=True``."""

        def _or(lhs, rhs):
            return lhs | rhs

        return self._rbinary_op(other, _or, require_bool=True)

    def __xor__(self, other: Any):
        """Compute ``self ^ other`` through ``_binary_op`` with ``require_bool=True``."""

        def _xor(lhs, rhs):
            return lhs ^ rhs

        return self._binary_op(other, _xor, require_bool=True)

    def __rxor__(self, other: Any):
        """Compute ``other ^ self`` through ``_rbinary_op`` with ``require_bool=True``."""

        def _xor(lhs, rhs):
            return lhs ^ rhs

        return self._rbinary_op(other, _xor, require_bool=True)

    def __invert__(self):
        """
        Compute ``~self`` for boolean arrays.

        Returns ``NotImplemented`` when the backing dtype is not ``"bool"``;
        integer bitwise inversion is not provided here.
        """
        backing = self._data
        if backing.dtype != "bool":
            return NotImplemented
        inverted = ~backing
        return type(self)(inverted)

    # Typing-only overloads for ``astype``. They declare, for static checkers,
    # that a NumPy dtype yields an ``NDArray``, an ``ExtensionDtype`` yields an
    # ``ExtensionArray``, and any other dtype-like yields the union of the two.
    # The stubs have no runtime behaviour of their own.
    # docstr-coverage:excused `typing-only overload stub`
    @overload
    def astype(self, dtype: np.dtype[Any], copy: bool = True) -> NDArray[Any]: ...

    # docstr-coverage:excused `typing-only overload stub`
    @overload
    def astype(self, dtype: ExtensionDtype, copy: bool = True) -> ExtensionArray: ...

    # docstr-coverage:excused `typing-only overload stub`
    @overload
    def astype(self, dtype: Any, copy: bool = True) -> Union[ExtensionArray, NDArray[Any]]: ...


[docs]
    def astype(
        self,
        dtype: Any,
        copy: bool = True,
    ) -> Union[ExtensionArray, NDArray[Any]]:
        """
        Convert the array to ``dtype``.

        How the target is resolved:

        * A pandas ``ExtensionDtype`` is replaced by its ``numpy_dtype`` attribute
          when it has one, and is otherwise used as given. No ``object`` check and
          no Arkouda dtype normalization happen on this path.
        * ``object`` (in any of its spellings) materializes the data with
          ``to_ndarray`` and returns a NumPy array of dtype ``object``.
        * Anything else is normalized with Arkouda's ``dtype`` resolver.

        For the first and last case, ``self`` is returned when ``copy`` is
        ``False`` and the resolved dtype equals the current one; otherwise the
        backing array is cast and wrapped via
        ``ArkoudaExtensionArray._from_sequence``.

        Parameters
        ----------
        dtype : Any
            Target dtype: a NumPy dtype, a pandas extension dtype, an Arkouda
            dtype, or any dtype-like accepted by Arkouda.
        copy : bool
            Only the exact value ``False`` enables the return-``self`` shortcut.
            Default is True.

        Returns
        -------
        Union[ExtensionArray, NDArray[Any]]
            A NumPy array only when casting to ``object``; otherwise an
            Arkouda-backed ExtensionArray.

        Examples
        --------
        Basic numeric casting returns an Arkouda-backed array:

        >>> import arkouda as ak
        >>> from arkouda.pandas.extension import ArkoudaArray
        >>> a = ArkoudaArray(ak.array([1, 2, 3], dtype="int64"))
        >>> a.astype("float64").to_ndarray()
        array([1., 2., 3.])

        Casting to the same dtype with ``copy=False`` returns the original object:

        >>> b = a.astype("int64", copy=False)
        >>> b is a
        True

        Forcing a copy when the dtype is unchanged returns a new array:

        >>> c = a.astype("int64", copy=True)
        >>> c is a
        False
        >>> c.to_ndarray()
        array([1, 2, 3])

        Casting to ``object`` materializes the data to a NumPy array:

        >>> a.astype(object)
        array([1, 2, 3], dtype=object)

        NumPy and pandas dtype objects are also accepted:

        >>> import numpy as np
        >>> a.astype(np.dtype("bool")).to_ndarray()
        array([ True,  True,  True])
        """
        from arkouda.numpy.dtypes import dtype as ak_dtype

        def _reuse_or_cast(target: Any) -> Any:
            # Shared tail of both Arkouda-backed paths: hand back ``self`` only
            # on an explicit ``copy=False`` with an unchanged dtype.
            if copy is False and self.dtype.numpy_dtype == target:
                return self
            return ArkoudaExtensionArray._from_sequence(self._data.astype(target))

        # pandas extension dtype (overload #2)
        if isinstance(dtype, ExtensionDtype):
            resolved = getattr(dtype, "numpy_dtype", dtype)
            return type_cast(ExtensionArray, _reuse_or_cast(resolved))

        # object -> NumPy array
        if dtype in (object, np.object_, "object", np.dtype("O")):
            return self.to_ndarray().astype(object, copy=False)

        # everything else -> Arkouda dtype
        return _reuse_or_cast(ak_dtype(dtype))


    @property
    def dtype(self):
        """
        Return the Arkouda extension dtype matching the backing array's dtype.

        Raises
        ------
        TypeError
            If the backing dtype is none of int64, float64, bool, uint64,
            uint8 or bigint.
        """
        # Checked in order; the comparison is ``backing dtype == name``.
        known_dtypes = (
            ("int64", ArkoudaInt64Dtype),
            ("float64", ArkoudaFloat64Dtype),
            ("bool", ArkoudaBoolDtype),
            ("uint64", ArkoudaUint64Dtype),
            ("uint8", ArkoudaUint8Dtype),
            ("bigint", ArkoudaBigintDtype),
        )
        for dtype_name, dtype_cls in known_dtypes:
            if self._data.dtype == dtype_name:
                return dtype_cls()
        raise TypeError(f"Unsupported dtype {self._data.dtype}")

    @property
    def nbytes(self):
        """Return the ``nbytes`` value reported by the backing array."""
        backing = self._data
        return backing.nbytes


[docs]
    def equals(self, other):
        """
        Return whether ``other`` is an ``ArkoudaArray`` with equal backing data.

        Any other type yields ``False``; otherwise the decision is delegated to
        the backing array's own ``equals``.
        """
        if isinstance(other, ArkoudaArray):
            return self._data.equals(other._data)
        return False


    def _reduce(self, name: str, skipna: bool = True, **kwargs: Any) -> Any:
        """
        Perform the named reduction on the backing array.

        This is the pandas ExtensionArray reduction hook, used for operations
        such as ``Series.sum()`` and ``Series.min()``.

        Parameters
        ----------
        name : str
            Reduction name, matched case-insensitively. Recognized names:
            ``sum``, ``count``, ``prod``, ``min``, ``max``, ``mean``, ``var``,
            ``std``, ``argmin``, ``argmax``, ``first``, ``any``/``or`` and
            ``all``/``and``.
        skipna : bool
            Accepted for pandas compatibility but not consulted by this method.
            The only NaN-aware reduction is ``count``, which excludes NaN for
            float64 data; the others are handed to the backing array as-is.
            Default is True.
        **kwargs : Any
            Only ``ddof`` is read (default 1, converted with ``int``); it is
            passed to ``var`` and ``std``.

        Returns
        -------
        Any
            Whatever the selected reduction returns.

        Raises
        ------
        TypeError
            If ``name`` is not a recognized reduction.
        ValueError
            If ``first`` is requested on an empty array.
        """
        from arkouda.numpy import isnan

        ddof = int(kwargs.get("ddof", 1))
        reduction = name.lower()
        values = self._data

        def count_present() -> int:
            # NaN counts as missing only for float64; other dtypes count everything.
            if values.dtype == "float64":
                return int((~isnan(values)).sum())
            return int(values.size)

        def first_value() -> Any:
            if values.size == 0:
                # Throw an error for now; pandas often raises or returns NA depending on context.
                raise ValueError("Reduction 'first' requires at least one element")
            return values[0]

        # Zero-argument callables keyed by reduction name. The bound methods are
        # looked up eagerly, as the original table did.
        reducers: dict[str, Callable[[], Any]] = {
            "sum": values.sum,
            "count": count_present,
            "prod": values.prod,
            "min": values.min,
            "max": values.max,
            "mean": values.mean,
            "var": lambda: values.var(ddof=ddof),
            "std": lambda: values.std(ddof=ddof),
            "argmin": values.argmin,
            "argmax": values.argmax,
            "first": first_value,
            "any": values.any,
            "or": values.any,  # alias of "any"
            "all": values.all,
            "and": values.all,  # alias of "all"
        }

        reducer = reducers.get(reduction)
        if reducer is None:
            raise TypeError(f"Unknown reduction '{name}'")
        return reducer()

    def __repr__(self):
        """Return ``ArkoudaArray(<backing array as text>)``."""
        text = f"ArkoudaArray({self._data})"
        return text

    #   TODO:  refine this.
    def _values_for_factorize(self):
        """
        Return ``(values, na_value)`` for ``pandas.factorize``.

        ``values`` is the data materialized with ``to_ndarray``. The sentinel
        depends on the NumPy dtype kind: ``""`` for string/object kinds
        (``U``, ``S``, ``O``), ``-1`` for signed and unsigned integers, and
        ``np.nan`` for every other kind.
        """
        materialized = self.to_ndarray()
        kind = materialized.dtype.kind

        if kind in {"U", "S", "O"}:
            return materialized, ""
        if kind in {"i", "u"}:
            return materialized, -1
        return materialized, np.nan

    @classmethod
    def _from_factorized(cls, uniques, original):
        """
        Rebuild an array from the unique values produced by factorization.

        Parameters
        ----------
        uniques
            Unique values to wrap; forwarded unchanged to ``cls._from_sequence``.
        original
            The array that was factorized. Not used by this implementation.

        Returns
        -------
        object
            Whatever ``cls._from_sequence(uniques)`` returns; no explicit dtype is
            passed, so dtype selection is left entirely to ``_from_sequence``.
        """
        rebuilt = cls._from_sequence(uniques)
        return rebuilt


[docs]
    def all(self, axis=0, skipna=True, **kwargs):
        """
        Report whether every element of the backing data is true.

        Exists mainly so that pandas' BaseExtensionArray.equals, which calls
        ``.all()`` on the result of a boolean expression, works on this array.

        Parameters
        ----------
        axis, skipna, **kwargs
            Accepted for signature compatibility; none of them is consulted.

        Returns
        -------
        bool
            ``bool()`` of the backing data's own ``all()`` reduction.
        """
        reduced = self._data.all()
        return bool(reduced)



[docs]
    def any(self, axis=0, skipna=True, **kwargs):
        """
        Report whether at least one element of the backing data is true.

        Provided as the counterpart of ``.all()`` and for pandas code paths that
        may perform a boolean reduction on the array.

        Parameters
        ----------
        axis, skipna, **kwargs
            Accepted for signature compatibility; none of them is consulted.

        Returns
        -------
        bool
            ``bool()`` of the backing data's own ``any()`` reduction.
        """
        reduced = self._data.any()
        return bool(reduced)



[docs]
    def isna(self) -> np.ndarray:
        """
        Return a boolean mask indicating missing values.

        This method implements the pandas ExtensionArray.isna contract
        and always returns a NumPy ndarray of dtype ``bool`` with the
        same length as the array.

        Returns
        -------
        np.ndarray
            A boolean mask where ``True`` marks elements considered missing:

            - ``Categorical`` data: positions whose code equals ``-1``.
            - ``pdarray`` data of dtype ``float64``/``float32``: positions that are NaN.
            - any other ``pdarray``: an all-``False`` mask of length ``data.size``.

        Raises
        ------
        TypeError
            If the underlying data is a ``SegArray`` or any other type for which
            no missing-value mask can be produced.
        """
        from arkouda.numpy import isnan
        from arkouda.numpy.pdarrayclass import pdarray
        from arkouda.numpy.segarray import SegArray
        from arkouda.pandas.categorical import Categorical

        data = self._data

        # SegArray
        if isinstance(data, SegArray):
            raise TypeError("isna is not supported for SegArray-backed ArkoudaArray")

        # Categorical
        if isinstance(data, Categorical):
            return (data.codes == -1).to_ndarray()

        # pdarray
        if isinstance(data, pdarray):
            if data.dtype in ("float64", "float32"):
                return isnan(data).to_ndarray()

            # Non-float dtypes have no missing-value representation. The mask is
            # constant, so build it locally instead of allocating an array on the
            # server only to transfer it straight back.
            return np.zeros(data.size, dtype=bool)

        # Returning NotImplemented from a regular (non-operator) method would hand
        # callers a non-array object; fail loudly as the Raises section promises.
        raise TypeError(f"isna is not supported for {type(data).__name__}-backed ArkoudaArray")



[docs]
    def isnull(self):
        """Return the missing-value mask; equivalent to calling ``isna()``."""
        mask = self.isna()
        return mask



[docs]
    def value_counts(self, dropna: bool = True) -> pd.Series:
        """
        Count how often each distinct value occurs and return a pandas Series.

        The counting itself is delegated to the backing array's own
        ``value_counts()``; the resulting unique values become the Series index
        and the frequencies become the Series data.

        Parameters
        ----------
        dropna : bool
            If True (the default) and the backing data has dtype ``float64``,
            ``NaN`` entries are removed before counting. For every other dtype,
            or when False, the data is counted as is.

        Returns
        -------
        pd.Series
            Counts of the unique values. Index and values are each built with
            ``ArkoudaArray._from_sequence``. If nothing remains to count, an
            empty Series of dtype ``int64`` is returned instead.

        Notes
        -----
        - Missing-value filtering exists only for ``float64`` data.
        - The pandas options ``normalize``, ``sort``, and ``bins`` are not
          implemented.

        Examples
        --------
        >>> import arkouda as ak
        >>> from arkouda.pandas.extension import ArkoudaArray
        >>>
        >>> a = ArkoudaArray(ak.array([1, 2, 1, 3, 2, 1]))
        >>> a.value_counts()
        1    3
        2    2
        3    1
        dtype: int64

        Floating-point data with NaN values:

        >>> b = ArkoudaArray(ak.array([1.0, 2.0, float("nan"), 1.0]))
        >>> b.value_counts()
        1.0    2
        2.0    1
        dtype: int64
        """
        from arkouda.numpy.numeric import isnan as ak_isnan

        values = self._data

        # NaN is the only missing-value marker handled, and only for float64 data.
        drop_nans = dropna and values.dtype == "float64"
        if drop_nans:
            values = values[~ak_isnan(values)]

        # Nothing left to count: hand back an empty integer Series.
        if values.size == 0:
            return pd.Series(dtype="int64")

        uniques, freqs = values.value_counts()

        index_arr = ArkoudaArray._from_sequence(uniques)
        count_arr = ArkoudaArray._from_sequence(freqs)
        return pd.Series(count_arr, index=index_arr)




def _is_empty_indexer(key) -> bool:
    """
    Tell whether ``key`` is an indexer that selects no elements.

    Parameters
    ----------
    key
        Candidate indexer of any type.

    Returns
    -------
    bool
        True for an empty ``list``/``tuple``, a NumPy array or Arkouda ``pdarray``
        whose ``size`` is 0, or any other ``Sequence`` (excluding ``str`` and
        ``bytes``) whose ``len()`` is 0. False for everything else, including
        sequences whose ``len()`` raises ``TypeError``.
    """
    from arkouda.numpy.pdarrayclass import pdarray

    # Built-in containers: emptiness is decided by length.
    if isinstance(key, (list, tuple)):
        return not len(key)

    # NumPy and Arkouda arrays both expose ``size``.
    if isinstance(key, (np.ndarray, pdarray)):
        return key.size == 0

    # Anything that is not a generic sequence, and text/bytes in particular,
    # is never treated as an empty indexer.
    if isinstance(key, (str, bytes)) or not isinstance(key, Sequence):
        return False

    # Conservative handling of remaining sequence types.
    try:
        return len(key) == 0
    except TypeError:
        return False