Source code for arkouda.pandas.accessor

"""
Accessor utilities for Arkouda Series-like objects.

This module defines infrastructure for namespace-based accessors (e.g., `.str`, `.dt`)
on Arkouda Series, mimicking the behavior of pandas-style accessors. It supports
extension methods for string and datetime-like values, enabling operations to be
performed in a clean, grouped syntax.

Components
----------
:class:`.CachedAccessor`
    Descriptor that lazily initializes and caches accessor objects, such as `.str` or `.dt`.

:class:`.DatetimeAccessor`
    Implements datetime-like operations (e.g., floor, ceil, round) via the `.dt` accessor.

:class:`.StringAccessor`
    Implements string-like operations (e.g., contains, startswith, endswith) via the `.str` accessor.

:class:`.Properties`
    Base class that provides `_make_op` for dynamically attaching operations to accessors.

:func:`.date_operators`
    Class decorator that adds datetime operations to `DatetimeAccessor`.

:func:`.string_operators`
    Class decorator that adds string operations to `StringAccessor`.

Usage
-----
>>> import arkouda as ak
>>> from arkouda import Series
>>> s = Series(["apple", "banana", "apricot"])
>>> s.str.startswith("a")
0     True
1    False
2     True
dtype: bool

>>> from arkouda import Datetime
>>> t = Series(Datetime(ak.array([1_000_000_000_000])))
>>> t.dt.floor("D")
0   1970-01-01
dtype: datetime64[ns]

Notes
-----
These accessors are automatically attached to compatible Series objects.
Users should not instantiate accessors directly — use `.str` and `.dt` instead.

"""

from typing import TYPE_CHECKING, TypeVar

from arkouda.numpy.timeclass import Datetime
from arkouda.pandas.categorical import Categorical


# Static type checkers take the first branch and see the real ``Strings``
# class. At runtime the import is skipped and the name is bound to a TypeVar
# placeholder instead.
# NOTE(review): presumably this avoids a circular import with
# ``arkouda.numpy.strings`` -- confirm.
if TYPE_CHECKING:
    from arkouda.numpy.strings import Strings
else:
    Strings = TypeVar("Strings")

# Public names exported by a star-import of this module.
__all__ = [
    "CachedAccessor",
    "DatetimeAccessor",
    "Properties",
    "StringAccessor",
    "date_operators",
    "string_operators",
]


class CachedAccessor:
    """
    Descriptor that builds a namespace accessor on first use and caches it.

    Declared as a class attribute (for example ``str`` or ``dt``), it
    constructs the accessor the first time the attribute is read on an
    instance and stores the result on that instance under the same name.

    Parameters
    ----------
    name : str
        Attribute name under which the built accessor is stored on the
        instance (e.g., ``"str"`` for ``series.str``).
    accessor : type
        Class (or other callable) implementing the accessor logic.

    Notes
    -----
    ``accessor`` is called with the owning object as its only positional
    argument, so its ``__init__`` must accept exactly that.

    Only ``__get__`` is defined, so this is a non-data descriptor: once the
    built accessor has been stored in the instance's attributes, ordinary
    attribute lookup returns the stored object instead of calling
    ``__get__`` again.

    """

    def __init__(self, name: str, accessor) -> None:
        self._accessor = accessor
        self._name = name

    def __get__(self, obj, cls):
        """
        Return the accessor for ``obj``, creating and caching it if needed.

        Parameters
        ----------
        obj : object or None
            The instance the attribute is read from, or ``None`` when the
            attribute is read from the class itself.
        cls : type
            The class owning the descriptor.

        Returns
        -------
        object
            The accessor class when accessed on the class, otherwise a newly
            built accessor instance bound to ``obj``.

        """
        if obj is not None:
            built = self._accessor(obj)
            # Store the accessor on the instance under the descriptor's name.
            # object.__setattr__ is used so that a custom __setattr__ on the
            # owner's class cannot intercept or reject the assignment.
            object.__setattr__(obj, self._name, built)
            return built

        # Attribute read on the class itself: expose the accessor class.
        return self._accessor
def string_operators(cls):
    """
    Class decorator that installs the string operations on ``cls``.

    For each of ``contains``, ``startswith`` and ``endswith`` a method is
    generated with ``cls._make_op`` and bound on the class under that name.

    Parameters
    ----------
    cls : type
        The accessor class to decorate. It must provide ``_make_op``.

    Returns
    -------
    type
        The same class object, now carrying the generated methods.

    Notes
    -----
    Used internally to implement the `.str` accessor API.

    """
    for op_name in ("contains", "startswith", "endswith"):
        generated = cls._make_op(op_name)
        setattr(cls, op_name, generated)
    return cls
def date_operators(cls):
    """
    Class decorator that installs the datetime operations on ``cls``.

    For each of ``floor``, ``ceil`` and ``round`` a method is generated with
    ``cls._make_op`` and bound on the class under that name.

    Parameters
    ----------
    cls : type
        The accessor class to decorate. It must provide ``_make_op``.

    Returns
    -------
    type
        The same class object, now carrying the generated methods.

    Notes
    -----
    Used internally to implement the `.dt` accessor API.

    """
    for op_name in ("floor", "ceil", "round"):
        generated = cls._make_op(op_name)
        setattr(cls, op_name, generated)
    return cls
class Properties:
    """
    Base class for accessor implementations in Arkouda.

    Provides the `_make_op` class method, which generates accessor methods
    that forward to an operation on the wrapped Series' values and return the
    result as a new Series.

    Notes
    -----
    This class is subclassed by `StringAccessor` and `DatetimeAccessor`, and
    is not intended to be used directly. Generated methods read
    ``self.series``, so subclasses are expected to set that attribute.

    Examples
    --------
    ``_make_op("floor")`` returns a function that, once attached to an
    accessor class, calls ``self.series.values.floor(...)`` and wraps the
    result in a new Series that reuses ``self.series.index``.

    """

    @classmethod
    def _make_op(cls, name):
        """
        Build an accessor method that forwards to ``series.values.<name>``.

        Parameters
        ----------
        name : str
            Name of the method to look up on ``self.series.values`` when the
            generated function is called.

        Returns
        -------
        callable
            A function taking ``(self, *args, **kwargs)`` whose ``__name__``,
            ``__qualname__`` and ``__doc__`` identify the wrapped operation.

        """

        def accessop(self, *args, **kwargs):
            # Imported at call time rather than at module import time.
            # NOTE(review): presumably to avoid a circular import between
            # this module and the package's Series -- confirm.
            from . import Series

            results = getattr(self.series.values, name)(*args, **kwargs)
            return Series(data=results, index=self.series.index)

        # Without this every generated method would identify itself as
        # "accessop", which makes help(), repr() and tracebacks misleading.
        accessop.__name__ = name
        accessop.__qualname__ = f"{cls.__qualname__}.{name}"
        accessop.__doc__ = (
            f"Call ``{name}`` on the underlying Series values and return the "
            "result as a new Series with the same index.\n\n"
            "All positional and keyword arguments are forwarded unchanged."
        )
        return accessop
@date_operators
class DatetimeAccessor(Properties):
    """
    Namespace of datetime operations exposed as ``.dt`` on Arkouda Series.

    The ``date_operators`` decorator attaches ``floor``, ``ceil`` and
    ``round`` to this class, mirroring the `.dt` accessor in pandas. The
    accessor is meant to be reached through ``Series.dt`` and should not be
    instantiated directly.

    Parameters
    ----------
    series : arkouda.pandas.Series
        The Series object containing `Datetime` values.

    Raises
    ------
    AttributeError
        If ``series.values`` is not an instance of `arkouda.Datetime`.

    Examples
    --------
    >>> import arkouda as ak
    >>> from arkouda import Datetime, Series
    >>> s = Series(Datetime(ak.array([1_000_000_000_000])))
    >>> s.dt.floor("D")
    0   1970-01-01
    dtype: datetime64[ns]

    """

    def __init__(self, series):
        # Only Datetime-backed Series are accepted; anything else is rejected
        # before the accessor keeps a reference to the Series.
        if isinstance(series.values, Datetime):
            self.series = series
        else:
            raise AttributeError("Can only use .dt accessor with datetimelike values")
@string_operators
class StringAccessor(Properties):
    """
    Namespace of string operations exposed as ``.str`` on Arkouda Series.

    The ``string_operators`` decorator attaches ``contains``, ``startswith``
    and ``endswith`` to this class, similar to pandas. The accessor is meant
    to be reached through ``Series.str`` and should not be instantiated
    directly.

    Parameters
    ----------
    series : arkouda.pandas.Series
        The Series object containing `Strings` or `Categorical` values.

    Raises
    ------
    AttributeError
        If ``series.values`` is neither `Strings` nor `Categorical`.

    Examples
    --------
    >>> import arkouda as ak
    >>> from arkouda import Series
    >>> s = Series(["apple", "banana", "apricot"])
    >>> s.str.startswith("a")
    0     True
    1    False
    2     True
    dtype: bool

    """

    def __init__(self, series):
        # Strings is imported here, at construction time, because the
        # module-level name is only a placeholder at runtime.
        from arkouda.numpy.strings import Strings

        if isinstance(series.values, (Categorical, Strings)):
            self.series = series
        else:
            raise AttributeError("Can only use .str accessor with string like values")