import datetime
import json
from warnings import warn
import numpy as np
from pandas import Series as pdSeries
from pandas import Timestamp as pdTimestamp
from pandas import Timedelta as pdTimedelta
from pandas import date_range as pd_date_range
from pandas import timedelta_range as pd_timedelta_range
from pandas import to_datetime, to_timedelta
from arkouda.client import generic_msg
from arkouda.numpy.dtypes import int64, int_scalars, intTypes, isSupportedInt
from arkouda.numeric import abs as akabs
from arkouda.numeric import cast
from arkouda.pdarrayclass import RegistrationError, create_pdarray, pdarray
from arkouda.pdarraycreation import from_series
_BASE_UNIT = "ns"
_unit2normunit = {
"weeks": "w",
"days": "d",
"hours": "h",
"hrs": "h",
"minutes": "m",
"t": "m",
"milliseconds": "ms",
"l": "ms",
"microseconds": "us",
"u": "us",
"nanoseconds": "ns",
"n": "ns",
}
_unit2factor = {
"w": 7 * 24 * 60 * 60 * 10**9,
"d": 24 * 60 * 60 * 10**9,
"h": 60 * 60 * 10**9,
"m": 60 * 10**9,
"s": 10**9,
"ms": 10**6,
"us": 10**3,
"ns": 1,
}
def _get_factor(unit: str) -> int:
unit = unit.lower()
if unit in _unit2factor:
return _unit2factor[unit]
else:
for key, normunit in _unit2normunit.items():
if key.startswith(unit):
return _unit2factor[normunit]
raise ValueError(
f"Argument must be one of {set(_unit2factor.keys()) | set(_unit2normunit.keys())}"
)
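# For example, _get_factor("m"), _get_factor("minutes"), and _get_factor("MIN")
# all resolve to the minutes factor (60 * 10**9 nanoseconds), since lookup is
# case-insensitive and accepts prefixes of the full unit names above.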
def _identity(x, **kwargs):
return x
class _Timescalar:
def __init__(self, scalar):
if isinstance(scalar, (np.datetime64, datetime.datetime)):
scalar = to_datetime(scalar).to_numpy()
elif isinstance(scalar, (np.timedelta64, datetime.timedelta)):
scalar = to_timedelta(scalar).to_numpy()
self.unit = np.datetime_data(scalar.dtype)[0]
self._factor = _get_factor(self.unit)
# int64 in nanoseconds
self.value = self._factor * scalar.astype("int64")
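# For example, np.datetime64("1970-01-01T00:00:01") carries unit "s", so its
# underlying integer (1) is scaled by 10**9, giving value == 10**9 nanoseconds.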
class _AbstractBaseTime(pdarray):
"""Base class for Datetime and Timedelta; not user-facing. Arkouda handles
time similar to Pandas (albeit with less functionality), in that all absolute
and relative times are represented in nanoseconds as int64 behind the scenes.
Datetime and Timedelta can be constructed from Arkouda, NumPy, or Pandas arrays;
in each case, the input values are normalized to nanoseconds on initialization,
so that all resulting operations are transparent.
"""
special_objType = "Time"
def __init__(self, pda, unit: str = _BASE_UNIT):
if isinstance(pda, (Datetime, Timedelta)):
self.unit: str = pda.unit
self._factor: int = pda._factor
# Make a copy to avoid unknown symbol errors
self.values: pdarray = cast(pda.values, int64)
# Convert the input to int64 pdarray of nanoseconds
elif isinstance(pda, pdarray):
if pda.dtype not in intTypes:
raise TypeError(f"{self.__class__.__name__} array must have int64 dtype")
# Already int64 pdarray, just scale
self.unit = unit
self._factor = _get_factor(self.unit)
# This makes a copy of the input array, to leave input unchanged
self.values = cast(self._factor * pda, int64) # Mimics a datetime64[ns] array
elif hasattr(pda, "dtype"):
# Handles all pandas and numpy datetime/timedelta arrays
if pda.dtype.kind not in ("M", "m"):
# M = datetime64, m = timedelta64
raise TypeError(f"Invalid dtype: {pda.dtype.name}")
if isinstance(pda, pdSeries):
# Pandas Datetime and Timedelta
# Get units of underlying numpy datetime64 array
self.unit = np.datetime_data(pda.values.dtype)[0] # type: ignore [arg-type]
self._factor = _get_factor(self.unit)
# Create pdarray
self.values = from_series(pda)
# Scale if necessary
# This is futureproofing; it will not be used unless pandas
# changes its Datetime implementation
if self._factor != 1:
# Scale inplace because we already created a copy
self.values *= self._factor
elif isinstance(pda, np.ndarray):
# Numpy datetime64 and timedelta64
# Force through pandas.Series
self.__init__(pdSeries(pda))  # type: ignore
elif hasattr(pda, "to_series"):
# Pandas DatetimeIndex or TimedeltaIndex
# Force through pandas.Series
self.__init__(pda.to_series()) # type: ignore
else:
raise TypeError(f"Unsupported type: {type(pda)}")
# Now that self.values is correct, init self with same metadata
super().__init__(
self.values.name,
self.values.dtype,
self.values.size,
self.values.ndim,
self.values.shape,
self.values.itemsize,
)
self._data = self.values
self._is_populated = False
@classmethod
def _get_callback(cls, otherclass, op):
# Will be overridden by all subclasses
return _identity
def floor(self, freq):
"""Round times down to the nearest integer of a given frequency.
Parameters
----------
freq : str {'w', 'd', 'h', 'm', 's', 'ms', 'us', 'ns'}
Frequency to round to
Returns
-------
self.__class__
Values rounded down to nearest frequency
"""
f = _get_factor(freq)
return self.__class__(self.values // f, unit=freq)
def ceil(self, freq):
"""Round times up to the nearest integer of a given frequency.
Parameters
----------
freq : str {'w', 'd', 'h', 'm', 's', 'ms', 'us', 'ns'}
Frequency to round to
Returns
-------
self.__class__
Values rounded up to nearest frequency
"""
f = _get_factor(freq)
# Ceiling division: adding (f - 1) rounds any partial interval up
return self.__class__((self.values + (f - 1)) // f, unit=freq)
def round(self, freq):
"""Round times to the nearest integer of a given frequency. Midpoint
values will be rounded to nearest even integer.
Parameters
----------
freq : str {'w', 'd', 'h', 'm', 's', 'ms', 'us', 'ns'}
Frequency to round to
Returns
-------
self.__class__
Values rounded to nearest frequency
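
Examples
--------
Illustrative only; assumes a running Arkouda server and
``import arkouda as ak``:

>>> td = ak.Timedelta(ak.array([90 * 10**9]))  # 90 seconds, i.e. 1.5 minutes
>>> rounded = td.round('m')  # midpoint rounds to the even value, 2 minutes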
"""
f = _get_factor(freq)
offset = self.values + ((f + 1) // 2)
rounded = offset // f
# Halfway values are supposed to round to the nearest even integer
# Need to figure out which ones ended up odd and fix them
decrement = ((offset % f) == 0) & ((rounded % 2) == 1)
rounded[decrement] = rounded[decrement] - 1
return self.__class__(rounded, unit=freq)
def to_ndarray(self):
__doc__ = super().to_ndarray.__doc__ # noqa
return np.array(
self.values.to_ndarray(),
dtype="{}64[ns]".format(self.__class__.__name__.lower()),
)
def to_list(self):
__doc__ = super().to_list.__doc__  # noqa
return self.to_ndarray().tolist()
def to_hdf(
self,
prefix_path: str,
dataset: str = "array",
mode: str = "truncate",
file_type: str = "distribute",
):
"""
Override of the pdarray to_hdf to store the special dtype
"""
from typing import cast as typecast
from arkouda.io import _file_type_to_int, _mode_str_to_int
return typecast(
str,
generic_msg(
cmd="tohdf",
args={
"values": self,
"dset": dataset,
"write_mode": _mode_str_to_int(mode),
"filename": prefix_path,
"dtype": self.dtype,
"objType": self.special_objType,
"file_format": _file_type_to_int(file_type),
},
),
)
def update_hdf(self, prefix_path: str, dataset: str = "array", repack: bool = True):
"""
Override the pdarray implementation so that the special object type will be used.
"""
from arkouda.io import (
_file_type_to_int,
_get_hdf_filetype,
_mode_str_to_int,
_repack_hdf,
)
# determine the format (single/distribute) that the file was saved in
file_type = _get_hdf_filetype(prefix_path + "*")
generic_msg(
cmd="tohdf",
args={
"values": self,
"dset": dataset,
"write_mode": _mode_str_to_int("append"),
"filename": prefix_path,
"dtype": self.dtype,
"objType": self.special_objType,
"file_format": _file_type_to_int(file_type),
"overwrite": True,
},
)
if repack:
_repack_hdf(prefix_path)
def __str__(self):
from arkouda.client import pdarrayIterThresh
if self.size <= pdarrayIterThresh:
vals = [f"'{self[i]}'" for i in range(self.size)]
else:
vals = [f"'{self[i]}'" for i in range(3)]
vals.append("... ")
vals.extend([f"'{self[i]}'" for i in range(self.size - 3, self.size)])
spaces = " " * (len(self.__class__.__name__) + 1)
return "{}([{}],\n{}dtype='{}64[ns]')".format(
self.__class__.__name__,
",\n{} ".format(spaces).join(vals),
spaces,
self.__class__.__name__.lower(),
)
def __repr__(self) -> str:
return self.__str__()
def _binop(self, other, op):
# Need to do 2 things:
# 1) Determine return type, based on other's class
# 2) Get other's int64 data to combine with self's data
if isinstance(other, Datetime) or self._is_datetime_scalar(other):
if op not in self.supported_with_datetime:
raise TypeError(f"{op} not supported between {self.__class__.__name__} and Datetime")
otherclass = "Datetime"
if self._is_datetime_scalar(other):
otherdata = _Timescalar(other).value
else:
otherdata = other.values
elif isinstance(other, Timedelta) or self._is_timedelta_scalar(other):
if op not in self.supported_with_timedelta:
raise TypeError(f"{op} not supported between {self.__class__.__name__} and Timedelta")
otherclass = "Timedelta"
if self._is_timedelta_scalar(other):
otherdata = _Timescalar(other).value
else:
otherdata = other.values
elif (isinstance(other, pdarray) and other.dtype in intTypes) or isSupportedInt(other):
if op not in self.supported_with_pdarray:
raise TypeError(f"{op} not supported between {self.__class__.__name__} and integer")
otherclass = "pdarray"
otherdata = other
else:
return NotImplemented
# Determines return type (Datetime, Timedelta, or pdarray)
callback = self._get_callback(otherclass, op)
# Actual operation evaluates on the underlying int64 data
return callback(self.values._binop(otherdata, op))
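# For example, if dt is a Datetime array, ``dt - dt`` evaluates on the
# underlying int64 nanoseconds and its callback wraps the result as a
# Timedelta, whereas a comparison like ``dt < dt`` maps through _identity
# and comes back as a plain bool pdarray.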
def _r_binop(self, other, op):
# Need to do 2 things:
# 1) Determine return type, based on other's class
# 2) Get other's int64 data to combine with self's data
# First case is pdarray <op> self
if isinstance(other, pdarray) and other.dtype in intTypes:
if op not in self.supported_with_r_pdarray:
raise TypeError(f"{op} not supported between int64 and {self.__class__.__name__}")
callback = self._get_callback("pdarray", op)
# Need to use other._binop because self.values._r_binop can only handle scalars
return callback(other._binop(self.values, op))
# All other cases are scalars, so can use self.values._r_binop
elif self._is_datetime_scalar(other):
if op not in self.supported_with_r_datetime:
raise TypeError(
f"{op} not supported between scalar datetime and {self.__class__.__name__}"
)
otherclass = "Datetime"
otherdata = _Timescalar(other).value
elif self._is_timedelta_scalar(other):
if op not in self.supported_with_r_timedelta:
raise TypeError(
f"{op} not supported between scalar timedelta and {self.__class__.__name__}"
)
otherclass = "Timedelta"
otherdata = _Timescalar(other).value
elif isSupportedInt(other):
if op not in self.supported_with_r_pdarray:
raise TypeError(f"{op} not supported between int64 and {self.__class__.__name__}")
otherclass = "pdarray"
otherdata = other
else:
# If here, type is not handled
return NotImplemented
callback = self._get_callback(otherclass, op)
return callback(self.values._r_binop(otherdata, op))
def opeq(self, other, op):
if isinstance(other, Timedelta) or self._is_timedelta_scalar(other):
if op not in self.supported_opeq:
raise TypeError(f"{self.__class__.__name__} {op} Timedelta not supported")
if self._is_timedelta_scalar(other):
otherdata = _Timescalar(other).value
else:
otherdata = other.values
self.values.opeq(otherdata, op)
elif isinstance(other, Datetime) or self._is_datetime_scalar(other):
raise TypeError(f"{self.__class__.__name__} {op} datetime not supported")
else:
return NotImplemented
@staticmethod
def _is_datetime_scalar(scalar):
return (
isinstance(scalar, pdTimestamp)
or (isinstance(scalar, np.datetime64) and np.isscalar(scalar))
or isinstance(scalar, datetime.datetime)
)
@staticmethod
def _is_timedelta_scalar(scalar):
return (
isinstance(scalar, pdTimedelta)
or (isinstance(scalar, np.timedelta64) and np.isscalar(scalar))
or isinstance(scalar, datetime.timedelta)
)
def _scalar_callback(self, key):
# Will be overridden in all children
return key
def __getitem__(self, key):
if isSupportedInt(key):
# Single integer index will return a pandas scalar
return self._scalar_callback(self.values[key])
else:
# Slice or array index should return same class
return self.__class__(self.values[key])
def __setitem__(self, key, value):
# RHS can only be vector or scalar of same class
if isinstance(value, self.__class__):
# Value.values is already in nanoseconds, so self.values
# can be set directly
self.values[key] = value.values
elif self._is_supported_scalar(value):
# _Timescalar takes care of normalization to nanoseconds
normval = _Timescalar(value)
self.values[key] = normval.value
else:
return NotImplemented
def min(self):
__doc__ = super().min.__doc__ # noqa
# Return type is pandas scalar
return self._scalar_callback(self.values.min())
def max(self):
__doc__ = super().max.__doc__ # noqa
# Return type is pandas scalar
return self._scalar_callback(self.values.max())
def mink(self, k):
__doc__ = super().mink.__doc__ # noqa
# Return type is same class
return self.__class__(self.values.mink(k))
def maxk(self, k):
__doc__ = super().maxk.__doc__ # noqa
# Return type is same class
return self.__class__(self.values.maxk(k))
class Datetime(_AbstractBaseTime):
"""Represents a date and/or time.
Datetime is the Arkouda analog to pandas DatetimeIndex and
other timeseries data types.
Parameters
----------
pda : int64 pdarray, pd.DatetimeIndex, pd.Series, or np.datetime64 array
unit : str, default 'ns'
For int64 pdarray, denotes the unit of the input. Ignored for pandas
and numpy arrays, which carry their own unit. Not case-sensitive;
prefixes of full names (like 'sec') are accepted.
Possible values:
* 'weeks' or 'w'
* 'days' or 'd'
* 'hours' or 'h'
* 'minutes', 'm', or 't'
* 'seconds' or 's'
* 'milliseconds', 'ms', or 'l'
* 'microseconds', 'us', or 'u'
* 'nanoseconds', 'ns', or 'n'
Unlike in pandas, units cannot be combined or mixed with integers.
Notes
-----
The ``.values`` attribute is always in nanoseconds with int64 dtype.
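
Examples
--------
Illustrative only; assumes a running Arkouda server and
``import arkouda as ak``:

>>> import pandas as pd
>>> dt = ak.Datetime(ak.arange(5), unit='d')  # days 0-4 since the Unix epoch
>>> dt2 = ak.Datetime(pd.date_range('2021-01-01', periods=5))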
"""
supported_with_datetime = frozenset(("==", "!=", "<", "<=", ">", ">=", "-"))
supported_with_r_datetime = frozenset(("==", "!=", "<", "<=", ">", ">=", "-"))
supported_with_timedelta = frozenset(("+", "-", "/", "//", "%"))
supported_with_r_timedelta = frozenset(("+",))
supported_opeq = frozenset(("+=", "-="))
supported_with_pdarray = frozenset(()) # type: ignore
supported_with_r_pdarray = frozenset(()) # type: ignore
special_objType = "Datetime"
def _ensure_components(self):
if self._is_populated:
return
# lazy initialize all attributes in one server call
attribute_dict = json.loads(generic_msg(cmd="dateTimeAttributes", args={"values": self.values}))
self._ns = create_pdarray(attribute_dict["nanosecond"])
self._us = create_pdarray(attribute_dict["microsecond"])
self._ms = create_pdarray(attribute_dict["millisecond"])
self._s = create_pdarray(attribute_dict["second"])
self._min = create_pdarray(attribute_dict["minute"])
self._hour = create_pdarray(attribute_dict["hour"])
self._day = create_pdarray(attribute_dict["day"])
self._month = create_pdarray(attribute_dict["month"])
self._year = create_pdarray(attribute_dict["year"])
self._iso_year = create_pdarray(attribute_dict["isoYear"])
self._day_of_week = create_pdarray(attribute_dict["dayOfWeek"])
self._week_of_year = create_pdarray(attribute_dict["weekOfYear"])
self._day_of_year = create_pdarray(attribute_dict["dayOfYear"])
self._is_leap_year = create_pdarray(attribute_dict["isLeapYear"])
self._date = self.floor("d")
self._is_populated = True
@property
def nanosecond(self):
self._ensure_components()
return self._ns
@property
def microsecond(self):
self._ensure_components()
return self._us
@property
def millisecond(self):
self._ensure_components()
return self._ms
@property
def second(self):
self._ensure_components()
return self._s
@property
def minute(self):
self._ensure_components()
return self._min
@property
def hour(self):
self._ensure_components()
return self._hour
@property
def day(self):
self._ensure_components()
return self._day
@property
def month(self):
self._ensure_components()
return self._month
@property
def year(self):
self._ensure_components()
return self._year
@property
def day_of_year(self):
self._ensure_components()
return self._day_of_year
@property
def dayofyear(self):
return self.day_of_year
@property
def day_of_week(self):
self._ensure_components()
return self._day_of_week
@property
def dayofweek(self):
return self.day_of_week
@property
def weekday(self):
return self.day_of_week
@property
def week(self):
self._ensure_components()
return self._week_of_year
@property
def weekofyear(self):
return self.week
@property
def date(self):
# no need to call _ensure_components for the date
# if _date has been set, return it. Otherwise set it first
if not hasattr(self, "_date"):
self._date = self.floor("d")
return self._date
@property
def is_leap_year(self):
self._ensure_components()
return self._is_leap_year
def isocalendar(self):
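"""Return an arkouda DataFrame with the ISO-calendar year, week number,
and weekday (1=Monday) of each element, analogous to pandas
``isocalendar``."""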
from arkouda import DataFrame
self._ensure_components()
return DataFrame(
{
"year": self._iso_year,
"week": self._week_of_year,
"day": self._day_of_week + 1,
}
)
@classmethod
def _get_callback(cls, otherclass, op):
callbacks = {
("Datetime", "-"): Timedelta,  # Datetime - Datetime -> Timedelta
("Timedelta", "+"): cls,  # Datetime + Timedelta -> Datetime
("Timedelta", "-"): cls,  # Datetime - Timedelta -> Datetime
("Timedelta", "%"): Timedelta,  # Datetime % Timedelta -> Timedelta
}
# Every other supported op returns an int64 pdarray, so callback is identity
return callbacks.get((otherclass, op), _identity)
def _scalar_callback(self, scalar):
# Formats a scalar return value as pandas Timestamp
return pdTimestamp(int(scalar), unit=_BASE_UNIT)
def _is_supported_scalar(self, scalar):
# Tests whether a scalar is type-compatible with this array's elements
return self._is_datetime_scalar(scalar)
def to_pandas(self):
"""Convert array to a pandas DatetimeIndex. Note: if the array size
exceeds client.maxTransferBytes, a RuntimeError is raised.
See Also
--------
to_ndarray
"""
return to_datetime(self.to_ndarray())
def sum(self):
# Summing absolute timestamps is not meaningful
raise TypeError("Cannot sum datetime64 values")
def register(self, user_defined_name):
"""
Register this Datetime object and underlying components with the Arkouda server
Parameters
----------
user_defined_name : str
user defined name the Datetime is to be registered under,
this will be the root name for underlying components
Returns
-------
Datetime
The same Datetime which is now registered with the arkouda server and has an updated name.
This is an in-place modification, the original is returned to support
a fluid programming style.
Please note you cannot register two different Datetimes with the same name.
Raises
------
TypeError
Raised if user_defined_name is not a str
RegistrationError
If the server was unable to register the Datetimes with the user_defined_name
See also
--------
unregister, attach, is_registered
Notes
-----
Objects registered with the server are immune to deletion until
they are unregistered.
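
Examples
--------
Illustrative only; assumes a running Arkouda server and
``import arkouda as ak`` ('my_datetimes' is an arbitrary example name):

>>> dt = ak.Datetime(ak.arange(3), unit='d').register('my_datetimes')
>>> dt.unregister()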
"""
from arkouda.client import generic_msg
if self.registered_name is not None and self.is_registered():
raise RegistrationError(f"This object is already registered as {self.registered_name}")
generic_msg(
cmd="register",
args={
"name": user_defined_name,
"objType": self.special_objType,
"array": self.values,
},
)
self.registered_name = user_defined_name
return self
def unregister(self):
"""
Unregister this Datetime object from the arkouda server, where it was
previously registered using register() and/or attached to using attach()
Raises
------
RegistrationError
If the object is already unregistered or if there is a server error
when attempting to unregister
See also
--------
register, attach, is_registered
Notes
-----
Objects registered with the server are immune to deletion until
they are unregistered.
"""
from arkouda.util import unregister
if not self.registered_name:
raise RegistrationError("This object is not registered")
unregister(self.registered_name)
self.registered_name = None
def is_registered(self) -> np.bool_:
"""
Return True iff the object is contained in the registry or is a component of a
registered object.
Returns
-------
numpy.bool_
Indicates if the object is contained in the registry
Raises
------
RegistrationError
Raised if there's a server-side error or a mismatch of registered components
See Also
--------
register, attach, unregister
Notes
-----
Objects registered with the server are immune to deletion until
they are unregistered.
"""
from arkouda.util import is_registered
if self.registered_name is None:
return np.bool_(is_registered(self.values.name, as_component=True))
else:
return np.bool_(is_registered(self.registered_name))
class Timedelta(_AbstractBaseTime):
"""Represents a duration, the difference between two dates or times.
Timedelta is the Arkouda equivalent of pandas.TimedeltaIndex.
Parameters
----------
pda : int64 pdarray, pd.TimedeltaIndex, pd.Series, or np.timedelta64 array
unit : str, default 'ns'
For int64 pdarray, denotes the unit of the input. Ignored for pandas
and numpy arrays, which carry their own unit. Not case-sensitive;
prefixes of full names (like 'sec') are accepted.
Possible values:
* 'weeks' or 'w'
* 'days' or 'd'
* 'hours' or 'h'
* 'minutes', 'm', or 't'
* 'seconds' or 's'
* 'milliseconds', 'ms', or 'l'
* 'microseconds', 'us', or 'u'
* 'nanoseconds', 'ns', or 'n'
Unlike in pandas, units cannot be combined or mixed with integers.
Notes
-----
The ``.values`` attribute is always in nanoseconds with int64 dtype.
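
Examples
--------
Illustrative only; assumes a running Arkouda server and
``import arkouda as ak``:

>>> import pandas as pd
>>> td = ak.Timedelta(ak.arange(5), unit='s')  # durations of 0-4 seconds
>>> td2 = ak.Timedelta(pd.timedelta_range('1 day', periods=5))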
"""
supported_with_datetime = frozenset(("+"))
supported_with_r_datetime = frozenset(("+", "-", "/", "//", "%"))
supported_with_timedelta = frozenset(("==", "!=", "<", "<=", ">", ">=", "+", "-", "/", "//", "%"))
supported_with_r_timedelta = frozenset(("==", "!=", "<", "<=", ">", ">=", "+", "-", "/", "//", "%"))
supported_opeq = frozenset(("+=", "-=", "%="))
supported_with_pdarray = frozenset(("*", "//"))
supported_with_r_pdarray = frozenset(("*"))
special_objType = "Timedelta"
def _ensure_components(self):
if self._is_populated:
return
# lazy initialize all attributes in one server call
attribute_dict = json.loads(generic_msg(cmd="timeDeltaAttributes", args={"values": self.values}))
self._ns = create_pdarray(attribute_dict["nanosecond"])
self._us = create_pdarray(attribute_dict["microsecond"])
self._ms = create_pdarray(attribute_dict["millisecond"])
self._s = create_pdarray(attribute_dict["second"])
self._m = create_pdarray(attribute_dict["minute"])
self._h = create_pdarray(attribute_dict["hour"])
self._d = create_pdarray(attribute_dict["day"])
self._nanoseconds = self._ns
self._microseconds = self._ms * 1000 + self._us
self._seconds = self._h * 3600 + self._m * 60 + self._s
self._days = self._d
# Note: nanoseconds are not included in this sum
self._total_seconds = self._days * (24 * 3600) + self._seconds + (self._microseconds / 10**6)
self._is_populated = True
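# For example, a duration of 1 day, 2 hours, and 3.5 seconds decomposes into
# days=1, hours=2, seconds=3, milliseconds=500, giving the pandas-style
# aggregates seconds=7203 (2*3600 + 3) and total_seconds=93603.5.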
@property
def nanoseconds(self):
self._ensure_components()
return self._nanoseconds
@property
def microseconds(self):
self._ensure_components()
return self._microseconds
@property
def seconds(self):
self._ensure_components()
return self._seconds
@property
def days(self):
self._ensure_components()
return self._days
def total_seconds(self):
"""Return the duration of each element as a float number of seconds."""
self._ensure_components()
return self._total_seconds
@property
def components(self):
from arkouda import DataFrame
self._ensure_components()
return DataFrame(
{
"days": self._d,
"hours": self._h,
"minutes": self._m,
"seconds": self._s,
"milliseconds": self._ms,
"microseconds": self._us,
"nanoseconds": self._ns,
}
)
@classmethod
def _get_callback(cls, otherclass, op):
callbacks = {
("Timedelta", "-"): cls,  # Timedelta - Timedelta -> Timedelta
("Timedelta", "+"): cls,  # Timedelta + Timedelta -> Timedelta
("Datetime", "+"): Datetime,  # Timedelta + Datetime -> Datetime
("Datetime", "-"): Datetime,  # Datetime - Timedelta -> Datetime
("Timedelta", "%"): cls,  # Timedelta % Timedelta -> Timedelta
("pdarray", "//"): cls,  # Timedelta // pdarray -> Timedelta
("pdarray", "*"): cls,  # Timedelta * pdarray -> Timedelta
}
# Every other supported op returns an int64 pdarray, so callback is identity
return callbacks.get((otherclass, op), _identity)
def _scalar_callback(self, scalar):
# Formats a returned scalar as a pandas.Timedelta
return pdTimedelta(int(scalar), unit=_BASE_UNIT)
def _is_supported_scalar(self, scalar):
# Tests whether a scalar is type-compatible with this array's elements
return self._is_timedelta_scalar(scalar)
def to_pandas(self):
"""Convert array to a pandas TimedeltaIndex. Note: if the array size
exceeds client.maxTransferBytes, a RuntimeError is raised.
See Also
--------
to_ndarray
"""
return to_timedelta(self.to_ndarray())
def std(self, ddof: int_scalars = 0):
"""
Returns the standard deviation as a pd.Timedelta object
"""
return self._scalar_callback(self.values.std(ddof=ddof))
def sum(self):
# Sum as a pd.Timedelta
return self._scalar_callback(self.values.sum())
def abs(self):
"""Absolute value of time interval."""
return self.__class__(cast(akabs(self.values), "int64"))
def register(self, user_defined_name):
"""
Register this Timedelta object and underlying components with the Arkouda server
Parameters
----------
user_defined_name : str
user defined name the timedelta is to be registered under,
this will be the root name for underlying components
Returns
-------
Timedelta
The same Timedelta which is now registered with the arkouda server and has an updated name.
This is an in-place modification, the original is returned to support
a fluid programming style.
Please note you cannot register two different Timedeltas with the same name.
Raises
------
TypeError
Raised if user_defined_name is not a str
RegistrationError
If the server was unable to register the timedelta with the user_defined_name
See also
--------
unregister, attach, is_registered
Notes
-----
Objects registered with the server are immune to deletion until
they are unregistered.
"""
from arkouda.client import generic_msg
if self.registered_name is not None and self.is_registered():
raise RegistrationError(f"This object is already registered as {self.registered_name}")
generic_msg(
cmd="register",
args={
"name": user_defined_name,
"objType": self.special_objType,
"array": self.values,
},
)
self.registered_name = user_defined_name
return self
def unregister(self):
"""
Unregister this Timedelta object from the arkouda server, where it was
previously registered using register() and/or attached to using attach()
Raises
------
RegistrationError
If the object is already unregistered or if there is a server error
when attempting to unregister
See also
--------
register, attach, is_registered
Notes
-----
Objects registered with the server are immune to deletion until
they are unregistered.
"""
from arkouda.util import unregister
if not self.registered_name:
raise RegistrationError("This object is not registered")
unregister(self.registered_name)
self.registered_name = None
def is_registered(self) -> np.bool_:
"""
Return True iff the object is contained in the registry or is a component of a
registered object.
Returns
-------
numpy.bool_
Indicates if the object is contained in the registry
Raises
------
RegistrationError
Raised if there's a server-side error or a mismatch of registered components
See Also
--------
register, attach, unregister
Notes
-----
Objects registered with the server are immune to deletion until
they are unregistered.
"""
from arkouda.util import is_registered
if self.registered_name is None:
return np.bool_(is_registered(self.values.name, as_component=True))
else:
return np.bool_(is_registered(self.registered_name))
def date_range(
start=None,
end=None,
periods=None,
freq=None,
tz=None,
normalize=False,
name=None,
closed=None,
inclusive="both",
**kwargs,
):
"""Creates a fixed frequency Datetime range. Alias for
``ak.Datetime(pd.date_range(args))``. Subject to size limit
imposed by client.maxTransferBytes.
Parameters
----------
start : str or datetime-like, optional
Left bound for generating dates.
end : str or datetime-like, optional
Right bound for generating dates.
periods : int, optional
Number of periods to generate.
freq : str or DateOffset, default 'D'
Frequency strings can have multiples, e.g. '5H'. See
timeseries.offset_aliases for a list of
frequency aliases.
tz : str or tzinfo, optional
Time zone name for returning localized DatetimeIndex, for example
'Asia/Hong_Kong'. By default, the resulting DatetimeIndex is
timezone-naive.
normalize : bool, default False
Normalize start/end dates to midnight before generating date range.
name : str, default None
Name of the resulting DatetimeIndex.
closed : {None, 'left', 'right'}, optional
Make the interval closed with respect to the given frequency to
the 'left', 'right', or both sides (None, the default).
*Deprecated*
inclusive : {"both", "neither", "left", "right"}, default "both"
Include boundaries. Whether to set each bound as closed or open.
**kwargs
Additional keyword arguments are passed through to ``pd.date_range``.
Returns
-------
rng : Datetime
Notes
-----
Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
exactly three must be specified. If ``freq`` is omitted, the resulting
``Datetime`` will have ``periods`` linearly spaced elements between
``start`` and ``end`` (closed on both sides).
To learn more about the frequency strings, please see `this link
<https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
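
Examples
--------
Illustrative only; assumes a running Arkouda server and
``import arkouda as ak``:

>>> rng = ak.date_range('2021-01-01', periods=3, freq='D')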
"""
if closed is not None:
warn(
"closed has been deprecated. Please use the inclusive parameter instead.",
DeprecationWarning,
)
inclusive = closed
return Datetime(
pd_date_range(
start=start,
end=end,
periods=periods,
freq=freq,
tz=tz,
normalize=normalize,
name=name,
inclusive=inclusive,
**kwargs,
)
)
def timedelta_range(start=None, end=None, periods=None, freq=None, name=None, closed=None, **kwargs):
"""Return a fixed frequency TimedeltaIndex, with day as the default
frequency. Alias for ``ak.Timedelta(pd.timedelta_range(args))``.
Subject to size limit imposed by client.maxTransferBytes.
Parameters
----------
start : str or timedelta-like, default None
Left bound for generating timedeltas.
end : str or timedelta-like, default None
Right bound for generating timedeltas.
periods : int, default None
Number of periods to generate.
freq : str or DateOffset, default 'D'
Frequency strings can have multiples, e.g. '5H'.
name : str, default None
Name of the resulting TimedeltaIndex.
closed : str, default None
Make the interval closed with respect to the given frequency to
the 'left', 'right', or both sides (None).
Returns
-------
rng : Timedelta
Notes
-----
Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
exactly three must be specified. If ``freq`` is omitted, the resulting
``Timedelta`` will have ``periods`` linearly spaced elements between
``start`` and ``end`` (closed on both sides).
To learn more about the frequency strings, please see `this link
<https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
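
Examples
--------
Illustrative only; assumes a running Arkouda server and
``import arkouda as ak``:

>>> rng = ak.timedelta_range(start='1 day', periods=4, freq='h')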
"""
return Timedelta(pd_timedelta_range(start=start, end=end, periods=periods, freq=freq, name=name, closed=closed, **kwargs))