import json
from enum import Enum
from typing import TYPE_CHECKING, List, Sequence, Tuple, TypeVar, Union
from typing import cast as type_cast
from typing import no_type_check
import numpy as np
from typeguard import typechecked
from arkouda.client import generic_msg
from arkouda.groupbyclass import GroupBy
from arkouda.numpy.dtypes import DTypes, bigint
from arkouda.numpy.dtypes import dtype as akdtype
from arkouda.numpy.dtypes import int64 as akint64
from arkouda.numpy.dtypes import (
int_scalars,
isSupportedNumber,
numeric_scalars,
resolve_scalar_dtype,
)
from arkouda.numpy.dtypes import str_
from arkouda.numpy.dtypes import str_ as akstr_
from arkouda.pdarrayclass import all as ak_all
from arkouda.pdarrayclass import any as ak_any
from arkouda.pdarrayclass import argmax, create_pdarray, pdarray, sum
from arkouda.pdarraycreation import array, linspace, scalar_array
from arkouda.sorting import sort
from arkouda.strings import Strings
# Categorical and SegArray are imported for real only while type checking.
# At runtime the names are bound to TypeVar placeholders so that annotations
# in this module still evaluate; functions that need the actual classes import
# them locally (presumably to avoid circular imports -- confirm).
if TYPE_CHECKING:
    from arkouda.categorical import Categorical
    from arkouda.segarray import SegArray
else:
    Categorical = TypeVar("Categorical")
    SegArray = TypeVar("SegArray")
# Names exported by ``from <this module> import *``.
__all__ = [
    "cast",
    "abs",
    "ceil",
    "clip",
    "count_nonzero",
    "eye",
    "floor",
    "trunc",
    "round",
    "sign",
    "isfinite",
    "isinf",
    "isnan",
    "log",
    "log2",
    "log10",
    "log1p",
    "exp",
    "expm1",
    "square",
    "matmul",
    "triu",
    "tril",
    "transpose",
    "vecdot",
    "cumsum",
    "cumprod",
    "sin",
    "cos",
    "tan",
    "arcsin",
    "arccos",
    "arctan",
    "arctan2",
    "sinh",
    "cosh",
    "tanh",
    "arcsinh",
    "arccosh",
    "arctanh",
    "rad2deg",
    "deg2rad",
    "hash",
    "array_equal",
    "putmask",
    "where",
    "histogram",
    "histogram2d",
    "histogramdd",
    "median",
    "value_counts",
    "ErrorMode",
]
[docs]
class ErrorMode(Enum):
    """
    Error-handling modes accepted by the ``errors`` argument of ``cast``.

    Every member's value is the same string as its name. The per-mode
    behavior summarized below is taken from the ``cast`` docstring.
    """

    # raise an error if any string cannot be converted
    strict = "strict"
    # never raise; uninterpretable strings become a sentinel value
    ignore = "ignore"
    # like ``ignore``, but also return a bool array marking successful casts
    return_validity = "return_validity"
[docs]
@typechecked
def cast(
    pda: Union[pdarray, Strings, Categorical],  # type: ignore
    dt: Union[np.dtype, type, str, bigint],
    errors: ErrorMode = ErrorMode.strict,
) -> Union[Union[pdarray, Strings, Categorical], Tuple[pdarray, pdarray]]:  # type: ignore
    """
    Cast an array to another dtype.

    Parameters
    ----------
    pda : pdarray, Strings, or Categorical
        The array of values to cast
    dt : np.dtype, type, str, or bigint
        The target dtype to cast values to
    errors : {strict, ignore, return_validity}
        Controls how errors are handled when casting strings to a numeric type
        (ignored for casts from numeric types).
            - strict: raise RuntimeError if *any* string cannot be converted
            - ignore: never raise an error. Uninterpretable strings get
                converted to NaN (float64), -2**63 (int64), zero (uint64 and
                uint8), or False (bool)
            - return_validity: in addition to returning the same output as
                "ignore", also return a bool array indicating where the cast
                was successful.

    Returns
    -------
    pdarray, Strings, or Categorical
        Array of values cast to desired dtype
    [validity : pdarray(bool)]
        If errors="return_validity" and input is Strings, a second array is
        returned with True where the cast succeeded and False where it failed.

    Raises
    ------
    ValueError
        Raised if a multi-dimensional pdarray is cast to Strings, or if a
        Categorical is cast to anything other than Strings
    TypeError
        Raised if pda is not a pdarray, Strings, or Categorical

    Notes
    -----
    The cast is performed according to Chapel's casting rules and is NOT safe
    from overflows or underflows. The user must ensure that the target dtype
    has the precision and capacity to hold the desired result.

    Examples
    --------
    >>> ak.cast(ak.linspace(1.0,5.0,5), dt=ak.int64)
    array([1, 2, 3, 4, 5])

    >>> ak.cast(ak.arange(0,5), dt=ak.float64).dtype
    dtype('float64')

    >>> ak.cast(ak.arange(0,5), dt=ak.bool_)
    array([False, True, True, True, True])

    >>> ak.cast(ak.linspace(0,4,5), dt=ak.bool_)
    array([False, True, True, True, True])
    """
    from arkouda.categorical import Categorical  # type: ignore

    # --- numeric / bool pdarray input -------------------------------------
    if isinstance(pda, pdarray):
        wants_strings = dt is Strings or akdtype(dt) == str_
        if not wants_strings:
            target = akdtype(dt)
            return create_pdarray(
                generic_msg(
                    cmd=f"cast<{pda.dtype},{target},{pda.ndim}>",
                    args={"name": pda},
                )
            )
        if pda.ndim > 1:
            raise ValueError("Cannot cast a multi-dimensional pdarray to Strings")
        reply = generic_msg(cmd=f"castToStrings<{pda.dtype}>", args={"name": pda})
        # the reply carries the Strings components joined by "+"
        return Strings.from_parts(*(type_cast(str, reply).split("+")))

    # --- Strings input ----------------------------------------------------
    if isinstance(pda, Strings):
        if dt is Categorical or dt == "Categorical":
            return Categorical(pda)  # type: ignore
        if dt is Strings or akdtype(dt) == str_:
            # Strings -> Strings: hand back a full slice of the input
            return pda[:]
        target = akdtype(dt)
        reply = generic_msg(
            cmd=f"castStringsTo<{target}>",
            args={"name": pda.entry.name, "opt": errors.name},
        )
        if errors != ErrorMode.return_validity:
            return create_pdarray(type_cast(str, reply))
        # two arrays come back joined by "+": the values and the validity mask
        values_part, validity_part = type_cast(str, reply).split("+")
        return (
            create_pdarray(type_cast(str, values_part)),
            create_pdarray(type_cast(str, validity_part)),
        )

    # --- Categorical input ------------------------------------------------
    if isinstance(pda, Categorical):  # type: ignore
        if dt is Strings or dt in ["Strings", "str"] or dt == str_:
            return pda.categories[pda.codes]
        raise ValueError("Categoricals can only be casted to Strings")

    raise TypeError("pda must be a pdarray, Strings, or Categorical object")
[docs]
@typechecked
def abs(pda: pdarray) -> pdarray:
    """
    Compute the absolute value of each element of the array.

    Parameters
    ----------
    pda : pdarray
        Input array

    Returns
    -------
    pdarray
        A pdarray containing absolute values of the input array elements

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray

    Examples
    --------
    >>> ak.abs(ak.arange(-5,-1))
    array([5, 4, 3, 2])

    >>> ak.abs(ak.linspace(-5,-1,5))
    array([5, 4, 3, 2, 1])
    """
    efunc_args = {"func": "abs", "array": pda}
    reply = generic_msg(cmd=f"efunc{pda.ndim}D", args=efunc_args)
    return create_pdarray(type_cast(str, reply))
[docs]
@typechecked
def ceil(pda: pdarray) -> pdarray:
    """
    Compute the ceiling of each element of the array.

    Parameters
    ----------
    pda : pdarray
        Input array

    Returns
    -------
    pdarray
        A pdarray containing ceiling values of the input array elements

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray

    Examples
    --------
    >>> ak.ceil(ak.linspace(1.1,5.5,5))
    array([2, 3, 4, 5, 6])
    """
    efunc_args = {"func": "ceil", "array": pda}
    reply = generic_msg(cmd=f"efunc{pda.ndim}D", args=efunc_args)
    return create_pdarray(type_cast(str, reply))
[docs]
@typechecked
def floor(pda: pdarray) -> pdarray:
    """
    Alias of arkouda.numpy.floor

    The implementation is imported at call time and the argument is passed
    straight through to it.
    """
    from arkouda.numpy import floor as _floor_impl

    result = _floor_impl(pda)
    return result
[docs]
@typechecked
def round(pda: pdarray) -> pdarray:
    """
    Round each element of the array.

    Parameters
    ----------
    pda : pdarray
        Input array

    Returns
    -------
    pdarray
        A pdarray containing input array elements rounded to the nearest integer

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray

    Examples
    --------
    >>> ak.round(ak.array([1.1, 2.5, 3.14159]))
    array([1, 3, 3])
    """
    efunc_args = {"func": "round", "array": pda}
    reply = generic_msg(cmd=f"efunc{pda.ndim}D", args=efunc_args)
    return create_pdarray(type_cast(str, reply))
[docs]
@typechecked
def trunc(pda: pdarray) -> pdarray:
    """
    Truncate each element of the array.

    Parameters
    ----------
    pda : pdarray
        Input array

    Returns
    -------
    pdarray
        A pdarray containing input array elements truncated to the nearest integer

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray

    Examples
    --------
    >>> ak.trunc(ak.array([1.1, 2.5, 3.14159]))
    array([1, 2, 3])
    """
    efunc_args = {"func": "trunc", "array": pda}
    reply = generic_msg(cmd=f"efunc{pda.ndim}D", args=efunc_args)
    return create_pdarray(type_cast(str, reply))
[docs]
@typechecked
def sign(pda: pdarray) -> pdarray:
    """
    Compute the sign of each element of the array.

    Parameters
    ----------
    pda : pdarray
        Input array

    Returns
    -------
    pdarray
        A pdarray containing sign values of the input array elements

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray

    Examples
    --------
    >>> ak.sign(ak.array([-10, -5, 0, 5, 10]))
    array([-1, -1, 0, 1, 1])
    """
    efunc_args = {"func": "sign", "array": pda}
    reply = generic_msg(cmd=f"efunc{pda.ndim}D", args=efunc_args)
    return create_pdarray(type_cast(str, reply))
[docs]
@typechecked
def isfinite(pda: pdarray) -> pdarray:
    """
    Test each element of the array for finiteness.

    Parameters
    ----------
    pda : pdarray
        Input array

    Returns
    -------
    pdarray
        A pdarray containing boolean values indicating whether the
        input array elements are finite

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray
    RuntimeError
        if the underlying pdarray is not float-based

    Examples
    --------
    >>> ak.isfinite(ak.array([1.0, 2.0, ak.inf]))
    array([True, True, False])
    """
    efunc_args = {"func": "isfinite", "array": pda}
    reply = generic_msg(cmd=f"efunc{pda.ndim}D", args=efunc_args)
    return create_pdarray(type_cast(str, reply))
[docs]
@typechecked
def isinf(pda: pdarray) -> pdarray:
    """
    Test each element of the array for infinity.

    Parameters
    ----------
    pda : pdarray
        Input array

    Returns
    -------
    pdarray
        A pdarray containing boolean values indicating whether the
        input array elements are infinite

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray
    RuntimeError
        if the underlying pdarray is not float-based

    Examples
    --------
    >>> ak.isinf(ak.array([1.0, 2.0, ak.inf]))
    array([False, False, True])
    """
    efunc_args = {"func": "isinf", "array": pda}
    reply = generic_msg(cmd=f"efunc{pda.ndim}D", args=efunc_args)
    return create_pdarray(type_cast(str, reply))
[docs]
@typechecked
def isnan(pda: pdarray) -> pdarray:
    """
    Return the element-wise isnan check applied to the array.

    Parameters
    ----------
    pda : pdarray
        Input array; must be of numeric type

    Returns
    -------
    pdarray
        A pdarray containing boolean values indicating whether the
        input array elements are NaN. For numeric arrays that are not
        float-based the result is all False.

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray or is not of numeric type

    Examples
    --------
    >>> ak.isnan(ak.array([1.0, 2.0, np.nan]))
    array([False, False, True])
    """
    from arkouda.util import is_float, is_numeric

    if not is_numeric(pda):
        raise TypeError("isnan only supports pdarray of numeric type.")

    if not is_float(pda):
        from arkouda.pdarraycreation import full

        # Non-float numeric data cannot hold NaN, so skip the server-side
        # check. Build the all-False answer with the input's shape so that
        # multi-dimensional input is not flattened to 1-D; one-dimensional
        # input keeps passing a plain size exactly as before.
        result_shape = tuple(pda.shape) if pda.ndim > 1 else pda.size
        return full(result_shape, False, dtype=bool)

    repMsg = generic_msg(
        cmd=f"efunc{pda.ndim}D",
        args={
            "func": "isnan",
            "array": pda,
        },
    )
    return create_pdarray(type_cast(str, repMsg))
[docs]
@typechecked
def log(pda: pdarray) -> pdarray:
    """
    Compute the natural logarithm of each element of the array.

    Parameters
    ----------
    pda : pdarray
        Input array

    Returns
    -------
    pdarray
        A pdarray containing natural log values of the input
        array elements

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray

    Notes
    -----
    Logarithms with other bases can be computed as shown in the examples.

    Examples
    --------
    >>> A = ak.array([1, 10, 100])
    >>> # Natural log
    >>> ak.log(A)
    array([0, 2.3025850929940459, 4.6051701859880918])
    >>> # Log base 10
    >>> ak.log(A) / np.log(10)
    array([0, 1, 2])
    >>> # Log base 2
    >>> ak.log(A) / np.log(2)
    array([0, 3.3219280948873626, 6.6438561897747253])
    """
    efunc_args = {"func": "log", "array": pda}
    reply = generic_msg(cmd=f"efunc{pda.ndim}D", args=efunc_args)
    return create_pdarray(type_cast(str, reply))
[docs]
@typechecked
def log10(x: pdarray) -> pdarray:
    """
    Compute the base 10 logarithm of each element of the array.

    Parameters
    ----------
    x : pdarray
        array to compute on

    Returns
    -------
    pdarray
        pdarray containing the base 10 log values
    """
    efunc_args = {"func": "log10", "array": x}
    reply = generic_msg(cmd=f"efunc{x.ndim}D", args=efunc_args)
    return create_pdarray(type_cast(str, reply))
[docs]
@typechecked
def log2(x: pdarray) -> pdarray:
    """
    Compute the base 2 logarithm of each element of the array.

    Parameters
    ----------
    x : pdarray
        array to compute on

    Returns
    -------
    pdarray
        pdarray containing the base 2 log values
    """
    efunc_args = {"func": "log2", "array": x}
    reply = generic_msg(cmd=f"efunc{x.ndim}D", args=efunc_args)
    return create_pdarray(type_cast(str, reply))
[docs]
@typechecked
def log1p(x: pdarray) -> pdarray:
    """
    Compute the natural logarithm of one plus each element of the array.

    Parameters
    ----------
    x : pdarray
        array to compute on

    Returns
    -------
    pdarray
        pdarray containing the natural log of one plus the array values
    """
    efunc_args = {"func": "log1p", "array": x}
    reply = generic_msg(cmd=f"efunc{x.ndim}D", args=efunc_args)
    # type_cast only informs the type checker; it does nothing at runtime
    return create_pdarray(type_cast(str, reply))
[docs]
@typechecked
def exp(pda: pdarray) -> pdarray:
    """
    Compute the exponential of each element of the array.

    Parameters
    ----------
    pda : pdarray
        Input array

    Returns
    -------
    pdarray
        A pdarray containing exponential values of the input
        array elements

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray

    Examples
    --------
    >>> ak.exp(ak.arange(1,5))
    array([2.7182818284590451, 7.3890560989306504, 20.085536923187668, 54.598150033144236])

    >>> ak.exp(ak.uniform(5,1.0,5.0))
    array([11.84010843172504, 46.454368507659211, 5.5571769623557188,
           33.494295836924771, 13.478894913238722])
    """
    efunc_args = {"func": "exp", "array": pda}
    reply = generic_msg(cmd=f"efunc{pda.ndim}D", args=efunc_args)
    return create_pdarray(type_cast(str, reply))
[docs]
@typechecked
def expm1(pda: pdarray) -> pdarray:
    """
    Compute the exponential of each element of the array, minus one.

    Parameters
    ----------
    pda : pdarray
        Input array

    Returns
    -------
    pdarray
        A pdarray containing exponential values of the input
        array elements minus one

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray

    Examples
    --------
    >>> ak.expm1(ak.arange(1,5))
    array([1.7182818284590451, 6.3890560989306504, 19.085536923187668, 53.598150033144236])

    >>> ak.expm1(ak.uniform(5,1.0,5.0))
    array([10.84010843172504, 45.454368507659211, 4.5571769623557188,
           32.494295836924771, 12.478894913238722])
    """
    efunc_args = {"func": "expm1", "array": pda}
    reply = generic_msg(cmd=f"efunc{pda.ndim}D", args=efunc_args)
    return create_pdarray(type_cast(str, reply))
[docs]
@typechecked
def square(pda: pdarray) -> pdarray:
    """
    Compute the square of each element of the array.

    Parameters
    ----------
    pda : pdarray
        Input array

    Returns
    -------
    pdarray
        A pdarray containing square values of the input
        array elements

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray

    Examples
    --------
    >>> ak.square(ak.arange(1,5))
    array([1, 4, 9, 16])
    """
    efunc_args = {"func": "square", "array": pda}
    reply = generic_msg(cmd=f"efunc{pda.ndim}D", args=efunc_args)
    return create_pdarray(type_cast(str, reply))
[docs]
@typechecked
def cumsum(pda: pdarray) -> pdarray:
    """
    Compute the cumulative sum over the array.

    The sum is inclusive, such that the ``i`` th element of the
    result is the sum of elements up to and including ``i``.

    Parameters
    ----------
    pda : pdarray
        Input array

    Returns
    -------
    pdarray
        A pdarray containing cumulative sums for each element
        of the original pdarray

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray

    Examples
    --------
    >>> ak.cumsum(ak.arange(1,4))
    array([1, 3, 6])

    >>> ak.cumsum(ak.uniform(5,1.0,5.0))
    array([3.1598310770203937, 5.4110385860243131, 9.1622479306453748,
           12.710615785506533, 13.945880905466208])

    >>> ak.cumsum(ak.randint(0, 1, 5, dtype=ak.bool_))
    array([0, 1, 1, 2, 3])
    """
    efunc_args = {"func": "cumsum", "array": pda}
    reply = generic_msg(cmd=f"efunc{pda.ndim}D", args=efunc_args)
    return create_pdarray(type_cast(str, reply))
[docs]
@typechecked
def cumprod(pda: pdarray) -> pdarray:
    """
    Compute the cumulative product over the array.

    The product is inclusive, such that the ``i`` th element of the
    result is the product of elements up to and including ``i``.

    Parameters
    ----------
    pda : pdarray
        Input array

    Returns
    -------
    pdarray
        A pdarray containing cumulative products for each element
        of the original pdarray

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray

    Examples
    --------
    >>> ak.cumprod(ak.arange(1,5))
    array([1, 2, 6, 24])

    >>> ak.cumprod(ak.uniform(5,1.0,5.0))
    array([1.5728783400481925, 7.0472855509390593, 33.78523998586553,
           134.05309592737584, 450.21589865655358])
    """
    efunc_args = {"func": "cumprod", "array": pda}
    reply = generic_msg(cmd=f"efunc{pda.ndim}D", args=efunc_args)
    return create_pdarray(type_cast(str, reply))
[docs]
@typechecked
def sin(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray:
    """
    Compute the sine of each element of the array.

    Parameters
    ----------
    pda : pdarray
        Input array
    where : Boolean or pdarray
        This condition is broadcast over the input. At locations where the condition is True,
        the sine will be applied to the corresponding value. Elsewhere, it will retain
        its original value. Default set to True.

    Returns
    -------
    pdarray
        A pdarray containing sin for each element
        of the original pdarray

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray
    """
    return _trig_helper(pda, func="sin", where=where)
[docs]
@typechecked
def cos(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray:
    """
    Compute the cosine of each element of the array.

    Parameters
    ----------
    pda : pdarray
        Input array
    where : Boolean or pdarray
        This condition is broadcast over the input. At locations where the condition is True,
        the cosine will be applied to the corresponding value. Elsewhere, it will retain
        its original value. Default set to True.

    Returns
    -------
    pdarray
        A pdarray containing cosine for each element
        of the original pdarray

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray
    """
    return _trig_helper(pda, func="cos", where=where)
[docs]
@typechecked
def tan(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray:
    """
    Compute the tangent of each element of the array.

    Parameters
    ----------
    pda : pdarray
        Input array
    where : Boolean or pdarray
        This condition is broadcast over the input. At locations where the condition is True,
        the tangent will be applied to the corresponding value. Elsewhere, it will retain
        its original value. Default set to True.

    Returns
    -------
    pdarray
        A pdarray containing tangent for each element
        of the original pdarray

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray
    """
    return _trig_helper(pda, func="tan", where=where)
[docs]
@typechecked
def arcsin(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray:
    """
    Compute the inverse sine of each element of the array.

    The result is between -pi/2 and pi/2.

    Parameters
    ----------
    pda : pdarray
        Input array
    where : Boolean or pdarray
        This condition is broadcast over the input. At locations where the condition is True,
        the inverse sine will be applied to the corresponding value. Elsewhere, it will retain
        its original value. Default set to True.

    Returns
    -------
    pdarray
        A pdarray containing inverse sine for each element
        of the original pdarray

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray
    """
    return _trig_helper(pda, func="arcsin", where=where)
[docs]
@typechecked
def arccos(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray:
    """
    Compute the inverse cosine of each element of the array.

    The result is between 0 and pi.

    Parameters
    ----------
    pda : pdarray
        Input array
    where : Boolean or pdarray
        This condition is broadcast over the input. At locations where the condition is True,
        the inverse cosine will be applied to the corresponding value. Elsewhere, it will retain
        its original value. Default set to True.

    Returns
    -------
    pdarray
        A pdarray containing inverse cosine for each element
        of the original pdarray

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray
    """
    return _trig_helper(pda, func="arccos", where=where)
[docs]
@typechecked
def arctan(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray:
    """
    Compute the inverse tangent of each element of the array.

    The result is between -pi/2 and pi/2.

    Parameters
    ----------
    pda : pdarray
        Input array
    where : Boolean or pdarray
        This condition is broadcast over the input. At locations where the condition is True,
        the inverse tangent will be applied to the corresponding value. Elsewhere, it will retain
        its original value. Default set to True.

    Returns
    -------
    pdarray
        A pdarray containing inverse tangent for each element
        of the original pdarray

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray
    """
    return _trig_helper(pda, func="arctan", where=where)
[docs]
@typechecked
def arctan2(
    num: Union[pdarray, numeric_scalars],
    denom: Union[pdarray, numeric_scalars],
    where: Union[bool, pdarray] = True,
) -> pdarray:
    """
    Return the element-wise inverse tangent of the array pair. The result chosen is the
    signed angle in radians between the ray ending at the origin and passing through the
    point (1,0), and the ray ending at the origin and passing through the point (denom, num).
    The result is between -pi and pi.

    Parameters
    ----------
    num : Union[numeric_scalars, pdarray]
        Numerator of the arctan2 argument.
    denom : Union[numeric_scalars, pdarray]
        Denominator of the arctan2 argument.
    where : Boolean or pdarray
        This condition is broadcast over the input. At locations where the condition is True,
        the inverse tangent will be applied to the corresponding values. Elsewhere, the result
        holds ``num / denom`` (cast to the dtype of the arctan2 result when ``where`` is a
        pdarray). Default set to True.

    Returns
    -------
    pdarray
        A pdarray containing inverse tangent for each corresponding element pair
        of the original pdarray, using the signed values or the numerator and
        denominator to get proper placement on unit circle.

    Raises
    ------
    TypeError
        Raised if either argument is neither a pdarray nor a supported numeric scalar,
        if both arguments are scalars (at least one must be a pdarray), or if ``where``
        is a pdarray whose dtype is not bool
    """
    both_supported = all(
        isSupportedNumber(arg) or isinstance(arg, pdarray) for arg in [num, denom]
    )
    both_scalar = isSupportedNumber(num) and isSupportedNumber(denom)
    if not both_supported or both_scalar:
        raise TypeError(
            f"Unsupported types {type(num)} and/or {type(denom)}. Supported "
            "types are numeric scalars and pdarrays. At least one argument must be a pdarray."
        )

    # Past the validation above at least one of num/denom is a pdarray, so the
    # dimensionality can always be read from one of them.
    # TODO: handle shape broadcasting for multidimensional arrays
    ndim = num.ndim if isinstance(num, pdarray) else denom.ndim  # type: ignore[union-attr]

    if where is True:
        repMsg = type_cast(
            str,
            generic_msg(
                cmd=f"efunc2Arg{ndim}D",
                args={
                    "func": "arctan2",
                    "A": num,
                    "B": denom,
                },
            ),
        )
        return create_pdarray(repMsg)

    if where is False:
        return num / denom  # type: ignore

    if where.dtype != bool:
        raise TypeError(f"where must have dtype bool, got {where.dtype} instead")

    # Only the selected elements are sent to the server; scalar operands are
    # passed through unchanged.
    repMsg = type_cast(
        str,
        generic_msg(
            cmd=f"efunc2Arg{ndim}D",
            args={
                "func": "arctan2",
                "A": num[where] if isinstance(num, pdarray) else num,
                "B": denom[where] if isinstance(denom, pdarray) else denom,
            },
        ),
    )
    # Unselected positions keep num / denom; selected ones are overwritten below.
    new_pda = num / denom
    ret = create_pdarray(repMsg)
    new_pda = cast(new_pda, ret.dtype)
    new_pda[where] = ret
    return new_pda
[docs]
@typechecked
def sinh(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray:
    """
    Compute the hyperbolic sine of each element of the array.

    Parameters
    ----------
    pda : pdarray
        Input array
    where : Boolean or pdarray
        This condition is broadcast over the input. At locations where the condition is True,
        the hyperbolic sine will be applied to the corresponding value. Elsewhere, it will retain
        its original value. Default set to True.

    Returns
    -------
    pdarray
        A pdarray containing hyperbolic sine for each element
        of the original pdarray

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray
    """
    return _trig_helper(pda, func="sinh", where=where)
[docs]
@typechecked
def cosh(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray:
    """
    Compute the hyperbolic cosine of each element of the array.

    Parameters
    ----------
    pda : pdarray
        Input array
    where : Boolean or pdarray
        This condition is broadcast over the input. At locations where the condition is True,
        the hyperbolic cosine will be applied to the corresponding value. Elsewhere, it will retain
        its original value. Default set to True.

    Returns
    -------
    pdarray
        A pdarray containing hyperbolic cosine for each element
        of the original pdarray

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray
    """
    return _trig_helper(pda, func="cosh", where=where)
[docs]
@typechecked
def tanh(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray:
    """
    Compute the hyperbolic tangent of each element of the array.

    Parameters
    ----------
    pda : pdarray
        Input array
    where : Boolean or pdarray
        This condition is broadcast over the input. At locations where the condition is True,
        the hyperbolic tangent will be applied to the corresponding value. Elsewhere, it will retain
        its original value. Default set to True.

    Returns
    -------
    pdarray
        A pdarray containing hyperbolic tangent for each element
        of the original pdarray

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray
    """
    return _trig_helper(pda, func="tanh", where=where)
[docs]
@typechecked
def arcsinh(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray:
    """
    Compute the inverse hyperbolic sine of each element of the array.

    Parameters
    ----------
    pda : pdarray
        Input array
    where : Boolean or pdarray
        This condition is broadcast over the input. At locations where the condition is True,
        the inverse hyperbolic sine will be applied to the corresponding value. Elsewhere, it will
        retain its original value. Default set to True.

    Returns
    -------
    pdarray
        A pdarray containing inverse hyperbolic sine for each element
        of the original pdarray

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray
    """
    return _trig_helper(pda, func="arcsinh", where=where)
[docs]
@typechecked
def arccosh(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray:
    """
    Compute the inverse hyperbolic cosine of each element of the array.

    Parameters
    ----------
    pda : pdarray
        Input array
    where : Boolean or pdarray
        This condition is broadcast over the input. At locations where the condition is True,
        the inverse hyperbolic cosine will be applied to the corresponding value. Elsewhere, it will
        retain its original value. Default set to True.

    Returns
    -------
    pdarray
        A pdarray containing inverse hyperbolic cosine for each element
        of the original pdarray

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray
    """
    return _trig_helper(pda, func="arccosh", where=where)
[docs]
@typechecked
def arctanh(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray:
    """
    Compute the inverse hyperbolic tangent of each element of the array.

    Parameters
    ----------
    pda : pdarray
        Input array
    where : Boolean or pdarray
        This condition is broadcast over the input. At locations where the condition is True,
        the inverse hyperbolic tangent will be applied to the corresponding value. Elsewhere,
        it will retain its original value. Default set to True.

    Returns
    -------
    pdarray
        A pdarray containing inverse hyperbolic tangent for each element
        of the original pdarray

    Raises
    ------
    TypeError
        Raised if the parameters are not a pdarray or numeric scalar.
    """
    return _trig_helper(pda, func="arctanh", where=where)
def _trig_helper(pda: pdarray, func: str, where: Union[bool, pdarray] = True) -> pdarray:
    """
    Apply the named trig function element-wise to the array.

    Parameters
    ----------
    pda : pdarray
        Input array
    func : string
        The designated trig function that is passed in
    where : Boolean or pdarray
        This condition is applied over the input. At locations where the condition is True, the
        corresponding value will be acted on by the respective trig function. Elsewhere,
        it will retain its original value. Default set to True.

    Returns
    -------
    pdarray
        A pdarray with the trig function applied at each element of pda.
        When ``where`` is False the input array itself is returned.

    Raises
    ------
    TypeError
        Raised if ``where`` is a pdarray whose dtype is not bool
    """
    # Nothing selected: the input is handed back untouched.
    if where is False:
        return pda

    # Everything selected: apply the function to the whole array.
    if where is True:
        whole_reply = type_cast(
            str,
            generic_msg(cmd=f"efunc{pda.ndim}D", args={"func": func, "array": pda}),
        )
        return create_pdarray(whole_reply)

    # Mask case: compute on the selected elements only, then scatter them back.
    if where.dtype != bool:
        raise TypeError(f"where must have dtype bool, got {where.dtype} instead")

    masked_reply = type_cast(
        str,
        generic_msg(cmd=f"efunc{pda.ndim}D", args={"func": func, "array": pda[where]}),
    )
    base = pda[:]
    computed = create_pdarray(masked_reply)
    # Unselected values are converted to the result dtype so both can share one array.
    merged = cast(base, computed.dtype)
    merged[where] = computed
    return merged
[docs]
@typechecked
def rad2deg(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray:
    """
    Convert angles element-wise from radians to degrees.

    Parameters
    ----------
    pda : pdarray
        Input array of angles in radians
    where : Boolean or pdarray
        This condition is broadcast over the input. At locations where the condition is True, the
        corresponding value will be converted from radians to degrees. Elsewhere, it will retain its
        original value. Default set to True.

    Returns
    -------
    pdarray
        A pdarray containing an angle converted to degrees, from radians, for each element
        of the original pdarray. When ``where`` is False the input array itself is returned.

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray
    """
    if where is True:
        return 180 * (pda / np.pi)
    if where is False:
        return pda

    # Convert only the selected elements, then place them into a copy of the
    # input that has been cast to the dtype of the converted values.
    degrees = 180 * (pda[where] / np.pi)
    result = cast(pda, degrees.dtype)
    result[where] = degrees
    return result
[docs]
@typechecked
def deg2rad(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray:
    """
    Convert angles element-wise from degrees to radians.

    Parameters
    ----------
    pda : pdarray
        Input array of angles in degrees
    where : Boolean or pdarray
        This condition is broadcast over the input. At locations where the condition is True, the
        corresponding value will be converted from degrees to radians. Elsewhere, it will retain its
        original value. Default set to True.

    Returns
    -------
    pdarray
        A pdarray containing an angle converted to radians, from degrees, for each element
        of the original pdarray. When ``where`` is False the input array itself is returned.

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray
    """
    if where is True:
        return np.pi * pda / 180
    if where is False:
        return pda

    # Convert only the selected elements, then place them into a copy of the
    # input that has been cast to the dtype of the converted values.
    radians = np.pi * pda[where] / 180
    result = cast(pda, radians.dtype)
    result[where] = radians
    return result
def _hash_helper(a):
    """
    Build the identifier string used for ``a`` in a ``hashList`` request.

    SegArray and Categorical objects are described by a JSON document naming
    their component arrays; any other object is identified by its ``name``.
    """
    from arkouda import Categorical as Categorical_
    from arkouda import SegArray as SegArray_

    if isinstance(a, SegArray_):
        seg_description = {
            "segments": a.segments.name,
            "values": a.values.name,
            "valObjType": a.values.objType,
        }
        return json.dumps(seg_description)

    if isinstance(a, Categorical_):
        cat_description = {"categories": a.categories.name, "codes": a.codes.name}
        return json.dumps(cat_description)

    return a.name
# NOTE: hash is not decorated with @typechecked and its annotations carry
# "# type: ignore", so no automatic type checking happens on its arguments.
# The type hints are kept as a reference for the expected types;
# type validation is done within the function body.
[docs]
def hash(
    pda: Union[  # type: ignore
        Union[pdarray, Strings, SegArray, Categorical],
        List[Union[pdarray, Strings, SegArray, Categorical]],
    ],
    full: bool = True,
) -> Union[Tuple[pdarray, pdarray], pdarray]:
    """
    Return an element-wise hash of the array or list of arrays.

    Parameters
    ----------
    pda : Union[pdarray, Strings, SegArray, Categorical],
        List[Union[pdarray, Strings, SegArray, Categorical]]]

    full : bool
        This is only used when a single pdarray is passed into hash
        By default, a 128-bit hash is computed and returned as
        two int64 arrays. If full=False, then a 64-bit hash
        is computed and returned as a single int64 array.

    Returns
    -------
    hashes
        If full=True or a list of pdarrays is passed,
        a 2-tuple of pdarrays containing the high
        and low 64 bits of each hash, respectively.
        If full=False and a single pdarray is passed,
        a single pdarray containing a 64-bit hash

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray, Strings, SegArray,
        Categorical, or a list of these; the message names the type of the
        first offending list element

    Notes
    -----
    In the case of a single pdarray being passed, this function
    uses the SIPhash algorithm, which can output either a 64-bit
    or 128-bit hash. However, the 64-bit hash runs a significant
    risk of collisions when applied to more than a few million
    unique values. Unless the number of unique values is known to
    be small, the 128-bit hash is strongly recommended.

    Note that this hash should not be used for security, or for
    any cryptographic application. Not only is SIPhash not
    intended for such uses, but this implementation employs a
    fixed key for the hash, which makes it possible for an
    adversary with control over input to engineer collisions.

    In the case of a list of pdarrays, Strings, Categoricals, or SegArrays
    being passed, a non-linear function must be applied to each
    array since hashes of subsequent arrays cannot be simply XORed
    because equivalent values will cancel each other out, hence we
    do a rotation by the ordinal of the array.
    """
    from arkouda import Categorical as Categorical_
    from arkouda import SegArray as SegArray_

    supported_types = (pdarray, Strings, SegArray_, Categorical_)

    if isinstance(pda, supported_types):
        # ``full`` only applies to plain pdarrays; other objects use their own hash()
        return _hash_single(pda, full) if isinstance(pda, pdarray) else pda.hash()
    elif isinstance(pda, List):
        # Report the first element that really has an unsupported type.
        # (Indexing with argmin over the "is wrong" flags picked the first
        # *valid* element instead, producing a misleading message.)
        for a in pda:
            if not isinstance(a, supported_types):
                raise TypeError(
                    f"Unsupported type {type(a)}. Supported types are pdarray,"
                    f" SegArray, Strings, Categoricals, and Lists of these types."
                )
        # replace bigint pdarrays with the uint limbs
        expanded_pda = []
        for a in pda:
            if isinstance(a, pdarray) and a.dtype == bigint:
                expanded_pda.extend(a.bigint_to_uint_arrays())
            else:
                expanded_pda.append(a)
        types_list = [a.objType for a in expanded_pda]
        names_list = [_hash_helper(a) for a in expanded_pda]
        rep_msg = type_cast(
            str,
            generic_msg(
                cmd="hashList",
                args={
                    "nameslist": names_list,
                    "typeslist": types_list,
                    "length": len(expanded_pda),
                    "size": len(expanded_pda[0]),
                },
            ),
        )
        # the reply is a JSON document holding the two result arrays
        hashes = json.loads(rep_msg)
        return create_pdarray(hashes["upperHash"]), create_pdarray(hashes["lowerHash"])
    else:
        raise TypeError(
            f"Unsupported type {type(pda)}. Supported types are pdarray,"
            f" SegArray, Strings, Categoricals, and Lists of these types."
        )
@typechecked
def _hash_single(pda: pdarray, full: bool = True):
    """
    Hash one pdarray element-wise.

    Parameters
    ----------
    pda : pdarray
        The array to hash. A bigint array is first split with
        ``bigint_to_uint_arrays`` and the resulting list is handed to ``hash``.
    full : bool
        When True the server function ``hash128`` is requested and two
        pdarrays are returned; when False ``hash64`` is requested and a
        single pdarray is returned.

    Returns
    -------
    Union[Tuple[pdarray, pdarray], pdarray]
        Two pdarrays when ``full`` is True (or for bigint input), otherwise one.
    """
    if pda.dtype == bigint:
        return hash(pda.bigint_to_uint_arrays())

    server_func = "hash128" if full else "hash64"
    reply = type_cast(
        str,
        generic_msg(
            cmd=f"efunc{pda.ndim}D",
            args={"func": server_func, "array": pda},
        ),
    )

    if not full:
        return create_pdarray(reply)

    # the 128-bit reply holds two array descriptions joined by "+"
    first, second = reply.split("+")
    return create_pdarray(first), create_pdarray(second)
@no_type_check
def _str_cat_where(
    condition: pdarray,
    A: Union[str, Strings, Categorical],
    B: Union[str, Strings, Categorical],
) -> Union[Strings, Categorical]:
    """
    Implementation of ``where`` for str, Strings and Categorical operands.

    Parameters
    ----------
    condition : pdarray
        Selects from A where True and from B where False
    A : Union[str, Strings, Categorical]
        Value(s) used when condition is True
    B : Union[str, Strings, Categorical]
        Value(s) used when condition is False

    Returns
    -------
    Union[Strings, Categorical]
        A Categorical when A (after the possible swap below) is a Categorical,
        a Strings when it is a Strings

    Raises
    ------
    TypeError
        Raised if two Categoricals or two Strings differ in length, or if the
        operand combination is not one of the supported pairs (for example
        Strings with Categorical)
    """
    # added @no_type_check because mypy can't handle Categorical not being declared
    # sooner, but there are circular dependencies preventing that
    from arkouda.categorical import Categorical
    from arkouda.pdarraysetops import concatenate

    if isinstance(A, str) and isinstance(B, (Categorical, Strings)):
        # This allows us to assume if a str is present it is B
        A, B, condition = B, A, ~condition

    # one cat and one str
    if isinstance(A, Categorical) and isinstance(B, str):
        is_in_categories = A.categories == B
        if ak_any(is_in_categories):
            new_categories = A.categories
            b_code = argmax(is_in_categories)
        else:
            new_categories = concatenate([A.categories, array([B])])
            # B was appended as the last category, so its code is the index
            # of that final entry.  (Previously this used A.codes.size + 1,
            # an element count unrelated to the category indices.)
            b_code = A.categories.size
        new_codes = where(condition, A.codes, b_code)
        return Categorical.from_codes(new_codes, new_categories, NAvalue=A.NAvalue).reset_categories()

    # both cat
    if isinstance(A, Categorical) and isinstance(B, Categorical):
        if A.codes.size != B.codes.size:
            raise TypeError("Categoricals must be same length")
        # codes are only comparable when both operands share one category list
        if A.categories.size != B.categories.size or not ak_all(A.categories == B.categories):
            A, B = A.standardize_categories([A, B])
        new_codes = where(condition, A.codes, B.codes)
        return Categorical.from_codes(new_codes, A.categories, NAvalue=A.NAvalue).reset_categories()

    # one strings and one str
    if isinstance(A, Strings) and isinstance(B, str):
        new_lens = where(condition, A.get_lengths(), len(B))
        repMsg = generic_msg(
            cmd="segmentedWhere",
            args={
                "seg_str": A,
                "other": B,
                "is_str_literal": True,
                "new_lens": new_lens,
                "condition": condition,
            },
        )
        return Strings.from_return_msg(repMsg)

    # both strings
    if isinstance(A, Strings) and isinstance(B, Strings):
        if A.size != B.size:
            raise TypeError("Strings must be same length")
        new_lens = where(condition, A.get_lengths(), B.get_lengths())
        repMsg = generic_msg(
            cmd="segmentedWhere",
            args={
                "seg_str": A,
                "other": B,
                "is_str_literal": False,
                "new_lens": new_lens,
                "condition": condition,
            },
        )
        return Strings.from_return_msg(repMsg)

    raise TypeError("ak.where is not supported between Strings and Categorical")
[docs]
@typechecked
def where(
    condition: pdarray,
    A: Union[str, numeric_scalars, pdarray, Strings, Categorical],  # type: ignore
    B: Union[str, numeric_scalars, pdarray, Strings, Categorical],  # type: ignore
) -> Union[pdarray, Strings, Categorical]:  # type: ignore
    """
    Return an array with elements chosen from A and B based upon a
    conditioning array. As is the case with numpy.where, the return array
    consists of values from the first array (A) where the conditioning array
    elements are True and from the second array (B) where the conditioning
    array elements are False.

    Parameters
    ----------
    condition : pdarray
        Used to choose values from A or B
    A : Union[numeric_scalars, str, pdarray, Strings, Categorical]
        Value(s) used when condition is True
    B : Union[numeric_scalars, str, pdarray, Strings, Categorical]
        Value(s) used when condition is False

    Returns
    -------
    Union[pdarray, Strings, Categorical]
        Values chosen from A where the condition is True and B where
        the condition is False

    Raises
    ------
    TypeError
        Raised if the condition object is not a pdarray, if A or B is not
        an int, np.int64, float, np.float64, pdarray, str, Strings, Categorical
        if pdarray dtypes are not supported or do not match, or multiple
        condition clauses (see Notes section) are applied
    ValueError
        Raised if the shapes of the condition, A, and B pdarrays are unequal

    Examples
    --------
    >>> a1 = ak.arange(1,10)
    >>> a2 = ak.ones(9, dtype=np.int64)
    >>> cond = a1 < 5
    >>> ak.where(cond,a1,a2)
    array([1, 2, 3, 4, 1, 1, 1, 1, 1])

    >>> a1 = ak.arange(1,10)
    >>> a2 = ak.ones(9, dtype=np.int64)
    >>> cond = a1 == 5
    >>> ak.where(cond,a1,a2)
    array([1, 1, 1, 1, 5, 1, 1, 1, 1])

    >>> a1 = ak.arange(1,10)
    >>> a2 = 10
    >>> cond = a1 < 5
    >>> ak.where(cond,a1,a2)
    array([1, 2, 3, 4, 10, 10, 10, 10, 10])

    >>> s1 = ak.array([f'str {i}' for i in range(10)])
    >>> s2 = 'str 21'
    >>> cond = (ak.arange(10) % 2 == 0)
    >>> ak.where(cond,s1,s2)
    array(['str 0', 'str 21', 'str 2', 'str 21', 'str 4', 'str 21', 'str 6', 'str 21', 'str 8','str 21'])

    >>> c1 = ak.Categorical(ak.array([f'str {i}' for i in range(10)]))
    >>> c2 = ak.Categorical(ak.array([f'str {i}' for i in range(9, -1, -1)]))
    >>> cond = (ak.arange(10) % 2 == 0)
    >>> ak.where(cond,c1,c2)
    array(['str 0', 'str 8', 'str 2', 'str 6', 'str 4', 'str 4', 'str 6', 'str 2', 'str 8', 'str 0'])

    Notes
    -----
    A and B must have the same dtype. Only one conditional clause is
    supported; a compound clause such as ``n < 5, n > 1``, which numpy
    supports, is not currently supported in Arkouda.
    """
    a_is_numeric = isSupportedNumber(A) or isinstance(A, pdarray)
    b_is_numeric = isSupportedNumber(B) or isinstance(B, pdarray)

    # Anything that is not a number/pdarray pair goes to the string/categorical path.
    if not (a_is_numeric and b_is_numeric):
        from arkouda.categorical import Categorical  # type: ignore

        text_like = (str, Strings, Categorical)
        if not (isinstance(A, text_like) and isinstance(B, text_like)):  # type: ignore
            raise TypeError(
                "both A and B must be an int, np.int64, float, np.float64, pdarray OR"
                " both A and B must be an str, Strings, Categorical"
            )
        return _str_cat_where(condition, A, B)

    # The command suffix encodes operand kinds: v = pdarray ("vector"), s = scalar.
    ndim = condition.ndim
    if isinstance(A, pdarray) and isinstance(B, pdarray):
        # TODO: handle shape broadcasting for multidimensional arrays
        cmd = f"efunc3vv{ndim}D"
        args = {"func": "where", "condition": condition, "a": A, "b": B}
    elif isinstance(A, pdarray) and np.isscalar(B):
        # the scalar is formatted for the array's dtype
        cmd = f"efunc3vs{ndim}D"
        args = {
            "func": "where",
            "condition": condition,
            "a": A,
            "dtype": A.dtype.name,
            "scalar": A.format_other(B),
        }
    elif isinstance(B, pdarray) and np.isscalar(A):
        cmd = f"efunc3sv{ndim}D"
        args = {
            "func": "where",
            "condition": condition,
            "dtype": B.dtype.name,
            "scalar": B.format_other(A),
            "b": B,
        }
    elif np.isscalar(A) and np.isscalar(B):
        # Two scalars must end up sharing one dtype before being sent.
        dtype_a = resolve_scalar_dtype(A)
        dtype_b = resolve_scalar_dtype(B)
        # at least one of the two dtypes has to be supported
        if dtype_a not in DTypes and dtype_b not in DTypes:
            raise TypeError(f"Not implemented for scalar types {dtype_a} and {dtype_b}")
        if dtype_a == dtype_b:  # type: ignore
            # identical dtypes: nothing to cast
            common_dtype = dtype_a
        elif dtype_b in DTypes and np.can_cast(A, dtype_b):
            # first preference: bring A to B's dtype
            A = np.dtype(dtype_b).type(A)  # type: ignore
            common_dtype = dtype_b
        elif dtype_a in DTypes and np.can_cast(B, dtype_a):
            # otherwise: bring B to A's dtype
            B = np.dtype(dtype_a).type(B)  # type: ignore
            common_dtype = dtype_a
        else:
            # no safe cast in either direction
            raise TypeError(f"Cannot cast between scalars {str(A)} and {str(B)} to supported dtype")
        cmd = f"efunc3ss{ndim}D"
        args = {
            "func": "where",
            "condition": condition,
            "dtype": common_dtype,
            "a": A,
            "b": B,
        }

    reply = generic_msg(cmd=cmd, args=args)
    return create_pdarray(type_cast(str, reply))
[docs]
@typechecked
def histogram(pda: pdarray, bins: int_scalars = 10) -> Tuple[pdarray, pdarray]:
    """
    Compute a histogram of evenly spaced bins over the range of an array.

    Parameters
    ----------
    pda : pdarray
        The values to histogram
    bins : int_scalars
        The number of equal-size bins to use (default: 10)

    Returns
    -------
    (pdarray, pdarray)
        The number of values present in each bin, followed by the
        ``bins + 1`` bin edges

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray or if bins is
        not an int.
    ValueError
        Raised if bins < 1
    NotImplementedError
        Raised if pdarray dtype is bool or uint8

    See Also
    --------
    value_counts, histogram2d

    Notes
    -----
    The bins are evenly spaced in the interval [pda.min(), pda.max()].

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> A = ak.arange(0, 10, 1)
    >>> nbins = 3
    >>> h, b = ak.histogram(A, bins=nbins)
    >>> h
    array([3, 3, 4])
    >>> b
    array([0., 3., 6., 9.])

    # To plot, export the left edges and the histogram to NumPy
    >>> plt.plot(b.to_ndarray()[:-1], h.to_ndarray())
    """
    if bins < 1:
        raise ValueError("bins must be 1 or greater")

    # bins + 1 evenly spaced edges spanning the data range
    edges = linspace(pda.min(), pda.max(), bins + 1)
    reply = generic_msg(cmd="histogram", args={"array": pda, "bins": bins})
    counts = create_pdarray(type_cast(str, reply))
    return counts, edges
# Typechecking removed due to circular dependencies with arrayview
# @typechecked
[docs]
def histogram2d(
    x: pdarray, y: pdarray, bins: Union[int_scalars, Sequence[int_scalars]] = 10
) -> Tuple[pdarray, pdarray, pdarray]:
    """
    Compute the bi-dimensional histogram of two data samples with evenly spaced bins

    Parameters
    ----------
    x : pdarray
        A pdarray containing the x coordinates of the points to be histogrammed.
    y : pdarray
        A pdarray containing the y coordinates of the points to be histogrammed.
    bins : int_scalars or [int, int] = 10
        The number of equal-size bins to use.
        If int, the number of bins for the two dimensions (nx=ny=bins).
        If [int, int], the number of bins in each dimension (nx, ny = bins).
        Defaults to 10

    Returns
    -------
    hist : pdarray, shape(nx, ny)
        The bi-dimensional histogram of samples x and y.
        Values in x are histogrammed along the first dimension and
        values in y are histogrammed along the second dimension.
    x_edges : pdarray
        The bin edges along the first dimension.
    y_edges : pdarray
        The bin edges along the second dimension.

    Raises
    ------
    TypeError
        Raised if x or y parameters are not pdarrays or if bins is
        not an int or (int, int).
    ValueError
        Raised if bins < 1, or if a sequence of bins does not hold
        exactly two elements
    NotImplementedError
        Raised if pdarray dtype is bool or uint8

    See Also
    --------
    histogram

    Notes
    -----
    The x bins are evenly spaced in the interval [x.min(), x.max()]
    and y bins are evenly spaced in the interval [y.min(), y.max()].

    Examples
    --------
    >>> x = ak.arange(0, 10, 1)
    >>> y = ak.arange(9, -1, -1)
    >>> nbins = 3
    >>> h, x_edges, y_edges = ak.histogram2d(x, y, bins=nbins)
    >>> h
    array([[0, 0, 3],
           [0, 2, 1],
           [3, 1, 0]])
    >>> x_edges
    array([0.0 3.0 6.0 9.0])
    >>> y_edges
    array([0.0 3.0 6.0 9.0])
    """
    if isinstance(bins, Sequence):
        if len(bins) != 2:
            raise ValueError("Sequences of bins must contain two elements (num_x_bins, num_y_bins)")
        x_bins, y_bins = bins
    else:
        # a single count applies to both dimensions
        x_bins = y_bins = bins

    if x_bins < 1 or y_bins < 1:
        raise ValueError("bins must be 1 or greater")

    x_edges = linspace(x.min(), x.max(), x_bins + 1)
    y_edges = linspace(y.min(), y.max(), y_bins + 1)

    reply = generic_msg(
        cmd="histogram2D",
        args={"x": x, "y": y, "xBins": x_bins, "yBins": y_bins},
    )
    # the reply is reshaped into the (nx, ny) grid before being returned
    hist = create_pdarray(type_cast(str, reply)).reshape(x_bins, y_bins)
    return (hist, x_edges, y_edges)
[docs]
def histogramdd(
    sample: Sequence[pdarray], bins: Union[int_scalars, Sequence[int_scalars]] = 10
) -> Tuple[pdarray, Sequence[pdarray]]:
    """
    Compute the multidimensional histogram of data in sample with evenly spaced bins.

    Parameters
    ----------
    sample : Sequence[pdarray]
        A sequence of pdarrays containing the coordinates of the points to be histogrammed.
    bins : int_scalars or Sequence[int_scalars] = 10
        The number of equal-size bins to use.
        If int, the number of bins for all dimensions (nx=ny=...=bins).
        If [int, int, ...], the number of bins in each dimension (nx, ny, ... = bins).
        Defaults to 10

    Returns
    -------
    hist : pdarray, shape(nx, ny, ..., nd)
        The multidimensional histogram of pdarrays in sample.
        Values in first pdarray are histogrammed along the first dimension.
        Values in second pdarray are histogrammed along the second dimension and so on.
    edges : List[pdarray]
        A list of pdarrays containing the bin edges for each dimension.

    Raises
    ------
    ValueError
        Raised if sample is not a sequence, if the pdarrays in sample do not
        share one dtype, if a sequence of bins does not match the number of
        pdarrays in sample, or if any bin count is < 1
    NotImplementedError
        Raised if pdarray dtype is bool or uint8

    See Also
    --------
    histogram

    Notes
    -----
    The bins for each dimension, m, are evenly spaced in the interval [m.min(), m.max()]

    Examples
    --------
    >>> x = ak.arange(0, 10, 1)
    >>> y = ak.arange(9, -1, -1)
    >>> z = ak.where(x % 2 == 0, x, y)
    >>> h, edges = ak.histogramdd((x, y,z), bins=(2,2,5))
    >>> h
    array([[[0, 0, 0, 0, 0],
            [1, 1, 1, 1, 1]],

           [[1, 1, 1, 1, 1],
            [0, 0, 0, 0, 0]]])
    >>> edges
    [array([0.0 4.5 9.0]),
     array([0.0 4.5 9.0]),
     array([0.0 1.6 3.2 4.8 6.4 8.0])]
    """
    if not isinstance(sample, Sequence):
        raise ValueError("Sample must be a sequence of pdarrays")
    distinct_dtypes = {pda.dtype for pda in sample}
    if len(distinct_dtypes) != 1:
        raise ValueError("All pdarrays in sample must have same dtype")

    num_dims = len(sample)
    if isinstance(bins, Sequence):
        if len(bins) != num_dims:
            raise ValueError("Sequences of bins must contain same number of elements as the sample")
    else:
        # one count shared by every dimension
        bins = [bins] * num_dims

    if any(b < 1 for b in bins):
        raise ValueError("bins must be 1 or greater")

    # tuples are converted to lists before being used as message arguments
    if isinstance(bins, tuple):
        bins = list(bins)
    if isinstance(sample, tuple):
        sample = list(sample)

    bin_boundaries = [linspace(a.min(), a.max(), b + 1) for a, b in zip(sample, bins)]

    # Product of the bin counts of all later dimensions, per dimension:
    # e.g. bins (2, 2, 5) -> reversed (5, 2, 2) -> cumprod (5, 10, 20)
    # -> // reversed bins (1, 5, 10) -> reversed again (10, 5, 1).
    reversed_bins = array(bins)[::-1]
    dim_prod = (cumprod(reversed_bins) // reversed_bins)[::-1]

    reply = generic_msg(
        cmd="histogramdD",
        args={
            "sample": sample,
            "num_dims": num_dims,
            "bins": bins,
            "dim_prod": dim_prod,
            "num_samples": sample[0].size,
        },
    )
    hist = create_pdarray(type_cast(str, reply)).reshape(bins)
    return hist, bin_boundaries
[docs]
@typechecked
def value_counts(
    pda: pdarray,
) -> Tuple[
    Union[Union[pdarray, Strings, Categorical], Sequence[Union[pdarray, Strings, Categorical]]], pdarray
]:
    """
    Count the occurrences of the unique values of an array.

    Parameters
    ----------
    pda : pdarray, int64
        The array of values to count

    Returns
    -------
    unique_values : pdarray, int64 or Strings
        The unique values, sorted in ascending order
    counts : pdarray, int64
        The number of times the corresponding unique value occurs

    Raises
    ------
    TypeError
        Raised if the parameter is not a pdarray

    See Also
    --------
    unique, histogram

    Notes
    -----
    This function differs from ``histogram()`` in that it only returns
    counts for values that are present, leaving out empty "bins". All
    of the work is delegated to ``GroupBy(pda).size()``.

    Examples
    --------
    >>> A = ak.array([2, 0, 2, 4, 0, 0])
    >>> ak.value_counts(A)
    (array([0, 2, 4]), array([3, 2, 1]))
    """
    grouped = GroupBy(pda)
    return grouped.size()
[docs]
@typechecked
def clip(
    pda: pdarray,
    lo: Union[numeric_scalars, pdarray, None],
    hi: Union[numeric_scalars, pdarray, None],
) -> pdarray:
    """
    Clip (limit) the values in an array to a given range [lo,hi]

    Given an array a, values outside the range are clipped to the
    range edges, such that all elements lie in the range.

    There is no check to enforce that lo < hi.  If lo > hi, the corresponding
    value of the array will be set to hi.

    If lo or hi (or both) are pdarrays, the check is by pairwise elements.
    See examples.

    Parameters
    ----------
    pda : pdarray, int64 or float64
        the array of values to clip
    lo : scalar or pdarray, int64 or float64, or None
        the lower value of the clipping range; None means no lower bound
    hi : scalar or pdarray, int64 or float64, or None
        the higher value of the clipping range; None means no upper bound
        If lo or hi (or both) are pdarrays, the check is by pairwise elements.
        See examples.

    Returns
    -------
    arkouda.pdarrayclass.pdarray
        A pdarray matching pda, except that element x remains x if lo <= x <= hi,
                                            or becomes lo if x < lo,
                                            or becomes hi if x > hi.

    Examples
    --------
    >>> a = ak.array([1,2,3,4,5,6,7,8,9,10])
    >>> ak.clip(a,3,8)
    array([3,3,3,4,5,6,7,8,8,8])
    >>> ak.clip(a,3,8.0)
    array([3.00000000000000000 3.00000000000000000 3.00000000000000000 4.00000000000000000
           5.00000000000000000 6.00000000000000000 7.00000000000000000 8.00000000000000000
           8.00000000000000000 8.00000000000000000])
    >>> ak.clip(a,None,7)
    array([1,2,3,4,5,6,7,7,7,7])
    >>> ak.clip(a,5,None)
    array([5,5,5,5,5,6,7,8,9,10])
    >>> ak.clip(a,None,None)
    ValueError : either min or max must be supplied
    >>> ak.clip(a,ak.array([2,2,3,3,8,8,5,5,6,6]),8)
    array([2,2,3,4,8,8,7,8,8,8])
    >>> ak.clip(a,4,ak.array([10,9,8,7,6,5,5,5,5,5]))
    array([4,4,4,4,5,5,5,5,5,5])

    Notes
    -----
    Either lo or hi may be None, but not both.
    If lo > hi, all x = hi.
    If all inputs are int64, output is int64, but if any input is float64, output is float64.

    Raises
    ------
    ValueError
        Raised if both lo and hi are None
    """
    # The annotations include None so that the runtime type check lets the
    # documented "one bound omitted" calls reach the handling below.

    # Check that a range was actually supplied.
    if lo is None and hi is None:
        raise ValueError("Either min or max must be supplied.")

    # If any of the inputs are float, then make everything float.
    # Some type checking is needed, because scalars and pdarrays get cast differently.
    dataFloat = pda.dtype == float
    minFloat = isinstance(lo, float) or (isinstance(lo, pdarray) and lo.dtype == float)
    maxFloat = isinstance(hi, float) or (isinstance(hi, pdarray) and hi.dtype == float)
    forceFloat = dataFloat or minFloat or maxFloat
    if forceFloat:
        if not dataFloat:
            pda = cast(pda, np.float64)
        if lo is not None and not minFloat:
            lo = cast(lo, np.float64) if isinstance(lo, pdarray) else float(lo)
        if hi is not None and not maxFloat:
            hi = cast(hi, np.float64) if isinstance(hi, pdarray) else float(hi)

    # Now do the clipping: lower bound first, then upper bound, so that hi
    # wins wherever lo > hi.
    pda1 = pda
    if lo is not None:
        pda1 = where(pda < lo, lo, pda)
    if hi is not None:
        pda1 = where(pda1 > hi, hi, pda1)
    return pda1
[docs]
def count_nonzero(pda):
    """
    Compute the nonzero count of a given array. 1D case only, for now.

    Parameters
    ----------
    pda: pdarray
        The input data, in pdarray form, numeric, bool, or str

    Returns
    -------
    np.int64
        The nonzero count of the entire pdarray. For bool data this is the
        number of True elements; for str data, the number of non-empty strings.

    Raises
    ------
    TypeError
        Raised if the data is neither numeric, bool, nor str

    Examples
    --------
    >>> pda = ak.array([0,4,7,8,1,3,5,2,-1])
    >>> ak.count_nonzero(pda)
    8
    >>> pda = ak.array([False,True,False,True,False])
    >>> ak.count_nonzero(pda)
    2
    >>> pda = ak.array(["hello","","there"])
    >>> ak.count_nonzero(pda)
    2
    """
    from arkouda.util import is_numeric

    # Handle different data types.
    if is_numeric(pda):
        return sum((pda != 0).astype(np.int64))
    elif pda.dtype == bool:
        return sum((pda).astype(np.int64))
    elif pda.dtype == str:
        return sum((pda != "").astype(np.int64))

    # Fail loudly rather than silently returning None for other dtypes.
    raise TypeError(f"count_nonzero is not supported for dtype {pda.dtype}")
[docs]
def array_equal(pda_a: pdarray, pda_b: pdarray, equal_nan: bool = False):
    """
    Compares two pdarrays for equality.

    If neither array has any nan elements, then if all elements are pairwise equal,
    it returns True.
    If equal_nan is False, then any nan element in either array gives a False return.
    If equal_nan is True, then pairwise-corresponding nans are considered equal.

    Parameters
    ----------
    pda_a : pdarray
    pda_b : pdarray
    equal_nan : boolean to determine how to handle nans, default False

    Returns
    -------
    boolean
      False whenever the two shapes differ.
      With string data:
         False if one array is type ak.str_ & the other isn't, True if both are ak.str_ & they match.
      With numeric data:
         True if neither array has any nan elements, and all elements pairwise equal.
         True if equal_nan True, all non-nans pairwise equal & nans in pda_a correspond to nans in pda_b
         False if equal_nan False, & either array has any nan element.

    Examples
    --------
    >>> a = ak.randint(0,10,10,dtype=ak.float64)
    >>> b = a
    >>> ak.array_equal(a,b)
    True
    >>> b[9] = np.nan
    >>> ak.array_equal(a,b)
    False
    >>> a[9] = np.nan
    >>> ak.array_equal(a,b)
    False
    >>> ak.array_equal(a,b,True)
    True
    """
    if pda_a.shape != pda_b.shape:
        return False

    # exactly one of the two being string data can never compare equal
    if (pda_a.dtype == akstr_) ^ (pda_b.dtype == akstr_):
        return False

    if not equal_nan:
        return ak_all(pda_a == pda_b)

    # where pda_a is nan, pda_b must be nan too; elsewhere the values must match
    return ak_all(where(isnan(pda_a), isnan(pda_b), pda_a == pda_b))
[docs]
def putmask(pda: pdarray, mask: Union[bool, pdarray], values: pdarray):
    """
    Overwrite elements of a pdarray at indices where mask is True

    Parameters
    ----------
    pda : pdarray, source data, also output data
        pda = input where mask is False, = values where mask is True
    mask : a scalar boolean, or a pdarray of booleans
        A scalar True overwrites every element; a scalar False leaves
        pda unchanged.
    values : pdarray, replacement data

    Returns
    -------
    None - pda is modified in-place

    Notes
    -----
    If values.size != pda.size, values is repeated and/or pruned as needed to
    make sizes match, because ak.where requires matching sizes.

    Examples
    --------
    >>> a = ak.array(np.arange(10))
    >>> ak.putmask (a,a>2,a**2)
    >>> a
    array ([0,1,2,9,16,25,36,49,64,81])

    >>> values = ak.array([3,2])
    >>> ak.putmask (a,a>2,values)
    >>> a
    array ([0,1,2,2,3,2,3,2,3,2])

    Raises
    ------
    TypeError
        Raised if pda and values are not the same type
    """
    from arkouda.pdarraysetops import concatenate

    # check for matching types
    if values.dtype != pda.dtype:
        raise TypeError("ak.putmask requires arrays of matching type")

    # ak.where needs a pdarray condition, so a scalar mask is handled here
    scalar_mask = isinstance(mask, (bool, np.bool_))
    if scalar_mask and not mask:
        return  # nothing is selected, pda stays as it is

    # if values is not the same size as pda, repeat it and/or prune it as needed
    growth = pda.size // values.size + (0 if pda.size % values.size == 0 else 1)
    result = concatenate(growth * [values])
    if result.size > pda.size:
        # Keep exactly the leading pda.size elements.  Trimming by
        # result.size % pda.size is wrong when values is longer than pda
        # (an exact multiple would trim with [:-0], i.e. to nothing).
        result = result[: pda.size]

    # pda[:] = writes through to the caller's array instead of rebinding a local
    if scalar_mask:
        pda[:] = result
    else:
        pda[:] = where(mask, result, pda)
[docs]
def eye(rows: int_scalars, cols: int_scalars, diag: int_scalars = 0, dt: type = akint64):
    """
    Return a pdarray with zeros everywhere except along a diagonal, which is all ones.
    The matrix need not be square.

    Parameters
    ----------
    rows : int_scalars
    cols : int_scalars
    diag : int_scalars
        if diag = 0, ones start at element [0,0] and proceed along diagonal
        if diag > 0, ones start at element [0,diag] and proceed along diagonal
        if diag < 0, ones start at element [-diag,0] and proceed along diagonal
        etc.
    dt : type
        dtype of the result; defaults to ak.int64

    Returns
    -------
    pdarray
        an array of zeros with ones along the specified diagonal

    Examples
    --------
    >>> ak.eye(rows=4,cols=4,diag=0,dt=ak.int64)
    array([array([1 0 0 0]) array([0 1 0 0]) array([0 0 1 0]) array([0 0 0 1])])
    >>> ak.eye(rows=3,cols=3,diag=1,dt=ak.float64)
    array([array([0.00000000000000000 1.00000000000000000 0.00000000000000000])
    array([0.00000000000000000 0.00000000000000000 1.00000000000000000])
    array([0.00000000000000000 0.00000000000000000 0.00000000000000000])])
    >>> ak.eye(rows=4,cols=4,diag=-1,dt=ak.bool_)
    array([array([False False False False]) array([True False False False])
    array([False True False False]) array([False False True False])]

    Notes
    -----
    if rows = cols and diag = 0, the result is an identity matrix
    """
    # the requested dtype is carried in the command name itself
    dtype_name = akdtype(dt).name
    reply = generic_msg(
        cmd=f"eye<{dtype_name}>",
        args={"rows": rows, "cols": cols, "diag": diag},
    )
    return create_pdarray(reply)
[docs]
def triu(pda: pdarray, diag: int_scalars = 0):
    """
    Return a copy of the pda with the lower triangle zeroed out

    Parameters
    ----------
    pda : pdarray
    diag : int_scalars
        Elements below the diag-th diagonal are set to zero.
        if diag = 0, the main diagonal is kept and zeros start just below it
        if diag = 1, zeros start at the main diagonal
        if diag = 2, zeros start at the first diagonal above the main diagonal
        etc.

    Returns
    -------
    pdarray
        a copy of pda with zeros in the lower triangle

    Examples
    --------
    >>> a = ak.array([[1,2,3,4,5],[2,3,4,5,6],[3,4,5,6,7],[4,5,6,7,8],[5,6,7,8,9]])
    >>> ak.triu(a,diag=0)
    array([array([1 2 3 4 5]) array([0 3 4 5 6]) array([0 0 5 6 7])
    array([0 0 0 7 8]) array([0 0 0 0 9])])
    >>> ak.triu(a,diag=1)
    array([array([0 2 3 4 5]) array([0 0 4 5 6]) array([0 0 0 6 7])
    array([0 0 0 0 8]) array([0 0 0 0 0])])
    >>> ak.triu(a,diag=2)
    array([array([0 0 3 4 5]) array([0 0 0 5 6]) array([0 0 0 0 7])
    array([0 0 0 0 0]) array([0 0 0 0 0])])
    >>> ak.triu(a,diag=3)
    array([array([0 0 0 4 5]) array([0 0 0 0 6]) array([0 0 0 0 0])
    array([0 0 0 0 0]) array([0 0 0 0 0])])
    >>> ak.triu(a,diag=4)
    array([array([0 0 0 0 5]) array([0 0 0 0 0]) array([0 0 0 0 0])
    array([0 0 0 0 0]) array([0 0 0 0 0])])

    Notes
    -----
    Server returns an error if rank of pda < 2
    """
    # the command name carries the array's dtype and rank
    reply = generic_msg(
        cmd=f"triu<{pda.dtype},{pda.ndim}>",
        args={"array": pda, "diag": diag},
    )
    return create_pdarray(reply)
[docs]
def tril(pda: pdarray, diag: int_scalars = 0):
    """
    Return a copy of the pda with the upper triangle zeroed out

    Parameters
    ----------
    pda : pdarray
    diag : int_scalars
        Elements above the diag-th diagonal are set to zero.
        if diag = 0, the main diagonal is kept and zeros start just above it
        if diag = 1, the first diagonal above the main diagonal is kept as well
        if diag = 2, the first two diagonals above the main diagonal are kept
        etc.

    Returns
    -------
    pdarray
        a copy of pda with zeros in the upper triangle

    Examples
    --------
    >>> a = ak.array([[1,2,3,4,5],[2,3,4,5,6],[3,4,5,6,7],[4,5,6,7,8],[5,6,7,8,9]])
    >>> ak.tril(a,diag=4)
    array([array([1 2 3 4 5]) array([2 3 4 5 6]) array([3 4 5 6 7])
    array([4 5 6 7 8]) array([5 6 7 8 9])])
    >>> ak.tril(a,diag=3)
    array([array([1 2 3 4 0]) array([2 3 4 5 6]) array([3 4 5 6 7])
    array([4 5 6 7 8]) array([5 6 7 8 9])])
    >>> ak.tril(a,diag=2)
    array([array([1 2 3 0 0]) array([2 3 4 5 0]) array([3 4 5 6 7])
    array([4 5 6 7 8]) array([5 6 7 8 9])])
    >>> ak.tril(a,diag=1)
    array([array([1 2 0 0 0]) array([2 3 4 0 0]) array([3 4 5 6 0])
    array([4 5 6 7 8]) array([5 6 7 8 9])])
    >>> ak.tril(a,diag=0)
    array([array([1 0 0 0 0]) array([2 3 0 0 0]) array([3 4 5 0 0])
    array([4 5 6 7 0]) array([5 6 7 8 9])])

    Notes
    -----
    Server returns an error if rank of pda < 2
    """
    # the command name carries the array's dtype and rank
    reply = generic_msg(
        cmd=f"tril<{pda.dtype},{pda.ndim}>",
        args={"array": pda, "diag": diag},
    )
    return create_pdarray(reply)
[docs]
def transpose(pda: pdarray):
    """
    Compute the transpose of a matrix.

    Parameters
    ----------
    pda : pdarray

    Returns
    -------
    pdarray
        the transpose of the input matrix

    Examples
    --------
    >>> a = ak.array([[1,2,3,4,5],[1,2,3,4,5]])
    >>> ak.transpose(a)
    array([array([1 1]) array([2 2]) array([3 3]) array([4 4]) array([5 5])])

    Notes
    -----
    Server returns an error if rank of pda < 2
    """
    # the command name carries the array's dtype and rank
    reply = generic_msg(
        cmd=f"transpose<{pda.dtype},{pda.ndim}>",
        args={"array": pda},
    )
    return create_pdarray(reply)
[docs]
def matmul(pdaLeft: pdarray, pdaRight: pdarray):
    """
    Compute the product of two matrices.

    Parameters
    ----------
    pdaLeft : pdarray
    pdaRight : pdarray

    Returns
    -------
    pdarray
        the matrix product pdaLeft x pdaRight

    Raises
    ------
    ValueError
        Raised if pdaLeft and pdaRight do not have the same rank

    Examples
    --------
    >>> a = ak.array([[1,2,3,4,5],[1,2,3,4,5]])
    >>> b = ak.array([[1,1],[2,2],[3,3],[4,4],[5,5]])
    >>> ak.matmul(a,b)
    array([array([55 55]) array([55 55])])

    >>> x = ak.array([[1,2,3],[1.1,2.1,3.1]])
    >>> y = ak.array([[1,1,1],[0,2,2],[0,0,3]])
    >>> ak.matmul(x,y)
    array([array([1.00000000000000000 5.00000000000000000 14.00000000000000000])
    array([1.1000000000000001 5.3000000000000007 14.600000000000001])])

    Notes
    -----
    Server returns an error if shapes of pdaLeft and pdaRight
    are incompatible with matrix multiplication.
    """
    if pdaLeft.ndim != pdaRight.ndim:
        raise ValueError("matmul requires matrices of matching rank.")

    # the command name carries both operand dtypes and the shared rank
    reply = generic_msg(
        cmd=f"matmul<{pdaLeft.dtype},{pdaRight.dtype},{pdaLeft.ndim}>",
        args={"x1": pdaLeft, "x2": pdaRight},
    )
    return create_pdarray(reply)
[docs]
def vecdot(x1: pdarray, x2: pdarray):
    """
    Compute the generalized dot product of two vectors along axis 0.

    Assumes that both tensors have already been broadcast to the same shape.

    Parameters
    ----------
    x1 : pdarray
    x2 : pdarray

    Returns
    -------
    pdarray
        x1 vecdot x2

    Examples
    --------
    NOTE(review): the outputs below are carried over unchanged from the
    previous docstring and have not been re-verified against a server.

    >>> a = ak.array([[1,2,3,4,5],[1,2,3,4,5]])
    >>> b = ak.array([[2,2,2,2,2],[2,2,2,2,2]])
    >>> ak.vecdot(a,b)
    array([5 10 15 20 25])
    >>> ak.vecdot(b,a)
    array([5 10 15 20 25])

    Raises
    ------
    ValueError
        Raised if x1 and x2 are not of matching shape or if rank of x1 < 2
    """
    # The messages now name what is actually checked (shape, not rank) and
    # the right function (vecdot, not "vector").
    if x1.shape != x2.shape:
        raise ValueError("vecdot requires arrays of matching shape.")
    if x1.ndim < 2:
        raise ValueError("vecdot requires arrays of rank 2 or more.")

    # the command name carries both operand dtypes and the shared rank
    cmd = f"vecdot<{x1.dtype},{x2.dtype},{x1.ndim}>"
    args = {
        "x1": x1,
        "x2": x2,
        "bcShape": tuple(x1.shape),  # inputs already share this shape
        "axis": 0,
    }
    return create_pdarray(
        generic_msg(
            cmd=cmd,
            args=args,
        )
    )