from typing import Literal, Optional, Sequence, Tuple, Union, cast
from warnings import warn
import numpy as np
from typeguard import typechecked
from arkouda.numpy.dtypes import bigint
from arkouda.numpy.dtypes import dtype as akdtype
from arkouda.numpy.dtypes import result_type as ak_result_type
from arkouda.numpy.pdarrayclass import create_pdarray, pdarray
from arkouda.numpy.pdarraycreation import arange, array
# Public API of this module: the names exported by a wildcard import.
__all__ = ["hstack", "vstack", "delete", "append"]
def _max_bits_list(pda_list: Sequence[pdarray]) -> Tuple[bool, int]:
    """
    Summarize the `max_bits` settings of the `bigint` arrays in a sequence.

    Only arrays whose dtype is `bigint` are considered. The result says whether
    any such array exists and what the smallest positive `max_bits` among them is.
    A warning is issued when a `bigint` array (other than the first one) has a
    `max_bits` that differs from the smallest value seen before it.

    Parameters
    ----------
    pda_list : Sequence[pdarray]
        The arrays to inspect.

    Returns
    -------
    bool
        True if at least one array has the `bigint` dtype.
    int
        The smallest positive `max_bits` found among the `bigint` arrays, or -1
        when there are no `bigint` arrays or none of them has a positive `max_bits`.

    """
    # Bit widths of the bigint arrays only, in input order.
    widths = [pda.max_bits for pda in pda_list if pda.dtype == bigint]

    smallest = -1
    mismatch = False
    for position, width in enumerate(widths):
        # Every bigint array after the first is compared with the running minimum.
        if position > 0 and width != smallest:
            mismatch = True
        # Non-positive widths never replace the running minimum.
        if width > 0 and (smallest == -1 or width < smallest):
            smallest = width

    if mismatch:
        warn("Because two arrays with different max_bits were used, truncating to smaller max_bits")
    return bool(widths), smallest
[docs]
@typechecked
def hstack(
    tup: Sequence[pdarray],
    *,
    dtype: Optional[Union[str, type]] = None,
    casting: Literal["no", "equiv", "safe", "same_kind", "unsafe"] = "same_kind",
) -> pdarray:
    """
    Stack arrays in sequence horizontally (column wise).

    This is equivalent to concatenation along the second axis, except for 1-D arrays
    where it concatenates along the first axis. Rebuilds arrays divided by ``hsplit``.

    This function makes most sense for arrays with up to 3 dimensions. For instance, for pixel-data
    with a height (first axis), width (second axis), and r/g/b channels (third axis). The functions
    ``concatenate``, ``stack`` and ``block`` provide more general stacking and concatenation operations.

    Parameters
    ----------
    tup : sequence of pdarray
        The arrays to stack; at least one is required. The arrays must have the same shape
        along all but the second axis, except 1-D arrays which can be any length.
    dtype : str or type, optional
        If provided, the destination array will have this type. When omitted, the result is
        ``bigint`` if any input is ``bigint``; otherwise the NumPy result type of the input dtypes.
    casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional
        Controls what kind of data casting may occur. Defaults to "same_kind", which is
        the only value currently supported.

    Returns
    -------
    pdarray
        The array formed by stacking the given arrays.

    Raises
    ------
    NotImplementedError
        If `casting` is anything other than "same_kind".
    ValueError
        If `tup` is empty, or if the input arrays do not all have the same number of dimensions.

    See Also
    --------
    concatenate, stack, block, vstack, dstack, column_stack, hsplit, unstack

    Examples
    --------
    >>> import arkouda as ak
    >>> a = ak.array([1, 2, 3])
    >>> b = ak.array([4, 5, 6])
    >>> ak.hstack((a, b))
    array([1 2 3 4 5 6])
    >>> a = ak.array([[1],[2],[3]])
    >>> b = ak.array([[4],[5],[6]])
    >>> ak.hstack((a, b))
    array([array([1 4]) array([2 5]) array([3 6])])

    """
    from arkouda.core.client import generic_msg

    if casting != "same_kind":
        # TODO: align with https://numpy.org/doc/stable/glossary.html#term-casting
        raise NotImplementedError(f"casting={casting} is not yet supported")

    # An empty sequence would otherwise surface as a bare IndexError from tup[0].
    if len(tup) == 0:
        raise ValueError("need at least one array to concatenate")

    # ensure all arrays have the same number of dimensions
    ndim = tup[0].ndim
    if any(a.ndim != ndim for a in tup):
        raise ValueError("all input arrays must have the same number of dimensions")

    has_bigint, m_bits = _max_bits_list(tup)

    # establish the dtype of the output array
    if has_bigint and dtype is None:
        dtype = bigint
    if dtype is None:
        dtype_ = np.result_type(*[np.dtype(a.dtype) for a in tup])
    else:
        dtype_ = akdtype(dtype)

    # cast the input arrays to the output dtype if necessary
    arrays = [a.astype(dtype_) if a.dtype != dtype_ else a for a in tup]

    dtype_name = akdtype(dtype_).name
    # max_bits only applies to a bigint result (same guard as ``append``).
    # NOTE: arrays that needed no cast are the caller's own objects, so this
    # assignment is visible on the inputs as well.
    if has_bigint and dtype_name == "bigint":
        for a in arrays:
            a.max_bits = m_bits

    # 1-D inputs are joined along axis 0, everything else along axis 1.
    axis = 0 if ndim == 1 else 1

    # offsets[i] is the start of arrays[i] along the stacking axis in the output
    offsets = [0]
    for a in arrays[:-1]:
        offsets.append(offsets[-1] + a.shape[axis])

    # stack the arrays along the chosen axis
    return create_pdarray(
        generic_msg(
            cmd=f"concatenate<{dtype_name},{arrays[0].ndim}>",
            args={
                "names": list(arrays),
                "axis": axis,
                "offsets": offsets,
            },
        )
    )
[docs]
@typechecked
def vstack(
    tup: Sequence[pdarray],
    *,
    dtype: Optional[Union[str, type]] = None,
    casting: Literal["no", "equiv", "safe", "same_kind", "unsafe"] = "same_kind",
) -> pdarray:
    """
    Stack arrays in sequence vertically (row wise).

    This is equivalent to concatenation along the first axis after
    1-D arrays of shape `(N,)` have been reshaped to `(1,N)`. Rebuilds arrays divided by ``vsplit``.

    This function makes most sense for arrays with up to 3 dimensions.
    For instance, for pixel-data with a height (first axis), width (second axis),
    and r/g/b channels (third axis). The functions ``concatenate``, ``stack`` and ``block``
    provide more general stacking and concatenation operations.

    Parameters
    ----------
    tup : sequence of pdarray
        The arrays to stack; at least one is required. The arrays must have the same shape
        along all but the first axis. 1-D arrays must have the same length.
    dtype : str or type, optional
        If provided, the destination array will have this dtype. When omitted, the result is
        ``bigint`` if any input is ``bigint``; otherwise the NumPy result type of the input dtypes.
    casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional
        Controls what kind of data casting may occur. Defaults to "same_kind", which is
        the only value currently supported.

    Returns
    -------
    pdarray
        The array formed by stacking the given arrays, will be at least 2-D.

    Raises
    ------
    NotImplementedError
        If `casting` is anything other than "same_kind".
    ValueError
        If `tup` is empty, or if (after 1-D inputs are reshaped) the arrays do not all
        have the same number of dimensions.

    See Also
    --------
    concatenate, stack, block, hstack, dstack, column_stack, hsplit, unstack

    Examples
    --------
    >>> import arkouda as ak
    >>> a = ak.array([1, 2, 3])
    >>> b = ak.array([4, 5, 6])
    >>> ak.vstack((a, b))
    array([array([1 2 3]) array([4 5 6])])
    >>> a = ak.array([[1],[2],[3]])
    >>> b = ak.array([[4],[5],[6]])
    >>> ak.vstack((a, b))
    array([array([1]) array([2]) array([3]) array([4]) array([5]) array([6])])

    """
    from arkouda.core.client import generic_msg

    if casting != "same_kind":
        # TODO: align with https://numpy.org/doc/stable/glossary.html#term-casting
        raise NotImplementedError(f"casting={casting} is not yet supported")

    # An empty sequence would otherwise surface as a bare IndexError from arrays[0].
    if len(tup) == 0:
        raise ValueError("need at least one array to concatenate")

    # From docstring: "This is equivalent to concatenation along the first axis after 1-D arrays
    # of shape (N,) have been reshaped to (1,N)."
    arrays = [a if a.ndim != 1 else a.reshape((1, len(a))) for a in tup]

    # ensure all arrays have the same number of dimensions
    ndim = arrays[0].ndim
    if any(a.ndim != ndim for a in arrays):
        raise ValueError("all input arrays must have the same number of dimensions")

    has_bigint, m_bits = _max_bits_list(tup)

    # establish the dtype of the output array
    if has_bigint and dtype is None:
        dtype = bigint
    if dtype is None:
        dtype_ = np.result_type(*[np.dtype(a.dtype) for a in arrays])
    else:
        dtype_ = akdtype(dtype)

    # cast the input arrays to the output dtype if necessary
    arrays = [a.astype(dtype_) if a.dtype != dtype_ else a for a in arrays]

    dtype_name = akdtype(dtype_).name
    # max_bits only applies to a bigint result (same guard as ``append``).
    # NOTE: arrays that were neither reshaped nor cast are the caller's own
    # objects, so this assignment is visible on those inputs as well.
    if has_bigint and dtype_name == "bigint":
        for a in arrays:
            a.max_bits = m_bits

    # offsets[i] is the starting row of arrays[i] in the output
    offsets = [0]
    for a in arrays[:-1]:
        offsets.append(offsets[-1] + a.shape[0])

    # stack the arrays along the first axis
    return create_pdarray(
        generic_msg(
            cmd=f"concatenate<{dtype_name},{arrays[0].ndim}>",
            args={
                "names": list(arrays),
                "axis": 0,
                "offsets": offsets,
            },
        )
    )
[docs]
@typechecked
def delete(
    arr: pdarray,
    obj: Union[slice, int, Sequence[int], Sequence[bool], pdarray],
    axis: Optional[int] = None,
) -> pdarray:
    """
    Return a copy of 'arr' with elements along the specified axis removed.

    Parameters
    ----------
    arr : pdarray
        The array to remove elements from
    obj : slice, int, Sequence of int, Sequence of bool, or pdarray
        The indices to remove from 'arr'. If obj is a pdarray, it must
        have an integer or bool dtype.
    axis : Optional[int], optional
        The axis along which to remove elements. Negative values count from the
        last axis. If None, the array will be flattened before removing elements.
        Defaults to None.

    Returns
    -------
    pdarray
        A copy of 'arr' with elements removed

    Raises
    ------
    ValueError
        If `axis` is outside the range ``[-arr.ndim, arr.ndim)``, or if `obj` is
        not one of the supported kinds.

    Examples
    --------
    >>> import arkouda as ak
    >>> arr = ak.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
    >>> arr
    array([array([1 2 3 4]) array([5 6 7 8]) array([9 10 11 12])])
    >>> ak.delete(arr, 1, 0)
    array([array([1 2 3 4]) array([9 10 11 12])])
    >>> ak.delete(arr, slice(0, 4, 2), 1)
    array([array([2 4]) array([6 8]) array([10 12])])
    >>> ak.delete(arr, [1, 3, 5], None)
    array([1 3 5 7 8 9 10 11 12])

    """
    from arkouda.core.client import generic_msg

    if axis is None:
        # flatten the array if axis is None (a 1-D array is used as is)
        _arr = arr if arr.ndim == 1 else arr.flatten()
        _axis = 0
    else:
        # Validate and normalise the axis the same way ``append`` does, so that a
        # negative axis indexes the intended dimension both in the size heuristic
        # below and in the request sent to the server.
        if axis >= arr.ndim or axis < -arr.ndim:
            raise ValueError(f"Axis {axis} out of bounds for {arr.ndim} dimensions")
        _arr = arr
        _axis = axis % arr.ndim
    shape = _arr.shape

    # product of the extents of all dimensions after the deletion axis
    slice_weight = 1
    for extent in shape[_axis + 1 :]:
        slice_weight *= extent

    # Turn every accepted form of `obj` into a pdarray of indices / flags.
    if isinstance(obj, pdarray):
        _del = obj
    elif isinstance(obj, Sequence):
        _del = cast(pdarray, array(obj))
    elif isinstance(obj, int):
        _del = arange(obj, obj + 1, 1)
    elif isinstance(obj, slice):
        # slice.indices clips the slice to the length of the deletion axis
        start, stop, stride = obj.indices(shape[_axis])
        _del = arange(start, stop, stride)
    else:
        raise ValueError("obj must be a slice, int, Sequence of int, Sequence of bool, or pdarray")

    # Select the server-side command variant: "BulkCopy" for integer indices when the
    # ratio of axis length to number of deletions, scaled by slice_weight, is at least
    # 100; "AggCopy" otherwise.
    if _del.dtype == int and (shape[_axis] / max(int(_del.size), 1)) * slice_weight >= 100:
        alg_choice = "BulkCopy"
    else:
        alg_choice = "AggCopy"

    return create_pdarray(
        generic_msg(
            cmd=f"delete{alg_choice}<{_arr.dtype},{_arr.ndim},{_del.dtype},{_del.ndim}>",
            args={
                "eIn": _arr,
                "axis": _axis,
                "del": _del,
            },
        )
    )
[docs]
@typechecked
def append(
    arr: pdarray,
    values: pdarray,
    axis: Optional[int] = None,
) -> pdarray:
    """
    Append values to the end of an array.

    Parameters
    ----------
    arr : pdarray
        The array whose contents come first in the result; it is not modified in place.
    values : pdarray
        The values placed after the contents of `arr`.
        When `axis` is given, `values` must have the same number of dimensions as `arr`
        (and the same shape as `arr`, excluding `axis`).
        When `axis` is omitted, `values` may have any shape and is flattened before use.
    axis : Optional[int], default=None
        The axis along which values are appended. Negative values count from the last axis.
        If axis is not given, both arr and values are flattened before use.

    Returns
    -------
    pdarray
        A new array holding `arr` followed by `values` along `axis`.
        If axis is None, the result is a flattened array.

    Raises
    ------
    ValueError
        If `axis` is given and the two arrays differ in number of dimensions, or
        `axis` is outside the range ``[-arr.ndim, arr.ndim)``.

    See Also
    --------
    delete

    Examples
    --------
    >>> import arkouda as ak
    >>> a = ak.array([1, 2, 3])
    >>> b = ak.array([[4, 5, 6], [7, 8, 9]])
    >>> ak.append(a, b)
    array([1 2 3 4 5 6 7 8 9])
    >>> ak.append(b, b, axis = 0)
    array([array([4 5 6]) array([7 8 9]) array([4 5 6]) array([7 8 9])])

    """
    from arkouda.core.client import generic_msg

    if axis is not None:
        # both operands must agree on rank before an axis can be interpreted
        if arr.ndim != values.ndim:
            raise ValueError("all input arrays must have the same number of dimensions")
        if not (-arr.ndim <= axis < arr.ndim):
            raise ValueError(f"Axis {axis} out of bounds for {arr.ndim} dimensions")
        left, right = arr, values
        # map a negative axis onto its non-negative equivalent
        out_axis = (axis + arr.ndim) % arr.ndim
    else:
        # no axis: work on flattened operands and join along the only axis
        left = arr if arr.ndim == 1 else arr.flatten()
        right = values if values.ndim == 1 else values.flatten()
        out_axis = 0

    has_bigint, m_bits = _max_bits_list([left, right])

    # the output dtype is the common result type of the two operands
    dtype_ = ak_result_type(left, right)

    # operands already of the output dtype are used as they are
    operands = [a.astype(dtype_) if a.dtype != dtype_ else a for a in (left, right)]

    if has_bigint and akdtype(dtype_).name == "bigint":
        for operand in operands:
            operand.max_bits = m_bits

    # the first operand starts at 0, the second right after the first along out_axis
    offsets = [0, operands[0].shape[out_axis]]

    # stack the arrays along the given axis
    request = {
        "names": list(operands),
        "axis": out_axis,
        "offsets": offsets,
    }
    reply = generic_msg(
        cmd=f"concatenate<{akdtype(dtype_).name},{operands[0].ndim}>",
        args=request,
    )
    return create_pdarray(reply)