from __future__ import annotations
import codecs
import itertools
import re
from typing import Dict, List, Optional, Tuple, Union, cast
import numpy as np
from typeguard import typechecked
import arkouda.numpy.dtypes
from arkouda.client import generic_msg
from arkouda.numpy.dtypes import NUMBER_FORMAT_STRINGS
from arkouda.numpy.dtypes import dtype as akdtype
from arkouda.numpy.dtypes import (
int_scalars,
resolve_scalar_dtype,
str_,
str_scalars,
)
from arkouda.infoclass import information, list_symbol_table
from arkouda.logger import getArkoudaLogger
from arkouda.match import Match, MatchType
from arkouda.pdarrayclass import RegistrationError
from arkouda.pdarrayclass import all as akall
from arkouda.pdarrayclass import create_pdarray, parse_single_value, pdarray
# Public API of this module: only the Strings class is exported.
__all__ = ["Strings"]
# Command strings for message passing to arkouda server, specific to Strings
# Sent by Strings.from_parts so the server combines separately supplied
# offsets and bytes arrays into a single segmented-strings entry.
CMD_ASSEMBLE = "segStr-assemble"
# NOTE(review): not referenced in the visible part of this file; presumably
# used by a to_ndarray-style transfer method further down -- confirm.
CMD_TO_NDARRAY = "segStr-tondarray"
[docs]
class Strings:
"""
Represents an array of strings whose data resides on the
arkouda server. The user should not call this class directly;
rather its instances are created by other arkouda functions.
Attributes
----------
entry : pdarray
Encapsulation of a Segmented Strings array contained on
the arkouda server. This is a composite of
- offsets array: starting indices for each string
- bytes array: raw bytes of all strings joined by nulls
size : int_scalars
The number of strings in the array
nbytes : int_scalars
The total number of bytes in all strings
ndim : int_scalars
The rank of the array (currently only rank 1 arrays supported)
shape : tuple
The sizes of each dimension of the array
dtype : dtype
The dtype is ak.str
logger : ArkoudaLogger
Used for all logging operations
Notes
-----
Strings is composed of two pdarrays: (1) offsets, which contains the
starting indices for each string and (2) bytes, which contains the
raw bytes of all strings, delimited by nulls.
"""
BinOps = frozenset(["==", "!="])
objType = "Strings"
[docs]
@staticmethod
def from_return_msg(rep_msg: str) -> Strings:
    """
    Build a Strings object from an Arkouda server response message.

    Parameters
    ----------
    rep_msg : str
        Server response message currently of form
        `created name type size ndim shape itemsize+created bytes.size 1234`

    Returns
    -------
    Strings
        object representing a segmented strings array on the server

    Raises
    ------
    ValueError
        Raised if the message does not split into exactly two parts on "+",
        or if its final token is not an integer
    RuntimeError
        Presumably raised by create_pdarray when the descriptor part cannot
        be converted -- confirm against create_pdarray

    Notes
    -----
    Variable length strings have no real itemsize; the total byte count is
    taken from the last token of the part after the "+".
    """
    descriptor, bytes_info = cast(str, rep_msg).split("+")
    # The total byte count is the last whitespace-separated token.
    total_bytes: int_scalars = int(bytes_info.split()[-1])
    strings_entry = create_pdarray(descriptor)
    return Strings(strings_entry, total_bytes)
[docs]
@staticmethod
def from_parts(offset_attrib: Union[pdarray, str], bytes_attrib: Union[pdarray, str]) -> Strings:
    """
    Build a Strings object from separately supplied offsets and bytes arrays.

    Parameters
    ----------
    offset_attrib : Union[pdarray, str]
        the array containing the offsets, or a server descriptor string
        that create_pdarray can turn into one
    bytes_attrib : Union[pdarray, str]
        the array containing the string values, or a server descriptor
        string that create_pdarray can turn into one

    Returns
    -------
    Strings
        object representing a segmented strings array on the server

    Raises
    ------
    RuntimeError
        Raised if there's an error converting a server-returned
        str-descriptor; the original exception is attached as __cause__

    Notes
    -----
    This factory method is used when we construct the parts of a Strings
    object on the client side and transfer the offsets & bytes separately
    to the server. This results in two entries in the symbol table and we
    need to instruct the server to assemble them into a composite entity.
    """
    components = []
    # Offsets are converted first, then bytes, matching the argument order.
    for attrib in (offset_attrib, bytes_attrib):
        if not isinstance(attrib, pdarray):
            try:
                attrib = create_pdarray(attrib)
            except Exception as e:
                # Chain the cause so the underlying failure stays visible.
                raise RuntimeError(e) from e
        components.append(attrib)
    offsets, values = components
    # Now we have two pdarray objects; ask the server to assemble them.
    response = cast(
        str, generic_msg(cmd=CMD_ASSEMBLE, args={"offsets": offsets, "values": values})
    )
    return Strings.from_return_msg(response)
def __init__(self, strings_pdarray: pdarray, bytes_size: int_scalars) -> None:
    """
    Initialize the Strings instance from its server-side entry.

    Parameters
    ----------
    strings_pdarray : pdarray
        the array containing the meta-info on a server side strings object
    bytes_size : int_scalars
        length of the bytes array contained on the server aka total bytes

    Returns
    -------
    None

    Raises
    ------
    ValueError
        Raised if size, ndim, shape or name cannot be read from
        strings_pdarray; the original exception is attached as __cause__
    """
    self.entry: pdarray = strings_pdarray
    self.registered_name: Optional[str] = None
    try:
        self.size = self.entry.size
        self.nbytes = bytes_size  # This is a deficiency of server GenSymEntry right now
        self.ndim = self.entry.ndim
        self.shape = self.entry.shape
        self.name: Optional[str] = self.entry.name
    except Exception as e:
        # Chain the cause so the failing attribute access stays visible.
        raise ValueError(e) from e
    # Caches for the bytes/offsets components; filled by get_bytes/get_offsets.
    self._bytes: Optional[pdarray] = None
    self._offsets: Optional[pdarray] = None
    self.dtype = akdtype(str_)
    # Maps regex pattern -> cached Matcher (see _get_matcher).
    self._regex_dict: Dict = dict()
    self.logger = getArkoudaLogger(name=__class__.__name__)  # type: ignore
"""
NOTE:
The Strings.__del__() method should NOT be implemented.
Python will invoke the __del__() of any components by default.
Overriding this default behavior with an explicitly specified Strings.__del__() method may
introduce unknown symbol errors.
By allowing Python's garbage collecting to handle this automatically, we avoid extra maintenance
"""
def __iter__(self):
raise NotImplementedError(
"Strings does not support iteration. To force data transfer from server, use to_ndarray"
)
def __len__(self) -> int:
return self.shape[0]
def __str__(self) -> str:
    """
    Render the array as a bracketed, comma-separated list of quoted strings.

    Arrays larger than ``pdarrayIterThresh`` show only the first three and
    last three elements with "... " between them. Each shown element is
    fetched by indexing ``self``.
    """
    from arkouda.client import pdarrayIterThresh

    def quoted(indices):
        # One indexing call per displayed element.
        return [f"'{self[i]}'" for i in indices]

    if self.size <= pdarrayIterThresh:
        pieces = quoted(range(self.size))
    else:
        head = quoted(range(3))
        tail = quoted(range(self.size - 3, self.size))
        pieces = head + ["... "] + tail
    return "[" + ", ".join(pieces) + "]"
def __repr__(self) -> str:
return f"array({self.__str__()})"
@typechecked
def _binop(self, other: Union[Strings, str_scalars], op: str) -> pdarray:
    """
    Run a binary comparison between this Strings and another Strings or a
    scalar string on the server, returning the result as a pdarray.

    Parameters
    ----------
    other : Strings, str_scalars
        the right-hand operand
    op : str
        name of the binary operation to be performed; must be in BinOps

    Returns
    -------
    pdarray
        encapsulating the results of the requested binop

    Raises
    ------
    ValueError
        Raised if (1) the op is not in the self.BinOps set, or (2) if the
        sizes of this and the other instance don't match, or (3) the other
        object is neither a Strings object nor a scalar string
    RuntimeError
        Raised if a server-side error is thrown while executing the
        binary operation
    """
    if op not in self.BinOps:
        raise ValueError(f"Strings: unsupported operator: {op}")

    # Leading arguments shared by the vector-vector and vector-scalar forms.
    args = {"op": op, "objType": self.objType, "obj": self.entry}
    if isinstance(other, Strings):
        if self.size != other.size:
            raise ValueError(f"Strings: size mismatch {self.size} {other.size}")
        cmd = "segmentedBinopvv"
        args["otherType"] = other.objType
        args["other"] = other.entry
        args["left"] = False  # placeholder for stick
        args["delim"] = ""  # placeholder for stick
    elif resolve_scalar_dtype(other) == "str":
        cmd = "segmentedBinopvs"
        args["otherType"] = "str"
        args["other"] = other
    else:
        raise ValueError(
            f"Strings: {op} not supported between Strings and {other.__class__.__name__}"
        )
    reply = generic_msg(cmd=cmd, args=args)
    return create_pdarray(reply)
def __eq__(self, other) -> bool:
return self._binop(other, "==")
def __ne__(self, other) -> bool:
return self._binop(cast(Strings, other), "!=")
def __getitem__(self, key):
    """
    Index this Strings with an integer, a slice, or a pdarray.

    Parameters
    ----------
    key : int scalar, slice, or pdarray
        An int64/uint64 scalar selects one element (negative values count
        from the end); a slice or a pdarray of dtype bool, int or uint
        selects a sub-array.

    Returns
    -------
    The value parsed by parse_single_value for a scalar key, otherwise a
    new Strings built from the server reply.

    Raises
    ------
    IndexError
        Raised if a scalar key is out of bounds
    TypeError
        Raised if the key type, or the dtype of a pdarray key, is unsupported
    ValueError
        Raised if a bool pdarray key differs in size from this Strings
    """
    if np.isscalar(key) and (resolve_scalar_dtype(key) in ("int64", "uint64")):
        requested = key
        if key < 0:
            # Interpret negative key as offset from end of array
            key += self.size
        if not (key >= 0 and key < self.size):
            raise IndexError(f"[int] {requested} is out of bounds with size {self.size}")
        int_args = {
            "subcmd": "intIndex",
            "objType": self.objType,
            "dtype": self.entry.dtype,
            "obj": self.entry,
            "key": key,
        }
        reply = generic_msg(cmd="segmentedIndex", args=int_args)
        # Drop the leading token of the reply; the rest is the value.
        _, value = reply.split(maxsplit=1)
        return parse_single_value(value)

    if isinstance(key, slice):
        (start, stop, stride) = key.indices(self.size)
        self.logger.debug(f"start: {start}; stop: {stop}; stride: {stride}")
        slice_args = {
            "subcmd": "sliceIndex",
            "objType": self.objType,
            "obj": self.entry,
            "dtype": self.entry.dtype,
            "key": [start, stop, stride],
        }
        reply = generic_msg(cmd="segmentedIndex", args=slice_args)
        return Strings.from_return_msg(reply)

    if isinstance(key, pdarray):
        if key.dtype not in ("bool", "int", "uint"):
            raise TypeError(f"unsupported pdarray index type {key.dtype}")
        if key.dtype == "bool" and self.size != key.size:
            raise ValueError(f"size mismatch {self.size} {key.size}")
        array_args = {
            "subcmd": "pdarrayIndex",
            "objType": self.objType,
            "dtype": self.entry.dtype,
            "obj": self.entry,
            "key": key,
        }
        reply = generic_msg(cmd="segmentedIndex", args=array_args)
        return Strings.from_return_msg(reply)

    raise TypeError(f"unsupported pdarray index type {key.__class__.__name__}")
@property
def inferred_type(self) -> str:
    """
    Name of the type inferred from the values; always "string".
    """
    kind = "string"
    return kind
[docs]
def equals(self, other) -> bool:
"""
Whether Strings are the same size and all entries are equal.
Parameters
----------
other : object
object to compare.
Returns
-------
bool
True if the Strings are the same, o.w. False.
Examples
--------
>>> import arkouda as ak
>>> ak.connect()
>>> s = ak.array(["a", "b", "c"])
>>> s_cpy = ak.array(["a", "b", "c"])
>>> s.equals(s_cpy)
True
>>> s2 = ak.array(["a", "x", "c"])
>>> s.equals(s2)
False
"""
if isinstance(other, Strings):
if other.size != self.size:
return False
else:
return akall(self == other)
else:
return False
[docs]
def get_lengths(self) -> pdarray:
    """
    Return the length of each string in the array.

    Returns
    -------
    pdarray, int
        The length of each string

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown
    """
    request = {"objType": self.objType, "obj": self.entry}
    reply = generic_msg(cmd="segmentLengths", args=request)
    return create_pdarray(reply)
[docs]
def get_bytes(self):
    """
    Getter for the bytes component (uint8 pdarray) of this Strings.

    The pdarray is cached on the instance and fetched again when the cache
    is empty or its name is no longer in the server symbol table.

    Returns
    -------
    pdarray, uint8
        Pdarray of bytes of the string accessed

    Example
    -------
    >>> x = ak.array(['one', 'two', 'three'])
    >>> x.get_bytes()
    [111 110 101 0 116 119 111 0 116 104 114 101 101 0]
    """
    cached = self._bytes
    needs_fetch = cached is None or cached.name not in list_symbol_table()
    if needs_fetch:
        request = {"property": "get_bytes", "obj": self.entry}
        reply = generic_msg(cmd="getSegStringProperty", args=request)
        self._bytes = create_pdarray(reply)
    return self._bytes
[docs]
def get_offsets(self):
    """
    Getter for the offsets component (int64 pdarray) of this Strings.

    The pdarray is cached on the instance and fetched again when the cache
    is empty or its name is no longer in the server symbol table.

    Returns
    -------
    pdarray, int64
        Pdarray of offsets of the string accessed

    Example
    -------
    >>> x = ak.array(['one', 'two', 'three'])
    >>> x.get_offsets()
    [0 4 8]
    """
    cached = self._offsets
    needs_fetch = cached is None or cached.name not in list_symbol_table()
    if needs_fetch:
        request = {"property": "get_offsets", "obj": self.entry}
        reply = generic_msg(cmd="getSegStringProperty", args=request)
        self._offsets = create_pdarray(reply)
    return self._offsets
[docs]
def encode(self, toEncoding: str, fromEncoding: str = "UTF-8"):
    """
    Return a new strings object in `toEncoding`, expecting that the
    current Strings is encoded in `fromEncoding`

    Parameters
    ----------
    toEncoding: str
        The encoding that the strings will be converted to
    fromEncoding: str
        The current encoding of the strings object, default to
        UTF-8

    Returns
    -------
    Strings
        A new Strings object in `toEncoding`

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown

    Notes
    -----
    Conversions to IDNA from a non-UTF-8 source, and from IDNA to a
    non-UTF-8 target, are done in two server calls via an intermediate
    UTF-8 Strings.
    """

    def _convert(entry, source: str, target: str) -> Strings:
        # One server-side conversion of `entry` from `source` to `target`.
        rep_msg = generic_msg(
            cmd="encode",
            args={
                "toEncoding": target,
                "fromEncoding": source,
                "obj": entry,
            },
        )
        return Strings.from_return_msg(cast(str, rep_msg))

    # Encoding names are compared case-insensitively; the caller's original
    # spelling is what gets sent to the server.
    to_upper = toEncoding.upper()
    from_upper = fromEncoding.upper()
    via_utf8 = (to_upper == "IDNA" and from_upper != "UTF-8") or (
        to_upper != "UTF-8" and from_upper == "IDNA"
    )
    if via_utf8:
        # first convert to UTF-8
        intermediate = _convert(self.entry, fromEncoding, "UTF-8")
        # then convert to the requested target; pass the entry, as every
        # other request in this class does, rather than the Strings wrapper
        return _convert(intermediate.entry, "UTF-8", toEncoding)
    return _convert(self.entry, fromEncoding, toEncoding)
[docs]
def decode(self, fromEncoding: str, toEncoding: str = "UTF-8") -> Strings:
    """
    Return a new strings object in `toEncoding`, expecting that the
    current Strings is encoded in `fromEncoding`.

    This is `encode` with the argument order swapped, so that the source
    encoding comes first and the target defaults to UTF-8.

    Parameters
    ----------
    fromEncoding: str
        The current encoding of the strings object
    toEncoding: str
        The encoding that the strings will be converted to,
        default to UTF-8

    Returns
    -------
    Strings
        A new Strings object in `toEncoding`

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown
    """
    return self.encode(toEncoding, fromEncoding)
[docs]
@typechecked
def lower(self) -> Strings:
    """
    Returns a new Strings with all uppercase characters from the original
    replaced with their lowercase equivalent

    Returns
    -------
    Strings
        Strings with all uppercase characters from the original replaced
        with their lowercase equivalent

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.upper

    Examples
    --------
    >>> strings = ak.array([f'StrINgS {i}' for i in range(5)])
    >>> strings
    array(['StrINgS 0', 'StrINgS 1', 'StrINgS 2', 'StrINgS 3', 'StrINgS 4'])
    >>> strings.lower()
    array(['strings 0', 'strings 1', 'strings 2', 'strings 3', 'strings 4'])
    """
    request = {"subcmd": "toLower", "objType": self.objType, "obj": self.entry}
    reply = generic_msg(cmd="caseChange", args=request)
    return Strings.from_return_msg(cast(str, reply))
[docs]
@typechecked
def upper(self) -> Strings:
    """
    Returns a new Strings with all lowercase characters from the original
    replaced with their uppercase equivalent

    Returns
    -------
    Strings
        Strings with all lowercase characters from the original replaced
        with their uppercase equivalent

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.lower

    Examples
    --------
    >>> strings = ak.array([f'StrINgS {i}' for i in range(5)])
    >>> strings
    array(['StrINgS 0', 'StrINgS 1', 'StrINgS 2', 'StrINgS 3', 'StrINgS 4'])
    >>> strings.upper()
    array(['STRINGS 0', 'STRINGS 1', 'STRINGS 2', 'STRINGS 3', 'STRINGS 4'])
    """
    request = {"subcmd": "toUpper", "objType": self.objType, "obj": self.entry}
    reply = generic_msg(cmd="caseChange", args=request)
    return Strings.from_return_msg(cast(str, reply))
[docs]
@typechecked
def title(self) -> Strings:
    """
    Returns a new Strings from the original replaced with their titlecase
    equivalent.

    Returns
    -------
    Strings
        Strings from the original replaced with their titlecase equivalent.

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown.

    See Also
    --------
    Strings.lower
    Strings.upper

    Examples
    --------
    >>> strings = ak.array([f'StrINgS {i}' for i in range(5)])
    >>> strings
    array(['StrINgS 0', 'StrINgS 1', 'StrINgS 2', 'StrINgS 3', 'StrINgS 4'])
    >>> strings.title()
    array(['Strings 0', 'Strings 1', 'Strings 2', 'Strings 3', 'Strings 4'])
    """
    request = {"subcmd": "toTitle", "objType": self.objType, "obj": self.entry}
    reply = generic_msg(cmd="caseChange", args=request)
    return Strings.from_return_msg(cast(str, reply))
[docs]
@typechecked
def isdecimal(self) -> pdarray:
    """
    Returns a boolean pdarray where index i indicates whether string i of the
    Strings has all decimal characters.

    Returns
    -------
    pdarray, bool
        True for elements that are decimals, False otherwise

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.isdigit

    Examples
    --------
    >>> not_decimal = ak.array([f'Strings {i}' for i in range(3)])
    >>> decimal = ak.array([f'12{i}' for i in range(3)])
    >>> strings = ak.concatenate([not_decimal, decimal])
    >>> strings
    array(['Strings 0', 'Strings 1', 'Strings 2', '120', '121', '122'])
    >>> strings.isdecimal()
    array([False False False True True True])

    Special Character Examples

    >>> special_strings = ak.array(["3.14", "0", "²", "2³₇", "2³x₇"])
    >>> special_strings.isdecimal()
    array([False True False False False])
    """
    request = {"subcmd": "isDecimal", "objType": self.objType, "obj": self.entry}
    reply = generic_msg(cmd="checkChars", args=request)
    return create_pdarray(reply)
[docs]
@typechecked
def capitalize(self) -> Strings:
    """
    Returns a new Strings from the original replaced with the first letter
    capitalized and the remaining letters lowercase.

    Returns
    -------
    Strings
        Strings from the original replaced with the capitalized equivalent.

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown.

    See Also
    --------
    Strings.lower
    Strings.upper
    Strings.title

    Examples
    --------
    >>> strings = ak.array([f'StrINgS aRe Here {i}' for i in range(5)])
    >>> strings
    array(['StrINgS aRe Here 0', 'StrINgS aRe Here 1', 'StrINgS aRe Here 2', 'StrINgS aRe Here 3',
    ... 'StrINgS aRe Here 4'])
    >>> strings.capitalize()
    array(['Strings are here 0', 'Strings are here 1', 'Strings are here 2', 'Strings are here 3',
    ... 'Strings are here 4'])
    """
    request = {"subcmd": "capitalize", "objType": self.objType, "obj": self.entry}
    reply = generic_msg(cmd="caseChange", args=request)
    return Strings.from_return_msg(cast(str, reply))
[docs]
@typechecked
def islower(self) -> pdarray:
    """
    Returns a boolean pdarray where index i indicates whether string i of the
    Strings is entirely lowercase

    Returns
    -------
    pdarray, bool
        True for elements that are entirely lowercase, False otherwise

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.isupper

    Examples
    --------
    >>> lower = ak.array([f'strings {i}' for i in range(3)])
    >>> upper = ak.array([f'STRINGS {i}' for i in range(3)])
    >>> strings = ak.concatenate([lower, upper])
    >>> strings
    array(['strings 0', 'strings 1', 'strings 2', 'STRINGS 0', 'STRINGS 1', 'STRINGS 2'])
    >>> strings.islower()
    array([True True True False False False])
    """
    request = {"subcmd": "isLower", "objType": self.objType, "obj": self.entry}
    reply = generic_msg(cmd="checkChars", args=request)
    return create_pdarray(reply)
[docs]
@typechecked
def isupper(self) -> pdarray:
    """
    Returns a boolean pdarray where index i indicates whether string i of the
    Strings is entirely uppercase

    Returns
    -------
    pdarray, bool
        True for elements that are entirely uppercase, False otherwise

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.islower

    Examples
    --------
    >>> lower = ak.array([f'strings {i}' for i in range(3)])
    >>> upper = ak.array([f'STRINGS {i}' for i in range(3)])
    >>> strings = ak.concatenate([lower, upper])
    >>> strings
    array(['strings 0', 'strings 1', 'strings 2', 'STRINGS 0', 'STRINGS 1', 'STRINGS 2'])
    >>> strings.isupper()
    array([False False False True True True])
    """
    request = {"subcmd": "isUpper", "objType": self.objType, "obj": self.entry}
    reply = generic_msg(cmd="checkChars", args=request)
    return create_pdarray(reply)
[docs]
@typechecked
def istitle(self) -> pdarray:
    """
    Returns a boolean pdarray where index i indicates whether string i of the
    Strings is titlecase

    Returns
    -------
    pdarray, bool
        True for elements that are titlecase, False otherwise

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.islower
    Strings.isupper

    Examples
    --------
    >>> mixed = ak.array([f'sTrINgs {i}' for i in range(3)])
    >>> title = ak.array([f'Strings {i}' for i in range(3)])
    >>> strings = ak.concatenate([mixed, title])
    >>> strings
    array(['sTrINgs 0', 'sTrINgs 1', 'sTrINgs 2', 'Strings 0', 'Strings 1', 'Strings 2'])
    >>> strings.istitle()
    array([False False False True True True])
    """
    request = {"subcmd": "isTitle", "objType": self.objType, "obj": self.entry}
    reply = generic_msg(cmd="checkChars", args=request)
    return create_pdarray(reply)
[docs]
@typechecked
def isalnum(self) -> pdarray:
    """
    Returns a boolean pdarray where index i indicates whether string i of the
    Strings is alphanumeric.

    Returns
    -------
    pdarray, bool
        True for elements that are alphanumeric, False otherwise

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.islower
    Strings.isupper
    Strings.istitle

    Examples
    --------
    >>> not_alnum = ak.array([f'%Strings {i}' for i in range(3)])
    >>> alnum = ak.array([f'Strings{i}' for i in range(3)])
    >>> strings = ak.concatenate([not_alnum, alnum])
    >>> strings
    array(['%Strings 0', '%Strings 1', '%Strings 2', 'Strings0', 'Strings1', 'Strings2'])
    >>> strings.isalnum()
    array([False False False True True True])
    """
    request = {"subcmd": "isalnum", "objType": self.objType, "obj": self.entry}
    reply = generic_msg(cmd="checkChars", args=request)
    return create_pdarray(reply)
[docs]
@typechecked
def isalpha(self) -> pdarray:
    """
    Returns a boolean pdarray where index i indicates whether string i of the
    Strings is alphabetic. This means there is at least one character,
    and all the characters are alphabetic.

    Returns
    -------
    pdarray, bool
        True for elements that are alphabetic, False otherwise

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.islower
    Strings.isupper
    Strings.istitle
    Strings.isalnum

    Examples
    --------
    >>> not_alpha = ak.array([f'%Strings {i}' for i in range(3)])
    >>> alpha = ak.array(['StringA','StringB','StringC'])
    >>> strings = ak.concatenate([not_alpha, alpha])
    >>> strings
    array(['%Strings 0', '%Strings 1', '%Strings 2', 'StringA','StringB','StringC'])
    >>> strings.isalpha()
    array([False False False True True True])
    """
    request = {"subcmd": "isalpha", "objType": self.objType, "obj": self.entry}
    reply = generic_msg(cmd="checkChars", args=request)
    return create_pdarray(reply)
[docs]
@typechecked
def isdigit(self) -> pdarray:
    """
    Returns a boolean pdarray where index i indicates whether string i of the
    Strings has all digit characters.

    Returns
    -------
    pdarray, bool
        True for elements that are digits, False otherwise

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.islower
    Strings.isupper
    Strings.istitle

    Examples
    --------
    >>> not_digit = ak.array([f'Strings {i}' for i in range(3)])
    >>> digit = ak.array([f'12{i}' for i in range(3)])
    >>> strings = ak.concatenate([not_digit, digit])
    >>> strings
    array(['Strings 0', 'Strings 1', 'Strings 2', '120', '121', '122'])
    >>> strings.isdigit()
    array([False False False True True True])

    Special Character Examples

    >>> special_strings = ak.array(["3.14", "0", "²", "2³₇", "2³x₇"])
    >>> special_strings.isdigit()
    array([False True True True False])
    """
    request = {"subcmd": "isdigit", "objType": self.objType, "obj": self.entry}
    reply = generic_msg(cmd="checkChars", args=request)
    return create_pdarray(reply)
[docs]
@typechecked
def isempty(self) -> pdarray:
    """
    Returns a boolean pdarray where index i indicates whether string i of the
    Strings is empty.

    Returns
    -------
    pdarray, bool
        True for elements that are the empty string, False otherwise

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.islower
    Strings.isupper
    Strings.istitle

    Examples
    --------
    >>> not_empty = ak.array([f'Strings {i}' for i in range(3)])
    >>> empty = ak.array(['' for i in range(3)])
    >>> strings = ak.concatenate([not_empty, empty])
    >>> strings
    array(['Strings 0', 'Strings 1', 'Strings 2', '', '', ''])
    >>> strings.isempty()
    array([False False False True True True])
    """
    request = {"subcmd": "isempty", "objType": self.objType, "obj": self.entry}
    reply = generic_msg(cmd="checkChars", args=request)
    return create_pdarray(reply)
[docs]
@typechecked
def isspace(self) -> pdarray:
    """
    Returns a boolean pdarray where index i indicates whether string i has all
    whitespace characters (space, tab, newline, vertical tab, form feed,
    carriage return).

    Returns
    -------
    pdarray, bool
        True for elements that are whitespace, False otherwise

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.islower
    Strings.isupper
    Strings.istitle

    Examples
    --------
    >>> not_space = ak.array([f'Strings {i}' for i in range(3)])
    >>> space = ak.array([' ', '  ', '   '])
    >>> strings = ak.concatenate([not_space, space])
    >>> strings.isspace()
    array([False False False True True True])
    """
    request = {"subcmd": "isspace", "objType": self.objType, "obj": self.entry}
    reply = generic_msg(cmd="checkChars", args=request)
    return create_pdarray(reply)
[docs]
@typechecked
def strip(self, chars: Optional[Union[bytes, str_scalars]] = "") -> Strings:
    """
    Returns a new Strings object with all leading and trailing occurrences of characters contained
    in chars removed. The chars argument is a string specifying the set of characters to be removed.
    If omitted, empty, or None, the chars argument defaults to removing whitespace. The chars
    argument is not a prefix or suffix; rather, all combinations of its values are stripped.

    Parameters
    ----------
    chars
        the set of characters to be removed; bytes are decoded to str
        before being sent to the server

    Returns
    -------
    Strings
        Strings object with the leading and trailing characters matching the set of characters in
        the chars argument removed

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown

    Examples
    --------
    >>> strings = ak.array(['Strings ', ' StringS ', 'StringS '])
    >>> s = strings.strip()
    >>> s
    array(['Strings', 'StringS', 'StringS'])
    >>> strings = ak.array(['Strings 1', '1 StringS ', ' 1StringS 12 '])
    >>> s = strings.strip(' 12')
    >>> s
    array(['Strings', 'StringS', 'StringS'])
    """
    if chars is None:
        # The annotation admits None; treat it like the "" default instead
        # of forwarding None to the server.
        chars = ""
    elif isinstance(chars, bytes):
        chars = chars.decode()
    rep_msg = generic_msg(
        cmd="segmentedStrip", args={"objType": self.objType, "name": self.entry, "chars": chars}
    )
    return Strings.from_return_msg(cast(str, rep_msg))
[docs]
@typechecked
def cached_regex_patterns(self) -> List:
"""
Returns the regex patterns for which Match objects have been cached
"""
return list(self._regex_dict.keys())
[docs]
@typechecked
def purge_cached_regex_patterns(self) -> None:
"""
purges cached regex patterns
"""
self._regex_dict = dict()
def _empty_pattern_verification(self, pattern):
if pattern == "$" or (re.search(pattern, "") and (self == "").any()): # type: ignore
# TODO remove once changes from chapel issue #20431 and #20441 are in arkouda
raise ValueError(
"regex operations not currently supported with a pattern='$' or pattern='' when "
"the empty string is contained in Strings"
)
def _get_matcher(self, pattern: Union[bytes, str_scalars], create: bool = True):
    """
    Internal function to fetch cached Matcher objects.

    Parameters
    ----------
    pattern : bytes or str_scalars
        Regex pattern; bytes are decoded to str before use
    create : bool
        If True (default), build and cache a Matcher when none is cached
        for the pattern

    Returns
    -------
    The cached or newly created Matcher, or None when no Matcher is cached
    and create is False.

    Raises
    ------
    ValueError
        Raised if the pattern cannot be compiled by ``re`` (the original
        exception is attached as __cause__), or if it is rejected by
        _empty_pattern_verification
    """
    from arkouda.matcher import Matcher

    if isinstance(pattern, bytes):
        pattern = pattern.decode()
    try:
        # Validate on the client before involving the server.
        re.compile(pattern)
    except Exception as e:
        raise ValueError(e) from e
    self._empty_pattern_verification(pattern)
    if pattern not in self._regex_dict:
        if not create:
            return None
        self._regex_dict[pattern] = Matcher(pattern=pattern, parent_entry_name=self.entry.name)
    return self._regex_dict[pattern]
[docs]
@typechecked
def find_locations(self, pattern: Union[bytes, str_scalars]) -> Tuple[pdarray, pdarray, pdarray]:
    """
    Finds pattern matches and returns pdarrays containing the number, start positions,
    and lengths of matches

    Parameters
    ----------
    pattern: str_scalars
        The regex pattern used to find matches

    Returns
    -------
    pdarray, int64
        For each original string, the number of pattern matches
    pdarray, int64
        The start positions of pattern matches
    pdarray, int64
        The lengths of pattern matches

    Raises
    ------
    TypeError
        Raised if the pattern parameter is not bytes or str_scalars
    ValueError
        Raised if pattern is not a valid regex
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.findall, Strings.match

    Examples
    --------
    >>> strings = ak.array([f'{i} string {i}' for i in range(1, 6)])
    >>> num_matches, starts, lens = strings.find_locations('\\d')
    >>> num_matches
    array([2, 2, 2, 2, 2])
    >>> starts
    array([0, 9, 0, 9, 0, 9, 0, 9, 0, 9])
    >>> lens
    array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    """
    located = self._get_matcher(pattern)
    # Populate the matcher's results before reading them.
    located.find_locations()
    counts = located.num_matches
    starts = located.starts
    lengths = located.lengths
    return counts, starts, lengths
[docs]
@typechecked
def search(self, pattern: Union[bytes, str_scalars]) -> Match:
    """
    Returns a match object with the first location in each element where pattern produces a match.
    Elements match if any part of the string matches the regular expression pattern

    Parameters
    ----------
    pattern: str
        Regex used to find matches

    Returns
    -------
    Match
        Match object where elements match if any part of the string matches the
        regular expression pattern

    Examples
    --------
    >>> strings = ak.array(['1_2___', '____', '3', '__4___5____6___7', ''])
    >>> strings.search('_+')
    <ak.Match object: matched=True, span=(1, 2); matched=True, span=(0, 4);
    matched=False; matched=True, span=(0, 2); matched=False>
    """
    matcher = self._get_matcher(pattern)
    return matcher.get_match(MatchType.SEARCH, self)
[docs]
@typechecked
def match(self, pattern: Union[bytes, str_scalars]) -> Match:
    """
    Returns a match object where elements match only if the beginning of the string matches the
    regular expression pattern

    Parameters
    ----------
    pattern: str
        Regex used to find matches

    Returns
    -------
    Match
        Match object where elements match only if the beginning of the string matches the
        regular expression pattern

    Examples
    --------
    >>> strings = ak.array(['1_2___', '____', '3', '__4___5____6___7', ''])
    >>> strings.match('_+')
    <ak.Match object: matched=False; matched=True, span=(0, 4); matched=False;
    matched=True, span=(0, 2); matched=False>
    """
    matcher = self._get_matcher(pattern)
    return matcher.get_match(MatchType.MATCH, self)
[docs]
@typechecked()
def fullmatch(self, pattern: Union[bytes, str_scalars]) -> Match:
    """
    Returns a match object where elements match only if the whole string matches the
    regular expression pattern

    Parameters
    ----------
    pattern: str
        Regex used to find matches

    Returns
    -------
    Match
        Match object where elements match only if the whole string matches the
        regular expression pattern

    Examples
    --------
    >>> strings = ak.array(['1_2___', '____', '3', '__4___5____6___7', ''])
    >>> strings.fullmatch('_+')
    <ak.Match object: matched=False; matched=True, span=(0, 4); matched=False;
    matched=False; matched=False>
    """
    matcher = self._get_matcher(pattern)
    return matcher.get_match(MatchType.FULLMATCH, self)
[docs]
@typechecked()
def split(
self, pattern: Union[bytes, str_scalars], maxsplit: int = 0, return_segments: bool = False
) -> Union[Strings, Tuple]:
"""
Returns a new Strings split by the occurrences of pattern.
If maxsplit is nonzero, at most maxsplit splits occur
Parameters
----------
pattern: str
Regex used to split strings into substrings
maxsplit: int
The max number of pattern match occurences in each element to split.
The default maxsplit=0 splits on all occurences
return_segments: bool
If True, return mapping of original strings to first substring
in return array.
Returns
-------
Strings
Substrings with pattern matches removed
pdarray, int64 (optional)
For each original string, the index of first corresponding substring
in the return array
Examples
--------
>>> strings = ak.array(['1_2___', '____', '3', '__4___5____6___7', ''])
>>> strings.split('_+', maxsplit=2, return_segments=True)
(array(['1', '2', '', '', '', '3', '', '4', '5____6___7', '']), array([0 3 5 6 9]))
"""
return self._get_matcher(pattern).split(maxsplit, return_segments)
[docs]
@typechecked
def findall(
    self, pattern: Union[bytes, str_scalars], return_match_origins: bool = False
) -> Union[Strings, Tuple]:
    """
    Collect all non-overlapping matches of a regex pattern into a new Strings.

    Parameters
    ----------
    pattern : bytes or str_scalars
        Regex used to find matches
    return_match_origins : bool
        If True, also return a pdarray containing, for each match, the index
        of the original string it came from.

    Returns
    -------
    Strings
        Strings object containing only pattern matches
    pdarray, int64 (optional)
        The index of the original string each pattern match is from

    Raises
    ------
    TypeError
        Raised if the pattern parameter is not bytes or str_scalars
    ValueError
        Raised if pattern is not a valid regex
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.find_locations

    Examples
    --------
    >>> strings = ak.array(['1_2___', '____', '3', '__4___5____6___7', ''])
    >>> strings.findall('_+', return_match_origins=True)
    (array(['_', '___', '____', '__', '___', '____', '___']), array([0 0 1 3 3 3 3]))
    """
    # Delegate to the matcher associated with this pattern.
    regex_matcher = self._get_matcher(pattern)
    return regex_matcher.findall(return_match_origins)
[docs]
@typechecked()
def sub(
    self, pattern: Union[bytes, str_scalars], repl: Union[bytes, str_scalars], count: int = 0
) -> Strings:
    """
    Replace non-overlapping occurrences of a regex pattern with ``repl`` and
    return the result as a new Strings.

    Parameters
    ----------
    pattern : bytes or str_scalars
        The regex to substitute
    repl : bytes or str_scalars
        The substring to replace pattern matches with
    count : int
        The maximum number of pattern matches in each element to replace.
        The default count=0 replaces all occurrences of pattern with repl.

    Returns
    -------
    Strings
        Strings with pattern matches replaced

    Raises
    ------
    TypeError
        Raised if pattern or repl are not bytes or str_scalars
    ValueError
        Raised if pattern is not a valid regex
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.subn

    Examples
    --------
    >>> strings = ak.array(['1_2___', '____', '3', '__4___5____6___7', ''])
    >>> strings.sub(pattern='_+', repl='-', count=2)
    array(['1-2-', '-', '3', '-4-5____6___7', ''])
    """
    # The matcher is handed a str replacement, so decode bytes up front.
    replacement = repl.decode() if isinstance(repl, bytes) else repl
    regex_matcher = self._get_matcher(pattern)
    return regex_matcher.sub(replacement, count)
[docs]
@typechecked()
def subn(
    self, pattern: Union[bytes, str_scalars], repl: Union[bytes, str_scalars], count: int = 0
) -> Tuple:
    """
    Perform the same substitution as sub(), but return a tuple
    (new_Strings, number_of_substitutions).

    Parameters
    ----------
    pattern : bytes or str_scalars
        The regex to substitute
    repl : bytes or str_scalars
        The substring to replace pattern matches with
    count : int
        The maximum number of pattern matches in each element to replace.
        The default count=0 replaces all occurrences of pattern with repl.

    Returns
    -------
    Strings
        Strings with pattern matches replaced
    pdarray, int64
        The number of substitutions made for each element of Strings

    Raises
    ------
    TypeError
        Raised if pattern or repl are not bytes or str_scalars
    ValueError
        Raised if pattern is not a valid regex
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.sub

    Examples
    --------
    >>> strings = ak.array(['1_2___', '____', '3', '__4___5____6___7', ''])
    >>> strings.subn(pattern='_+', repl='-', count=2)
    (array(['1-2-', '-', '3', '-4-5____6___7', '']), array([2 1 0 2 0]))
    """
    # The matcher is handed a str replacement, so decode bytes up front.
    replacement = repl.decode() if isinstance(repl, bytes) else repl
    regex_matcher = self._get_matcher(pattern)
    return regex_matcher.sub(replacement, count, return_num_subs=True)
[docs]
@typechecked
def contains(self, substr: Union[bytes, str_scalars], regex: bool = False) -> pdarray:
    """
    Test, element by element, whether each string contains the given substring.

    Parameters
    ----------
    substr : bytes or str_scalars
        The substring (string or byte array) to search for
    regex : bool
        Indicates whether substr is a regular expression.
        Note: only handles regular expressions supported by re2
        (does not support lookaheads/lookbehinds)

    Returns
    -------
    pdarray, bool
        True for elements that contain substr, False otherwise

    Raises
    ------
    TypeError
        Raised if the substr parameter is not bytes or str_scalars
    ValueError
        Raised if substr is not a valid regex
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.startswith, Strings.endswith

    Examples
    --------
    >>> strings = ak.array([f'{i} string {i}' for i in range(1, 6)])
    >>> strings
    array(['1 string 1', '2 string 2', '3 string 3', '4 string 4', '5 string 5'])
    >>> strings.contains('string')
    array([True, True, True, True, True])
    >>> strings.contains('string \\d', regex=True)
    array([True, True, True, True, True])
    """
    needle = substr.decode() if isinstance(substr, bytes) else substr
    # A literal substring is escaped so it can be searched for as a regex.
    search_pattern = needle if regex else re.escape(needle)
    self._empty_pattern_verification(search_pattern)

    # create=False: only reuse a matcher if one is already available.
    existing = self._get_matcher(search_pattern, create=False)
    if existing is None:
        reply = generic_msg(
            cmd="segmentedSearch",
            args={
                "objType": self.objType,
                "obj": self.entry,
                "valType": "str",
                "val": search_pattern,
            },
        )
        return create_pdarray(reply)
    return existing.get_match(MatchType.SEARCH, self).matched()
[docs]
@typechecked
def startswith(self, substr: Union[bytes, str_scalars], regex: bool = False) -> pdarray:
    """
    Check whether each element starts with the given substring.

    Parameters
    ----------
    substr : Union[bytes, str_scalars]
        The prefix to search for
    regex : bool
        Indicates whether substr is a regular expression.
        Note: only handles regular expressions supported by re2
        (does not support lookaheads/lookbehinds)

    Returns
    -------
    pdarray, bool
        True for elements that start with substr, False otherwise

    Raises
    ------
    TypeError
        Raised if the substr parameter is not bytes or str_scalars
    ValueError
        Raised if substr is not a valid regex
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.contains, Strings.endswith

    Examples
    --------
    >>> strings_end = ak.array([f'string {i}' for i in range(1, 6)])
    >>> strings_end
    array(['string 1', 'string 2', 'string 3', 'string 4', 'string 5'])
    >>> strings_end.startswith('string')
    array([True, True, True, True, True])
    >>> strings_start = ak.array([f'{i} string' for i in range(1,6)])
    >>> strings_start
    array(['1 string', '2 string', '3 string', '4 string', '5 string'])
    >>> strings_start.startswith('\\d str', regex = True)
    array([True, True, True, True, True])
    """
    if isinstance(substr, bytes):
        substr = substr.decode()
    if not regex:
        substr = re.escape(substr)
    self._empty_pattern_verification(substr)
    matcher = self._get_matcher(substr, create=False)
    if matcher is not None:
        return matcher.get_match(MatchType.MATCH, self).matched()
    # Wrap the pattern in a non-capturing group before anchoring it.
    # Alternation binds more loosely than "^", so a bare "^" + "a|b" would
    # parse as "(^a)|b" and match "b" anywhere in the string, disagreeing
    # with the MATCH-based branch above.
    return self.contains(f"^(?:{substr})", regex=True)
[docs]
@typechecked
def endswith(self, substr: Union[bytes, str_scalars], regex: bool = False) -> pdarray:
    """
    Check whether each element ends with the given substring.

    Parameters
    ----------
    substr : Union[bytes, str_scalars]
        The suffix to search for
    regex : bool
        Indicates whether substr is a regular expression.
        Note: only handles regular expressions supported by re2
        (does not support lookaheads/lookbehinds)

    Returns
    -------
    pdarray, bool
        True for elements that end with substr, False otherwise

    Raises
    ------
    TypeError
        Raised if the substr parameter is not bytes or str_scalars
    ValueError
        Raised if substr is not a valid regex
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.contains, Strings.startswith

    Examples
    --------
    >>> strings_start = ak.array([f'{i} string' for i in range(1,6)])
    >>> strings_start
    array(['1 string', '2 string', '3 string', '4 string', '5 string'])
    >>> strings_start.endswith('ing')
    array([True, True, True, True, True])
    >>> strings_end = ak.array([f'string {i}' for i in range(1, 6)])
    >>> strings_end
    array(['string 1', 'string 2', 'string 3', 'string 4', 'string 5'])
    >>> strings_end.endswith('ing \\d', regex = True)
    array([True, True, True, True, True])
    """
    if isinstance(substr, bytes):
        substr = substr.decode()
    if not regex:
        substr = re.escape(substr)
    self._empty_pattern_verification(substr)
    # Wrap the pattern in a non-capturing group before anchoring it.
    # Alternation binds more loosely than "$", so a bare "a|b" + "$" would
    # parse as "a|(b$)" and match "a" anywhere in the string.
    return self.contains(f"(?:{substr})$", regex=True)
[docs]
def flatten(
    self, delimiter: str, return_segments: bool = False, regex: bool = False
) -> Union[Strings, Tuple]:
    """Unpack delimiter-joined substrings into a flat array.

    Parameters
    ----------
    delimiter : str
        Characters used to split strings into substrings
    return_segments : bool
        If True, also return mapping of original strings to first substring
        in return array.
    regex : bool
        Indicates whether delimiter is a regular expression.
        Note: only handles regular expressions supported by re2
        (does not support lookaheads/lookbehinds)

    Returns
    -------
    Strings
        Flattened substrings with delimiters removed
    pdarray, int64 (optional)
        For each original string, the index of first corresponding substring
        in the return array

    Raises
    ------
    ValueError
        Raised if regex is True and compiling delimiter with ``re`` fails;
        the original exception is attached as ``__cause__``.

    See Also
    --------
    peel, rpeel

    Examples
    --------
    >>> orig = ak.array(['one|two', 'three|four|five', 'six'])
    >>> orig.flatten('|')
    array(['one', 'two', 'three', 'four', 'five', 'six'])
    >>> flat, map = orig.flatten('|', return_segments=True)
    >>> map
    array([0, 2, 5])
    >>> under = ak.array(['one_two', 'three_____four____five', 'six'])
    >>> under_flat, under_map = under.flatten('_+', return_segments=True, regex=True)
    >>> under_flat
    array(['one', 'two', 'three', 'four', 'five', 'six'])
    >>> under_map
    array([0, 2, 5])
    """
    if regex:
        # Validate the pattern client-side before any work is dispatched.
        try:
            re.compile(delimiter)
        except Exception as e:
            # Chain explicitly so the underlying compile error stays visible.
            raise ValueError(e) from e
        # Regex delimiters are handled by the regex split implementation.
        return self.split(delimiter, return_segments=return_segments)

    repMsg = cast(
        str,
        generic_msg(
            cmd="segmentedFlatten",
            args={
                "values": self.entry,
                "objtype": self.objType,
                "return_segs": return_segments,
                "regex": regex,
                "delim": delimiter,
            },
        ),
    )
    if return_segments:
        # The first two "+"-separated fields describe the Strings, the
        # third the segments pdarray.
        arrays = repMsg.split("+", maxsplit=2)
        return Strings.from_return_msg("+".join(arrays[0:2])), create_pdarray(arrays[2])
    return Strings.from_return_msg(repMsg)
[docs]
@typechecked
def peel(
    self,
    delimiter: Union[bytes, str_scalars],
    times: int_scalars = 1,
    includeDelimiter: bool = False,
    keepPartial: bool = False,
    fromRight: bool = False,
    regex: bool = False,
) -> Tuple:
    """
    Peel off one or more delimited fields from each string (similar
    to string.partition), returning two new arrays of strings.
    *Warning*: This function is experimental and not guaranteed to work.

    Parameters
    ----------
    delimiter : Union[bytes, str_scalars]
        The separator where the split will occur
    times : Union[int, np.int64]
        The number of times the delimiter is sought, i.e. skip over
        the first (times-1) delimiters
    includeDelimiter : bool
        If true, append the delimiter to the end of the first return
        array. In the examples below the delimiter appears in neither
        array when this is False.
    keepPartial : bool
        If true, a string that does not contain <times> instances of
        the delimiter will be returned in the first array. By default,
        such strings are returned in the second array.
    fromRight : bool
        If true, peel from the right instead of the left (see also rpeel)
    regex : bool
        Indicates whether delimiter is a regular expression.
        Note: only handles regular expressions supported by re2
        (does not support lookaheads/lookbehinds)

    Returns
    -------
    Tuple[Strings, Strings]
        left: Strings
            The field(s) peeled from the start of each string (unless
            fromRight is true)
        right: Strings
            The remainder of each string after peeling (unless fromRight
            is true)

    Raises
    ------
    TypeError
        Raised if the delimiter parameter is not byte or str_scalars, if
        times is not int64, or if includeDelimiter, keepPartial, or
        fromRight is not bool
    ValueError
        Raised if times is < 1, if delimiter is not a valid regex, or if
        the regex delimiter matches the empty string
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    rpeel, stick, lstick

    Examples
    --------
    >>> s = ak.array(['a.b', 'c.d', 'e.f.g'])
    >>> s.peel('.')
    (array(['a', 'c', 'e']), array(['b', 'd', 'f.g']))
    >>> s.peel('.', includeDelimiter=True)
    (array(['a.', 'c.', 'e.']), array(['b', 'd', 'f.g']))
    >>> s.peel('.', times=2)
    (array(['', '', 'e.f']), array(['a.b', 'c.d', 'g']))
    >>> s.peel('.', times=2, keepPartial=True)
    (array(['a.b', 'c.d', 'e.f']), array(['', '', 'g']))
    """
    if isinstance(delimiter, bytes):
        delimiter = delimiter.decode()
    if regex:
        # Validate the pattern client-side before contacting the server.
        try:
            re.compile(delimiter)
        except Exception as e:
            # Chain explicitly so the underlying compile error stays visible.
            raise ValueError(e) from e
        if re.search(delimiter, ""):
            raise ValueError(
                "peel with a pattern that matches the empty string are not currently supported"
            )
    if times < 1:
        raise ValueError("times must be >= 1")
    repMsg = generic_msg(
        cmd="segmentedPeel",
        args={
            "subcmd": "peel",
            "objType": self.objType,
            "obj": self.entry,
            "valType": "str",
            "times": NUMBER_FORMAT_STRINGS["int64"].format(times),
            "id": NUMBER_FORMAT_STRINGS["bool"].format(includeDelimiter),
            "keepPartial": NUMBER_FORMAT_STRINGS["bool"].format(keepPartial),
            # The server flag is "peel from the left", the inverse of fromRight.
            "lStr": NUMBER_FORMAT_STRINGS["bool"].format(not fromRight),
            "regex": NUMBER_FORMAT_STRINGS["bool"].format(regex),
            "delim": delimiter,
        },
    )
    arrays = cast(str, repMsg).split("+", maxsplit=3)
    # first two created are left Strings, last two are right strings
    left_str = Strings.from_return_msg("+".join(arrays[0:2]))
    right_str = Strings.from_return_msg("+".join(arrays[2:4]))
    return left_str, right_str
[docs]
def rpeel(
    self,
    delimiter: Union[bytes, str_scalars],
    times: int_scalars = 1,
    includeDelimiter: bool = False,
    keepPartial: bool = False,
    regex: bool = False,
):
    """
    Peel off one or more delimited fields from the end of each string
    (similar to string.rpartition), returning two new arrays of strings.
    *Warning*: This function is experimental and not guaranteed to work.

    This is a convenience wrapper that calls peel with fromRight=True.

    Parameters
    ----------
    delimiter : Union[bytes, str_scalars]
        The separator where the split will occur
    times : Union[int, np.int64]
        The number of times the delimiter is sought, i.e. skip over
        the last (times-1) delimiters
    includeDelimiter : bool
        Forwarded unchanged to peel.
    keepPartial : bool
        If true, a string that does not contain <times> instances of
        the delimiter will be returned in the second array. By default,
        such strings are returned in the first array.
    regex : bool
        Indicates whether delimiter is a regular expression.
        Note: only handles regular expressions supported by re2
        (does not support lookaheads/lookbehinds)

    Returns
    -------
    Tuple[Strings, Strings]
        left: Strings
            The remainder of the string after peeling
        right: Strings
            The field(s) that were peeled from the right of each string

    Raises
    ------
    TypeError
        Raised if the delimiter parameter is not bytes or str_scalars or
        if times is not int64
    ValueError
        Raised if times is < 1 or if delimiter is not a valid regex
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    peel, stick, lstick

    Examples
    --------
    >>> s = ak.array(['a.b', 'c.d', 'e.f.g'])
    >>> s.rpeel('.')
    (array(['a', 'c', 'e.f']), array(['b', 'd', 'g']))
    # Compared against peel
    >>> s.peel('.')
    (array(['a', 'c', 'e']), array(['b', 'd', 'f.g']))
    """
    # Every option is passed straight through; only the direction is fixed.
    peel_options = {
        "times": times,
        "includeDelimiter": includeDelimiter,
        "keepPartial": keepPartial,
        "fromRight": True,
        "regex": regex,
    }
    return self.peel(delimiter, **peel_options)
[docs]
@typechecked
def stick(
    self, other: Strings, delimiter: Union[bytes, str_scalars] = "", toLeft: bool = False
) -> Strings:
    """
    Join the strings from another array onto one end of the strings
    of this array, optionally inserting a delimiter.
    *Warning*: This function is experimental and not guaranteed to work.

    Parameters
    ----------
    other : Strings
        The strings to join onto self's strings
    delimiter : bytes or str_scalars
        String inserted between self and other
    toLeft : bool
        If true, join other strings to the left of self. By default,
        other is joined to the right of self.

    Returns
    -------
    Strings
        The array of joined strings

    Raises
    ------
    TypeError
        Raised if the delimiter parameter is not bytes or str_scalars
        or if the other parameter is not a Strings instance
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    lstick, peel, rpeel

    Examples
    --------
    >>> s = ak.array(['a', 'c', 'e'])
    >>> t = ak.array(['b', 'd', 'f'])
    >>> s.stick(t, delimiter='.')
    array(['a.b', 'c.d', 'e.f'])
    """
    # The request carries the delimiter as str, so decode bytes first.
    delim_text = delimiter.decode() if isinstance(delimiter, bytes) else delimiter
    request_args = {
        "op": "stick",
        "objType": self.objType,
        "obj": self.entry,
        "otherType": other.objType,
        "other": other.entry,
        "left": NUMBER_FORMAT_STRINGS["bool"].format(toLeft),
        "delim": delim_text,
    }
    reply = generic_msg(cmd="segmentedBinopvv", args=request_args)
    return Strings.from_return_msg(cast(str, reply))
def __add__(self, other: Strings) -> Strings:
    """
    Implement ``self + other`` by calling stick with its default arguments.
    """
    joined = self.stick(other)
    return joined
[docs]
def lstick(self, other: Strings, delimiter: Union[bytes, str_scalars] = "") -> Strings:
    """
    Join the strings from another array onto the left of the strings
    of this array, optionally inserting a delimiter.
    *Warning*: This function is experimental and not guaranteed to work.

    This is a convenience wrapper that calls stick with toLeft=True.

    Parameters
    ----------
    other : Strings
        The strings to join onto self's strings
    delimiter : Union[bytes, str_scalars]
        String inserted between self and other

    Returns
    -------
    Strings
        The array of joined strings, as other + self

    Raises
    ------
    TypeError
        Raised if the delimiter parameter is neither bytes nor a str
        or if the other parameter is not a Strings instance
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    stick, peel, rpeel

    Examples
    --------
    >>> s = ak.array(['a', 'c', 'e'])
    >>> t = ak.array(['b', 'd', 'f'])
    >>> s.lstick(t, delimiter='.')
    array(['b.a', 'd.c', 'f.e'])
    """
    stick_options = {"delimiter": delimiter, "toLeft": True}
    return self.stick(other, **stick_options)
def __radd__(self, other: Strings) -> Strings:
    """
    Implement the reflected ``other + self`` by calling lstick with its
    default delimiter.
    """
    joined = self.lstick(other)
    return joined
[docs]
def get_prefixes(
    self, n: int_scalars, return_origins: bool = True, proper: bool = True
) -> Union[Strings, Tuple[Strings, pdarray]]:
    """
    Return the n-long prefix of each string, where possible.

    Parameters
    ----------
    n : int
        Length of prefix
    return_origins : bool
        If True, return a logical index indicating which strings
        were long enough to return an n-prefix
    proper : bool
        If True, only return proper prefixes, i.e. from strings
        that are at least n+1 long. If False, allow the entire
        string to be returned as a prefix.

    Returns
    -------
    prefixes : Strings
        The array of n-character prefixes; the number of elements is the
        number of True values in the returned mask.
    origin_indices : pdarray, bool
        Boolean array that is True where the string was long enough to
        return an n-character prefix, False otherwise. Only returned when
        return_origins is True.
    """
    request_args = {
        "objType": self.objType,
        "name": self,
        "nChars": n,
        "returnOrigins": return_origins,
        "kind": "prefixes",
        "proper": proper,
    }
    reply = cast(str, generic_msg(cmd="segmentedSubstring", args=request_args))
    if not return_origins:
        return Strings.from_return_msg(reply)

    # The first two "+"-separated fields describe the Strings; the third
    # is the boolean origins pdarray.
    fields = reply.split("+")
    prefixes = Strings.from_return_msg("+".join(fields[:2]))
    long_enough = create_pdarray(fields[2])
    return prefixes, cast(pdarray, long_enough)
[docs]
def get_suffixes(
    self, n: int_scalars, return_origins: bool = True, proper: bool = True
) -> Union[Strings, Tuple[Strings, pdarray]]:
    """
    Return the n-long suffix of each string, where possible.

    Parameters
    ----------
    n : int
        Length of suffix
    return_origins : bool
        If True, return a logical index indicating which strings
        were long enough to return an n-suffix
    proper : bool
        If True, only return proper suffixes, i.e. from strings
        that are at least n+1 long. If False, allow the entire
        string to be returned as a suffix.

    Returns
    -------
    suffixes : Strings
        The array of n-character suffixes; the number of elements is the
        number of True values in the returned mask.
    origin_indices : pdarray, bool
        Boolean array that is True where the string was long enough to
        return an n-character suffix, False otherwise. Only returned when
        return_origins is True.
    """
    request_args = {
        "objType": self.objType,
        "name": self,
        "nChars": n,
        "returnOrigins": return_origins,
        "kind": "suffixes",
        "proper": proper,
    }
    reply = cast(str, generic_msg(cmd="segmentedSubstring", args=request_args))
    if not return_origins:
        return Strings.from_return_msg(reply)

    # The first two "+"-separated fields describe the Strings; the third
    # is the boolean origins pdarray.
    fields = reply.split("+")
    suffixes = Strings.from_return_msg("+".join(fields[:2]))
    long_enough = create_pdarray(fields[2])
    return suffixes, cast(pdarray, long_enough)
[docs]
def hash(self) -> Tuple[pdarray, pdarray]:
    """
    Compute a 128-bit hash of each string.

    Returns
    -------
    Tuple[pdarray,pdarray]
        A tuple of two int64 pdarrays. The ith hash value is the concatenation
        of the ith values from each array.

    Notes
    -----
    The implementation uses SipHash128, a fast and balanced hash function (used
    by Python for dictionaries and sets). For realistic numbers of strings (up
    to about 10**15), the probability of a collision between two 128-bit hash
    values is negligible.
    """
    # TODO fix this to return a single pdarray of hashes
    reply = generic_msg(cmd="segmentedHash", args={"objType": self.objType, "obj": self.entry})
    # The reply holds exactly two "+"-separated pdarray descriptors.
    first_half, second_half = cast(str, reply).split("+")
    return create_pdarray(first_half), create_pdarray(second_half)
[docs]
def group(self) -> pdarray:
    """
    Return the permutation that groups the array, placing equivalent
    strings together. All instances of the same string are guaranteed to lie
    in one contiguous block of the permuted array, but the blocks are not
    necessarily ordered.

    Returns
    -------
    pdarray
        The permutation that groups the array by value

    See Also
    --------
    GroupBy, unique

    Notes
    -----
    If the arkouda server is compiled with "-sSegmentedString.useHash=true",
    then arkouda uses 128-bit hash values to group strings, rather than sorting
    the strings directly. This method is fast, but the resulting permutation
    merely groups equivalent strings and does not sort them. If the "useHash"
    parameter is false, then a full sort is performed.

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error in executing group request or
        creating the pdarray encapsulating the return message
    """
    request_args = {"objType": self.objType, "obj": self.entry}
    reply = generic_msg(cmd="segmentedGroup", args=request_args)
    return create_pdarray(reply)
def _get_grouping_keys(self) -> List[Strings]:
    """
    Private method for generating grouping keys used by GroupBy.

    API: this method must be defined by all groupable arrays, and it
    must return a list of arrays that can be (co)argsorted.

    Returns
    -------
    List[Strings]
        A one-element list containing this object.
    """
    grouping_keys = [self]
    return grouping_keys
[docs]
def to_ndarray(self) -> np.ndarray:
    """
    Convert the array to a np.ndarray, transferring array data from the
    arkouda server to Python. If the array exceeds a built-in size limit,
    a RuntimeError is raised.

    Returns
    -------
    np.ndarray
        A numpy ndarray with the same strings as this array

    Notes
    -----
    The number of bytes in the array cannot exceed ``ak.client.maxTransferBytes``,
    otherwise a ``RuntimeError`` will be raised. This is to protect the user
    from overflowing the memory of the system on which the Python client
    is running, under the assumption that the server is running on a
    distributed system with much more memory than the client. The user
    may override this limit by setting ak.client.maxTransferBytes to a larger
    value, but proceed with caution.

    See Also
    --------
    array()
    to_list()

    Examples
    --------
    >>> a = ak.array(["hello", "my", "world"])
    >>> a.to_ndarray()
    array(['hello', 'my', 'world'], dtype='<U5')
    >>> type(a.to_ndarray())
    numpy.ndarray
    """
    # Get offsets and append total bytes for length calculation
    npoffsets = np.hstack((self._comp_to_ndarray("offsets"), np.array([self.nbytes])))
    # Get contents of strings (will error if too large)
    npvalues = self._comp_to_ndarray("values")
    # Compute lengths, discounting null terminators
    lengths = np.diff(npoffsets) - 1
    # Numpy dtype is based on max string length (measured in bytes, which is
    # an upper bound on the number of characters); never narrower than 1.
    width = max(int(lengths.max()), 1) if len(lengths) > 0 else 1
    res = np.empty(self.size, dtype=f"<U{width}")
    # Decode each segment's bytes in one shot; slicing the byte array and
    # calling tobytes() avoids creating one Python object per byte.
    for i, (start, length) in enumerate(zip(npoffsets, lengths)):
        res[i] = npvalues[start : start + length].tobytes().decode("utf-8")
    return res
[docs]
def to_list(self) -> list:
    """
    Convert the SegString to a list, transferring data from the
    arkouda server to Python. If the SegString exceeds a built-in size limit,
    a RuntimeError is raised.

    The conversion goes through to_ndarray and then calls ``tolist`` on
    the resulting numpy array.

    Returns
    -------
    list
        A list with the same strings as this SegString

    Notes
    -----
    The number of bytes in the array cannot exceed ``ak.client.maxTransferBytes``,
    otherwise a ``RuntimeError`` will be raised. This is to protect the user
    from overflowing the memory of the system on which the Python client
    is running, under the assumption that the server is running on a
    distributed system with much more memory than the client. The user
    may override this limit by setting ak.client.maxTransferBytes to a larger
    value, but proceed with caution.

    See Also
    --------
    to_ndarray()

    Examples
    --------
    >>> a = ak.array(["hello", "my", "world"])
    >>> a.to_list()
    ['hello', 'my', 'world']
    >>> type(a.to_list())
    list
    """
    as_numpy = self.to_ndarray()
    return as_numpy.tolist()
def _comp_to_ndarray(self, comp: str) -> np.ndarray:
    """
    Internal helper that performs the to_ndarray transfer for one of the
    string components.

    Parameters
    ----------
    comp : str
        The strings component to request. "offsets" is read as int64;
        any other value is read as uint8.

    Returns
    -------
    np.ndarray
        A numpy ndarray holding a copy of the bytes received for the
        requested component

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown, if the pdarray size
        exceeds the built-in client.maxTransferBytes size limit, or if the bytes
        received does not match expected number of bytes

    Notes
    -----
    The number of bytes in the array cannot exceed ``client.maxTransferBytes``,
    otherwise a ``RuntimeError`` will be raised. This is to protect the user
    from overflowing the memory of the system on which the Python client
    is running, under the assumption that the server is running on a
    distributed system with much more memory than the client. The user
    may override this limit by setting client.maxTransferBytes to a larger
    value, but proceed with caution.
    """
    # Imported at call time so the current value of the limit is used.
    from arkouda.client import maxTransferBytes

    wants_offsets = comp == "offsets"

    # Total number of bytes in the array data
    if wants_offsets:
        array_bytes = self.size * arkouda.numpy.dtypes.dtype(arkouda.numpy.dtypes.int64).itemsize
    else:
        array_bytes = self.nbytes * arkouda.numpy.dtypes.dtype(arkouda.numpy.dtypes.uint8).itemsize

    # Guard against overflowing client memory
    if array_bytes > maxTransferBytes:
        raise RuntimeError(
            "Array exceeds allowed size for transfer. Increase ak.client.maxTransferBytes to allow"
        )

    # The reply from the server will be a bytes object
    rep_msg = generic_msg(
        cmd=CMD_TO_NDARRAY, args={"obj": self.entry, "comp": comp}, recv_binary=True
    )

    # Make sure the received data has the expected length
    if len(rep_msg) != array_bytes:
        raise RuntimeError(f"Expected {array_bytes} bytes but received {len(rep_msg)}")

    # The server sends us native-endian bytes so we need to account for that.
    base_dtype: np.dtype = np.dtype(np.int64) if wants_offsets else np.dtype(np.uint8)
    byte_order = ">" if arkouda.numpy.dtypes.get_server_byteorder() == "big" else "<"
    dt = base_dtype.newbyteorder(byte_order)

    # Since bytes are immutable, we need to copy the np array to be mutable
    raw = rep_msg.encode("utf_8") if isinstance(rep_msg, str) else rep_msg
    return np.frombuffer(raw, dt).copy()
[docs]
def astype(self, dtype) -> pdarray:
    """
    Cast values of Strings object to provided dtype.

    Parameters
    ----------
    dtype : np.dtype or str
        Dtype to cast to

    Returns
    -------
    ak.pdarray
        An arkouda pdarray with values converted to the specified data type

    Notes
    -----
    This is essentially shorthand for ak.cast(x, '<dtype>'), applied to
    this Strings object.
    """
    # Local import, aliased so it does not shadow typing.cast in this module.
    from arkouda.numeric import cast as akcast

    converted = akcast(self, dtype)
    return converted
[docs]
def to_parquet(
    self,
    prefix_path: str,
    dataset: str = "strings_array",
    mode: str = "truncate",
    compression: Optional[str] = None,
) -> str:
    """
    Save the Strings object to Parquet. The result is a collection of files,
    one file per locale of the arkouda server, where each filename starts
    with prefix_path. Each locale saves its chunk of the array to its
    corresponding file.

    Parameters
    ----------
    prefix_path : str
        Directory and filename prefix that all output files share
    dataset : str
        Name of the dataset to create in files (must not already exist)
    mode : str {'truncate' | 'append'}
        By default, truncate (overwrite) output files, if they exist.
        If 'append', attempt to create new dataset in existing files.
    compression : str (Optional)
        (None | "snappy" | "gzip" | "brotli" | "zstd" | "lz4")
        Sets the compression type used with Parquet files

    Returns
    -------
    str
        String message indicating result of save operation

    Raises
    ------
    RuntimeError
        Raised if a server-side error is thrown saving the pdarray

    Notes
    -----
    - The prefix_path must be visible to the arkouda server and the user must
      have write permission.
    - Output files have names of the form ``<prefix_path>_LOCALE<i>``, where ``<i>``
      ranges from 0 to ``numLocales``.
    - 'append' write mode is supported, but is not efficient.
    - If any of the output files already exist and
      the mode is 'truncate', they will be overwritten. If the mode is 'append'
      and the number of output files is less than the number of locales or a
      dataset with the same name already exists, a ``RuntimeError`` will result.
    - Any file extension can be used. The file I/O does not rely on the
      extension to determine the file format.
    """
    from arkouda.io import _mode_str_to_int

    request_args = {
        "values": self.entry,
        "dset": dataset,
        # The request carries the write mode as an integer code.
        "mode": _mode_str_to_int(mode),
        "prefix": prefix_path,
        "objType": "strings",
        "dtype": self.dtype,
        "compression": compression,
    }
    reply = generic_msg("writeParquet", request_args)
    return cast(str, reply)
[docs]
def to_hdf(
    self,
    prefix_path: str,
    dataset: str = "strings_array",
    mode: str = "truncate",
    save_offsets: bool = True,
    file_type: str = "distribute",
) -> str:
    """
    Save the Strings object to HDF5.
    The object can be saved to a collection of files or single file.

    Parameters
    ----------
    prefix_path : str
        Directory and filename prefix that all output files share
    dataset : str
        The name of the Strings dataset to be written, defaults to strings_array
    mode : str {'truncate' | 'append'}
        By default, truncate (overwrite) output files, if they exist.
        If 'append', create a new Strings dataset within existing files.
    save_offsets : bool
        Defaults to True which will instruct the server to save the offsets
        array to HDF5. If False the offsets array will not be saved and will
        be derived from the string values upon load/read.
    file_type : str ("single" | "distribute")
        Default: Distribute
        Distribute the dataset over a file per locale.
        Single file will save the dataset to one file

    Returns
    -------
    str
        String message indicating result of save operation

    Raises
    ------
    RuntimeError
        Raised if a server-side error is thrown saving the pdarray

    Notes
    -----
    - Parquet files do not store the segments, only the values.
    - Strings state is saved as two datasets within an hdf5 group:
      one for the string characters and one for the
      segments corresponding to the start of each string
    - the hdf5 group is named via the dataset parameter.
    - The prefix_path must be visible to the arkouda server and the user must
      have write permission.
    - Output files have names of the form ``<prefix_path>_LOCALE<i>``, where ``<i>``
      ranges from 0 to ``numLocales`` for `file_type='distribute'`. Otherwise,
      the file name will be `prefix_path`.
    - If any of the output files already exist and
      the mode is 'truncate', they will be overwritten. If the mode is 'append'
      and the number of output files is less than the number of locales or a
      dataset with the same name already exists, a ``RuntimeError`` will result.
    - Any file extension can be used. The file I/O does not rely on the
      extension to determine the file format.

    See Also
    --------
    to_parquet
    """
    from arkouda.io import _file_type_to_int, _mode_str_to_int

    request_args = {
        "values": self.entry,
        "dset": dataset,
        # Mode and file type are sent as integer codes.
        "write_mode": _mode_str_to_int(mode),
        "filename": prefix_path,
        "dtype": self.dtype,
        "save_offsets": save_offsets,
        "objType": "strings",
        "file_format": _file_type_to_int(file_type),
    }
    reply = generic_msg("tohdf", request_args)
    return cast(str, reply)
[docs]
def update_hdf(
    self,
    prefix_path: str,
    dataset: str = "strings_array",
    save_offsets: bool = True,
    repack: bool = True,
):
    """
    Overwrite the dataset with the name provided with this Strings object. If
    the dataset does not exist it is added.

    Parameters
    ----------
    prefix_path : str
        Directory and filename prefix that all output files share
    dataset : str
        Name of the dataset to create in files
    save_offsets : bool
        Defaults to True which will instruct the server to save the offsets
        array to HDF5. If False the offsets array will not be saved and will
        be derived from the string values upon load/read.
    repack : bool
        Default: True
        HDF5 does not release memory on delete. When True, the inaccessible
        data (that was overwritten) is removed. When False, the data remains, but is
        inaccessible. Setting to false will yield better performance, but will cause
        file sizes to expand.

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        Raised if a server-side error is thrown saving the Strings object

    Notes
    -----
    - If file does not contain File_Format attribute to indicate how it was saved,
      the file name is checked for _LOCALE#### to determine if it is distributed.
    - If the dataset provided does not exist, it will be added
    """
    from arkouda.io import (
        _file_type_to_int,
        _get_hdf_filetype,
        _mode_str_to_int,
        _repack_hdf,
    )

    # determine the format (single/distribute) that the file was saved in
    existing_format = _get_hdf_filetype(prefix_path + "*")

    request_args = {
        "values": self,
        "dset": dataset,
        # Written in append mode with the overwrite flag set.
        "write_mode": _mode_str_to_int("append"),
        "filename": prefix_path,
        "dtype": self.dtype,
        "save_offsets": save_offsets,
        "objType": "strings",
        "file_format": _file_type_to_int(existing_format),
        "overwrite": True,
    }
    generic_msg(cmd="tohdf", args=request_args)

    if not repack:
        return
    _repack_hdf(prefix_path)
[docs]
@typechecked
def to_csv(
    self,
    prefix_path: str,
    dataset: str = "strings_array",
    col_delim: str = ",",
    overwrite: bool = False,
):
    """
    Write this Strings object to CSV file(s) as a single column.

    All CSV files written by Arkouda include a header denoting data types of
    the columns. Unlike other file formats, CSV files store Strings in their
    UTF-8 form instead of storing bytes as uint(8).

    Parameters
    ----------
    prefix_path : str
        The filename prefix to be used for saving files. Files will have
        _LOCALE#### appended when they are written to disk.
    dataset : str
        Column name to save the Strings under. Defaults to "strings_array".
    col_delim : str
        Defaults to ",". Value to be used to separate columns within the file.
        Please be sure that the value used DOES NOT appear in your dataset.
    overwrite : bool
        Defaults to False. If True, any existing files matching the provided
        prefix_path will be overwritten. If False, an error will be returned
        if existing files are found.

    Returns
    -------
    str
        The response message returned by the server

    Raises
    ------
    RuntimeError
        NOTE(review): presumably raised on a server-side failure while writing
        the file(s) -- confirm against generic_msg.

    Notes
    -----
    - CSV format is not currently supported by load/load_all operations
    - The column delimiter is expected to be the same for column names and data
    - Be sure that column delimiters are not found within your data.
    - All CSV files must delimit rows using newline (``\\n``) at this time.
    """
    # One dataset / one column: every list-valued argument has a single entry.
    csv_request = {
        "datasets": [self],
        "col_names": [dataset],
        "filename": prefix_path,
        "num_dsets": 1,
        "col_delim": col_delim,
        "dtypes": [self.dtype.name],
        "row_count": self.size,
        "overwrite": overwrite,
    }
    server_reply = generic_msg(cmd="writecsv", args=csv_request)
    return cast(str, server_reply)
[docs]
@typechecked
def save(
    self,
    prefix_path: str,
    dataset: str = "strings_array",
    mode: str = "truncate",
    save_offsets: bool = True,
    compression: Optional[str] = None,
    file_format: str = "HDF5",
    file_type: str = "distribute",
) -> str:
    """
    DEPRECATED
    Save the Strings object to HDF5 or Parquet by delegating to ``to_hdf`` or
    ``to_parquet``. The result is a collection of files, one file per locale
    of the arkouda server, where each filename starts with prefix_path. HDF5
    supports single files, in which case the file name will only be that
    provided. Each locale saves its chunk of the array to its corresponding
    file.

    Parameters
    ----------
    prefix_path : str
        Directory and filename prefix that all output files share
    dataset : str
        The name of the Strings dataset to be written, defaults to strings_array
    mode : str {'truncate' | 'append'}
        By default, truncate (overwrite) output files, if they exist.
        If 'append', create a new Strings dataset within existing files.
        Validated case-insensitively, then forwarded unchanged.
    save_offsets : bool
        Defaults to True, which instructs the server to save the offsets array
        to HDF5. If False, the offsets array is not saved and is derived from
        the string values upon load/read. Only forwarded for HDF5.
    compression : str (Optional)
        (None | "snappy" | "gzip" | "brotli" | "zstd" | "lz4")
        Sets the compression type used with Parquet files. Only forwarded for
        Parquet.
    file_format : str
        By default, saved files will be written to the HDF5 file format. If
        'Parquet', the files will be written to the Parquet file format. This
        is case insensitive.
    file_type : str ("single" | "distribute")
        Default: distribute
        Distribute the dataset over a file per locale.
        Single file will save the dataset to one file. Only forwarded for HDF5.

    Returns
    -------
    str
        Message indicating the result of the save operation

    Raises
    ------
    ValueError
        Raised if mode is neither 'truncate' nor 'append', or if file_format
        is neither HDF5 nor Parquet

    Notes
    -----
    Important implementation notes: (1) Strings state is saved as two datasets
    within an hdf5 group: one for the string characters and one for the
    segments corresponding to the start of each string, (2) the hdf5 group is
    named via the dataset parameter. (3) Parquet files do not store the
    segments, only the values.
    """
    import warnings

    warnings.warn(
        "ak.Strings.save has been deprecated. Please use ak.Strings.to_hdf or ak.Strings.to_parquet",
        DeprecationWarning,
    )

    if mode.lower() not in ("append", "truncate"):
        raise ValueError("Allowed modes are 'truncate' and 'append'")

    requested_format = file_format.lower()
    if requested_format == "hdf5":
        return self.to_hdf(
            prefix_path,
            dataset=dataset,
            mode=mode,
            save_offsets=save_offsets,
            file_type=file_type,
        )
    if requested_format == "parquet":
        return self.to_parquet(
            prefix_path,
            dataset=dataset,
            mode=mode,
            compression=compression,
        )
    raise ValueError("Valid file types are HDF5 or Parquet")
def _list_component_names(self) -> List[str]:
"""
Internal Function that returns a list of all component names
Parameters
----------
None
Returns
-------
List[str]
List of all component names
"""
return list(itertools.chain.from_iterable([self.entry._list_component_names()]))
[docs]
def info(self) -> str:
    """
    Describe every component of this object.

    Parameters
    ----------
    None

    Returns
    -------
    str
        The result of ``information`` for this object's component names
        (a JSON formatted string)
    """
    component_names = self._list_component_names()
    return information(component_names)
[docs]
def pretty_print_info(self) -> None:
    """
    Print information about all components of self in a human readable format.

    The work is delegated to ``self.entry.pretty_print_info()``.

    Parameters
    ----------
    None

    Returns
    -------
    None
    """
    backing_entry = self.entry
    backing_entry.pretty_print_info()
[docs]
@typechecked
def register(self, user_defined_name: str) -> Strings:
    """
    Register this Strings object with a user defined name in the arkouda server
    so it can be attached to later.

    This is an in-place operation. An object that already carries a registered
    name which is still present in the registry cannot be registered again; it
    must be unregistered first. A name can only be registered to one object at
    a time.

    Parameters
    ----------
    user_defined_name : str
        user defined name which the Strings object is to be registered under

    Returns
    -------
    Strings
        The same Strings object, which is now registered with the arkouda
        server and has its ``registered_name`` updated.
        This is an in-place modification; the original is returned to support
        a fluid programming style.

    Raises
    ------
    TypeError
        Raised if user_defined_name is not a str
    RegistrationError
        Raised if this object is already registered, or if the server was
        unable to register the Strings object with the user_defined_name.
        If the user is attempting to register more than one object with the
        same name, the former should be unregistered first to free up the
        registration name.

    See also
    --------
    attach, unregister

    Notes
    -----
    Registered names/Strings objects in the server are immune to deletion
    until they are unregistered.
    """
    existing_name = self.registered_name
    if existing_name is not None and self.is_registered():
        raise RegistrationError(f"This object is already registered as {existing_name}")

    registration_args = {
        "name": user_defined_name,
        "objType": self.objType,
        "array": self.name,
    }
    generic_msg(cmd="register", args=registration_args)

    # Record the name only after the server accepted the request.
    self.registered_name = user_defined_name
    return self
[docs]
def unregister(self) -> None:
    """
    Unregister a Strings object in the arkouda server which was previously
    registered using register() and/or attached to using attach().

    Parameters
    ----------
    None

    Returns
    -------
    None

    Raises
    ------
    RegistrationError
        Raised if this object has no registered name
    RuntimeError
        Raised if the server could not find the internal name/symbol to remove

    See also
    --------
    register, attach

    Notes
    -----
    Registered names/Strings objects in the server are immune to deletion until
    they are unregistered.
    """
    from arkouda.util import unregister as _unregister_by_name

    current_name = self.registered_name
    if not current_name:
        raise RegistrationError("This object is not registered")

    _unregister_by_name(current_name)
    # Clear the local record once the server-side entry is gone.
    self.registered_name = None
[docs]
def is_registered(self) -> np.bool_:
    """
    Return True iff the object is contained in the registry.

    When this object has a registered name, that name is looked up. Otherwise
    the object's server-side name is looked up as a component.

    Parameters
    ----------
    None

    Returns
    -------
    numpy.bool_
        Indicates if the object is contained in the registry

    Raises
    ------
    RuntimeError
        Raised if there's a server-side error thrown
    """
    from arkouda.util import is_registered as _lookup

    if self.registered_name is not None:
        return np.bool_(_lookup(self.registered_name))
    # No user-facing name: check whether the underlying symbol is registered
    # as a component.
    return np.bool_(_lookup(self.name, as_component=True))
[docs]
@staticmethod
@typechecked
def attach(user_defined_name: str) -> Strings:
    """
    DEPRECATED
    Static method returning the object attached to a registered name in the
    arkouda server. Emits a DeprecationWarning and forwards to the generic
    ``attach`` in ``arkouda.util``.

    Parameters
    ----------
    user_defined_name : str
        user defined name which the Strings object was registered under

    Returns
    -------
    Strings object
        the Strings object registered with user_defined_name in the arkouda server

    Raises
    ------
    TypeError
        Raised if user_defined_name is not a str

    See also
    --------
    register, unregister

    Notes
    -----
    Registered names/Strings objects in the server are immune to deletion
    until they are unregistered.
    """
    from warnings import warn

    from arkouda.util import attach as _attach_by_name

    warn(
        "ak.Strings.attach() is deprecated. Please use ak.attach() instead.",
        DeprecationWarning,
    )
    return _attach_by_name(user_defined_name)
[docs]
@staticmethod
@typechecked
def unregister_strings_by_name(user_defined_name: str) -> None:
    """
    DEPRECATED
    Unregister a Strings object in the arkouda server previously registered
    via register(). Emits a DeprecationWarning and forwards to the generic
    ``unregister`` in ``arkouda.util``.

    Parameters
    ----------
    user_defined_name : str
        The registered name of the Strings object

    Raises
    ------
    TypeError
        Raised if user_defined_name is not a str

    See also
    --------
    register, unregister, attach, is_registered
    """
    import warnings

    from arkouda.util import unregister

    # The message must name this method; it previously referred to a
    # non-existent ``unregister_segarray_by_name``.
    warnings.warn(
        "ak.Strings.unregister_strings_by_name() is deprecated. "
        "Please use ak.unregister() instead.",
        DeprecationWarning,
    )
    return unregister(user_defined_name)
[docs]
def transfer(self, hostname: str, port: int_scalars):
    """
    Send this Strings object to a different Arkouda server.

    Parameters
    ----------
    hostname : str
        The hostname where the Arkouda server intended to
        receive the Strings object is running.
    port : int_scalars
        The port to send the array over. This needs to be an
        open port (i.e., not one that the Arkouda server is
        running on). This will open up `numLocales` ports,
        each of which in succession, so will use ports of the
        range {port..(port+numLocales)} (e.g., running an
        Arkouda server of 4 nodes, port 1234 is passed as
        `port`, Arkouda will use ports 1234, 1235, 1236,
        and 1237 to send the array data).
        This port must match the port passed to the call to
        `ak.receive_array()`.

    Returns
    -------
    The reply of the ``sendArray`` server command: a message indicating a
    complete transfer

    Raises
    ------
    RuntimeError
        NOTE(review): presumably raised on a server-side failure during the
        transfer -- confirm against generic_msg.
    """
    send_request = {
        "values": self.entry,
        "hostname": hostname,  # destination host
        "port": port,
        "objType": "strings",
    }
    return generic_msg(cmd="sendArray", args=send_request)