# Source code for arkouda.pdarraycreation

import itertools
from typing import Iterable, List, Optional, Tuple, Union, cast

import numpy as np  # type: ignore
import pandas as pd  # type: ignore
from typeguard import typechecked

from arkouda.client import generic_msg
from arkouda.dtypes import (
    NUMBER_FORMAT_STRINGS,
    BigInt,
    DTypes,
    NumericDTypes,
    SeriesDTypes,
    bigint,
)
from arkouda.dtypes import dtype as akdtype
from arkouda.dtypes import float64, get_byteorder, get_server_byteorder
from arkouda.dtypes import int64 as akint64
from arkouda.dtypes import (
    int_scalars,
    isSupportedInt,
    isSupportedNumber,
    numeric_scalars,
    resolve_scalar_dtype,
)
from arkouda.dtypes import uint64 as akuint64
from arkouda.pdarrayclass import create_pdarray, pdarray
from arkouda.strings import Strings

# Public names exported by a star-import of this module.
__all__ = [
    "array",
    "zeros",
    "ones",
    "full",
    "zeros_like",
    "ones_like",
    "full_like",
    "arange",
    "linspace",
    "randint",
    "uniform",
    "standard_normal",
    "random_strings_uniform",
    "random_strings_lognormal",
    "from_series",
    "bigint_from_uint_arrays",
]


@typechecked
def from_series(series: pd.Series, dtype: Optional[Union[type, str]] = None) -> Union[pdarray, Strings]:
    """
    Convert a Pandas Series into an Arkouda pdarray or Strings object.

    When ``dtype`` is not supplied the dtype name of the Series is used.
    Supplying ``dtype`` overrides the Series dtype, which is useful when the
    Series dtype is ``object`` or otherwise not what is wanted.

    Parameters
    ----------
    series : Pandas Series
        The Series to convert
    dtype : Optional[Union[type, str]]
        Overriding dtype; its ``str()`` form is looked up in ``SeriesDTypes``

    Returns
    -------
    Union[pdarray, Strings]
        The result of calling ``ak.array`` on the converted numpy array

    Raises
    ------
    TypeError
        Raised if series is not a Pandas Series object
    ValueError
        Raised if the resolved dtype name is not a key of ``SeriesDTypes``

    Examples
    --------
    >>> ak.from_series(pd.Series(np.random.randint(0,10,5)))
    array([9, 0, 4, 7, 9])

    >>> ak.from_series(pd.Series(['1', '2', '3', '4', '5']),dtype=np.int64)
    array([1, 2, 3, 4, 5])

    >>> ak.from_series(pd.Series(['a', 'b', 'c', 'd', 'e'], dtype="string"))
    array(['a', 'b', 'c', 'd', 'e'])

    >>> ak.from_series(pd.Series(pd.to_datetime(['1/1/2018', np.datetime64('2018-01-01')])))
    array([1514764800000000000, 1514764800000000000])

    Notes
    -----
    A Series with dtype ``object`` is assumed to hold strings and is looked
    up under the ``"string"`` key.
    """
    dt = str(dtype) if dtype else series.dtype.name

    # Pandas reports plain Python strings as "object"; treat such a Series as
    # a string Series so callers do not have to pass a dtype explicitly.
    if dt == "object":
        dt = "string"

    try:
        n_array = series.to_numpy(dtype=SeriesDTypes[dt])
    except KeyError:
        raise ValueError(
            f"dtype {dt} is unsupported. Supported dtypes are bool, float64, int64, string, "
            f"datetime64[ns], and timedelta64[ns]"
        )
    return array(n_array)
def array(
    a: Union[pdarray, np.ndarray, Iterable],
    dtype: Union[np.dtype, type, str, None] = None,
    max_bits: int = -1,
) -> Union[pdarray, Strings]:
    """
    Convert a Python or Numpy Iterable to a pdarray or Strings object, sending
    the corresponding data to the arkouda server.

    Parameters
    ----------
    a: Union[pdarray, np.ndarray, Iterable]
        Array-like of a supported dtype
    dtype: np.dtype, type, or str
        The target dtype to cast values to
    max_bits: int
        Specifies the maximum number of bits; only used for bigint pdarrays

    Returns
    -------
    pdarray or Strings
        A pdarray instance stored on arkouda server or Strings instance, which
        is composed of two pdarrays stored on arkouda server

    Raises
    ------
    TypeError
        Raised if a is not a pdarray, np.ndarray, or Python Iterable such as a
        list, array, tuple, or deque, or if a multi-dimensional input does not
        have a numeric dtype
    RuntimeError
        Raised if the data to transfer exceeds maxTransferBytes, or if the
        dtype is unsupported and the values cannot be split into uint64 limbs
    ValueError
        Raised if the returned message is malformed or does not contain the
        fields required to generate the array.

    See Also
    --------
    pdarray.to_ndarray

    Notes
    -----
    The number of bytes in the input array cannot exceed `ak.client.maxTransferBytes`,
    otherwise a RuntimeError will be raised. This is to protect the user
    from overwhelming the connection between the Python client and the arkouda
    server, under the assumption that it is a low-bandwidth connection. The user
    may override this limit by setting ak.client.maxTransferBytes to a larger
    value, but should proceed with caution.

    String input is encoded element by element, each followed by a null byte,
    and sent to the server as a single uint8 payload.

    When ``dtype`` is bigint the values are split into 64-bit limbs which are
    sent as uint64 arrays and recombined with ``bigint_from_uint_arrays``.
    All-zero (or empty) input is sent as a single all-zero limb.

    Examples
    --------
    >>> ak.array(np.arange(1,10))
    array([1, 2, 3, 4, 5, 6, 7, 8, 9])

    >>> ak.array(range(1,10))
    array([1, 2, 3, 4, 5, 6, 7, 8, 9])

    >>> strings = ak.array([f'string {i}' for i in range(0,5)])
    >>> type(strings)
    <class 'arkouda.strings.Strings'>
    """
    # Removed @typechecked to prevent cyclic dependencies with ArrayView
    from arkouda.numeric import cast as akcast

    # If a is already a pdarray, only cast (and tag max_bits) as requested
    if isinstance(a, pdarray):
        casted = a if dtype is None else akcast(a, dtype)
        if dtype == bigint and max_bits != -1:
            casted.max_bits = max_bits
        return casted

    from arkouda.client import maxTransferBytes

    # If a is not already a numpy.ndarray, convert it
    if not isinstance(a, np.ndarray):
        try:
            if dtype is not None and dtype != bigint:
                # if the user specified dtype, use that dtype
                a = np.array(a, dtype=dtype)
            elif all(isSupportedInt(i) for i in a) and any(2**64 > i > 2**63 for i in a):
                # all supportedInt values but some >2**63, default to uint (see #1297)
                # iterating shouldn't be too expensive since
                # this is after the `isinstance(a, pdarray)` check
                a = np.array(a, dtype=np.uint)
            else:
                # let numpy decide the type
                a = np.array(a)
        except (RuntimeError, TypeError, ValueError):
            raise TypeError("a must be a pdarray, np.ndarray, or convertible to a numpy array")

    # Return multi-dimensional arrayview
    if a.ndim != 1:
        # TODO add order
        if a.dtype.name in NumericDTypes:
            flat_a = array(a.flatten(), dtype=dtype)
            if isinstance(flat_a, pdarray):
                # break into parts so mypy doesn't think we're calling reshape on a Strings
                return flat_a.reshape(a.shape)
        else:
            raise TypeError("Must be an iterable or have a numeric DType")

    # Check if array of strings
    # if a.dtype == numpy.object_ need to check first element
    if "U" in a.dtype.kind or (a.dtype == np.object_ and isinstance(a[0], str)):
        # encode each string and add a null byte terminator
        encoded = list(itertools.chain.from_iterable(map(lambda x: x.encode() + b"\x00", a)))
        nbytes = len(encoded)
        if nbytes > maxTransferBytes:
            raise RuntimeError(
                f"Creating pdarray would require transferring {nbytes} bytes, which exceeds "
                f"allowed transfer size. Increase ak.client.maxTransferBytes to force."
            )
        encoded_np = np.array(encoded, dtype=np.uint8)
        rep_msg = generic_msg(
            cmd="array1D",
            args={"dtype": encoded_np.dtype.name, "shape": encoded_np.size, "seg_string": True},
            payload=_array_memview(encoded_np),
            send_binary=True,
        )
        parts = cast(str, rep_msg).split("+", maxsplit=3)
        return (
            Strings.from_parts(parts[0], parts[1])
            if dtype is None
            else akcast(Strings.from_parts(parts[0], parts[1]), dtype)
        )

    # If not strings, then check that dtype is supported in arkouda
    if dtype == bigint or a.dtype.name not in DTypes:
        # 2 situations result in attempting to call `bigint_from_uint_arrays`
        # 1. user specified i.e. dtype=ak.bigint
        # 2. too big to fit into other numpy types (dtype = object)
        try:
            # attempt to break bigint into multiple uint64 arrays
            uint_arrays: List[Union[pdarray, Strings]] = []
            # early out if we would have more uint arrays than can fit in max_bits
            early_out = (max_bits // 64) + (max_bits % 64 != 0) if max_bits != -1 else float("inf")
            while any(a != 0) and len(uint_arrays) < early_out:
                low, a = a % 2**64, a // 2**64
                uint_arrays.append(array(np.array(low, dtype=np.uint), dtype=akuint64))
            if not uint_arrays and dtype == bigint:
                # All-zero (or empty) input never enters the loop above, which used to
                # hand an empty list to bigint_from_uint_arrays and surface as a
                # misleading "Unhandled dtype" error. Send one all-zero limb instead.
                uint_arrays.append(array(np.zeros(a.size, dtype=np.uint), dtype=akuint64))
            return bigint_from_uint_arrays(uint_arrays[::-1], max_bits=max_bits)
        except TypeError:
            raise RuntimeError(f"Unhandled dtype {a.dtype}")
    else:
        # Do not allow arrays that are too large
        size = a.size
        if (size * a.itemsize) > maxTransferBytes:
            raise RuntimeError(
                "Array exceeds allowed transfer size. Increase ak.client.maxTransferBytes to allow"
            )
        # Pack binary array data into a bytes object with a command header
        # including the dtype and size. If the server has a different byteorder
        # than our numpy array we need to swap to match since the server expects
        # native endian bytes
        aview = _array_memview(a)
        rep_msg = generic_msg(
            cmd="array1D",
            args={"dtype": a.dtype.name, "shape": size, "seg_string": False},
            payload=aview,
            send_binary=True,
        )
        return create_pdarray(rep_msg) if dtype is None else akcast(create_pdarray(rep_msg), dtype)
def _array_memview(a) -> memoryview:
    """
    Return a memoryview over ``a`` in the byte order reported for the server.

    The array is byte-swapped (which copies it) only when its dtype byte
    order is explicitly little-endian while the server reports "big", or
    explicitly big-endian while the server reports "little". Any other
    dtype byte order is passed through unchanged.
    """
    local_order = get_byteorder(a.dtype)

    # The server byte order is only consulted when the local order is one of
    # the two explicit markers.
    if local_order == "<":
        needs_swap = get_server_byteorder() == "big"
    elif local_order == ">":
        needs_swap = get_server_byteorder() == "little"
    else:
        needs_swap = False

    return memoryview(a.byteswap() if needs_swap else a)
def bigint_from_uint_arrays(arrays, max_bits=-1):
    """
    Create a bigint pdarray from an iterable of uint pdarrays.
    The first item in arrays will be the highest 64 bits and
    the last item will be the lowest 64 bits.

    Parameters
    ----------
    arrays : Sequence[pdarray]
        An iterable of uint pdarrays used to construct the bigint pdarray.
        The first item in arrays will be the highest 64 bits and
        the last item will be the lowest 64 bits. One-shot iterators
        (e.g. generators) are accepted.
    max_bits : int
        Specifies the maximum number of bits; only used for bigint pdarrays

    Returns
    -------
    pdarray
        bigint pdarray constructed from uint arrays

    Raises
    ------
    TypeError
        Raised if any pdarray in arrays has a dtype other than uint, if
        the pdarrays are not the same size, or if arrays is empty.
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    pdarray.bigint_to_uint_arrays

    Examples
    --------
    >>> a = ak.bigint_from_uint_arrays([ak.ones(5, dtype=ak.uint64), ak.arange(5, dtype=ak.uint64)])
    >>> a
    array(["18446744073709551616" "18446744073709551617" "18446744073709551618"
    "18446744073709551619" "18446744073709551620"])

    >>> a.dtype
    dtype(bigint)

    >>> all(a[i] == 2**64 + i for i in range(5))
    True
    """
    # Materialize before validating: the checks below each iterate the input,
    # so a one-shot iterator would be exhausted by the first check and then
    # be rejected by the second as if it were empty.
    if not isinstance(arrays, list):
        arrays = list(arrays)

    if not all(isinstance(a, pdarray) and a.dtype == akuint64 for a in arrays):
        raise TypeError("Sequence must contain only uint pdarrays")
    # An empty input yields an empty set of sizes and is rejected here too.
    if len({a.size for a in arrays}) != 1:
        raise TypeError("All pdarrays must be same size")

    if max_bits != -1:
        # truncate if we have more uint arrays than can fit in max_bits
        max_num_arrays = (max_bits // 64) + (max_bits % 64 != 0)
        if len(arrays) > max_num_arrays:
            # only want max_num_arrays from the right (because those are the lowest bits)
            arrays = arrays[-max_num_arrays:]

    return create_pdarray(
        generic_msg(
            cmd="big_int_creation",
            args={
                "arrays": arrays,
                "num_arrays": len(arrays),
                "len": arrays[0].size,
                "max_bits": max_bits,
            },
        )
    )
@typechecked
def zeros(
    size: Union[int_scalars, str],
    dtype: Union[np.dtype, type, str, BigInt] = float64,
    max_bits: Optional[int] = None,
) -> pdarray:
    """
    Create a zero-filled pdarray.

    Parameters
    ----------
    size : int_scalars
        Size of the array (only rank-1 arrays supported)
    dtype : all_scalars
        Type of resulting array, default float64
    max_bits: int
        Specifies the maximum number of bits; only used for bigint pdarrays

    Returns
    -------
    pdarray
        Zeros of the requested size and dtype

    Raises
    ------
    TypeError
        Raised if size is not a scalar or if the normalized dtype name is
        not one of the supported numeric dtypes.

    See Also
    --------
    ones, zeros_like

    Examples
    --------
    >>> ak.zeros(5, dtype=ak.int64)
    array([0, 0, 0, 0, 0])

    >>> ak.zeros(5, dtype=ak.float64)
    array([0, 0, 0, 0, 0])

    >>> ak.zeros(5, dtype=ak.bool)
    array([False, False, False, False, False])
    """
    if not np.isscalar(size):
        raise TypeError(f"size must be a scalar, not {size.__class__.__name__}")

    # Normalize whatever the caller passed into an arkouda dtype object.
    dtype = akdtype(dtype)
    if isinstance(dtype, BigInt):
        type_name = dtype.name
    else:
        type_name = cast(np.dtype, dtype).name

    if type_name not in NumericDTypes:
        raise TypeError(f"unsupported dtype {dtype}")

    reply = generic_msg(cmd="create1D", args={"dtype": type_name, "shape": size})
    return create_pdarray(reply, max_bits=max_bits)
@typechecked
def ones(
    size: Union[int_scalars, str],
    dtype: Union[np.dtype, type, str, BigInt] = float64,
    max_bits: Optional[int] = None,
) -> pdarray:
    """
    Create a one-filled pdarray.

    Parameters
    ----------
    size : int_scalars
        Size of the array (only rank-1 arrays supported)
    dtype : Union[float64, int64, bool]
        Resulting array type, default float64
    max_bits: int
        Specifies the maximum number of bits; only used for bigint pdarrays

    Returns
    -------
    pdarray
        Ones of the requested size and dtype

    Raises
    ------
    TypeError
        Raised if size is not a scalar or if the normalized dtype name is
        not one of the supported numeric dtypes.

    See Also
    --------
    zeros, ones_like

    Examples
    --------
    >>> ak.ones(5, dtype=ak.int64)
    array([1, 1, 1, 1, 1])

    >>> ak.ones(5, dtype=ak.float64)
    array([1, 1, 1, 1, 1])

    >>> ak.ones(5, dtype=ak.bool)
    array([True, True, True, True, True])
    """
    if not np.isscalar(size):
        raise TypeError(f"size must be a scalar, not {size.__class__.__name__}")

    # Normalize whatever the caller passed into an arkouda dtype object.
    dtype = akdtype(dtype)
    if isinstance(dtype, BigInt):
        type_name = dtype.name
    else:
        type_name = cast(np.dtype, dtype).name

    if type_name not in NumericDTypes:
        raise TypeError(f"unsupported dtype {dtype}")

    # The array is created on the server first and then filled in place.
    reply = generic_msg(cmd="create1D", args={"dtype": type_name, "shape": size})
    result = create_pdarray(reply)
    result.fill(1)
    if max_bits:
        result.max_bits = max_bits
    return result
@typechecked
def full(
    size: Union[int_scalars, str],
    fill_value: Union[numeric_scalars, str],
    dtype: Union[np.dtype, type, str, BigInt] = float64,
    max_bits: Optional[int] = None,
) -> Union[pdarray, Strings]:
    """
    Create a pdarray (or Strings) in which every element is fill_value.

    Parameters
    ----------
    size: int_scalars
        Size of the array (only rank-1 arrays supported)
    fill_value: numeric_scalars or str
        Value with which the array will be filled. A str produces a Strings
        object, in which case dtype and max_bits are not used.
    dtype: all_scalars
        Resulting array type, default float64
    max_bits: int
        Specifies the maximum number of bits; only used for bigint pdarrays

    Returns
    -------
    pdarray or Strings
        array of the requested size and dtype filled with fill_value

    Raises
    ------
    TypeError
        Raised if size is not a scalar or if the normalized dtype name is
        not one of the supported numeric dtypes.

    See Also
    --------
    zeros, ones

    Examples
    --------
    >>> ak.full(5, 7, dtype=ak.int64)
    array([7, 7, 7, 7, 7])

    >>> ak.full(5, 9, dtype=ak.float64)
    array([9, 9, 9, 9, 9])

    >>> ak.full(5, 5, dtype=ak.bool)
    array([True, True, True, True, True])
    """
    if not np.isscalar(size):
        raise TypeError(f"size must be a scalar, not {size.__class__.__name__}")

    # String fills are handled by a dedicated server command.
    if isinstance(fill_value, str):
        return _full_string(size, fill_value)

    # Normalize whatever the caller passed into an arkouda dtype object.
    dtype = akdtype(dtype)
    if isinstance(dtype, BigInt):
        type_name = dtype.name
    else:
        type_name = cast(np.dtype, dtype).name

    if type_name not in NumericDTypes:
        raise TypeError(f"unsupported dtype {dtype}")

    # The array is created on the server first and then filled in place.
    reply = generic_msg(cmd="create1D", args={"dtype": type_name, "shape": size})
    result = create_pdarray(reply)
    result.fill(fill_value)
    if max_bits:
        result.max_bits = max_bits
    return result
@typechecked
def scalar_array(value: numeric_scalars):
    """
    Create a single-element pdarray holding ``value``.

    Parameters
    ----------
    value: numeric_scalars
        Value to create pdarray from

    Returns
    -------
    pdarray
        pdarray with a single element
    """
    message_args = {
        "dtype": resolve_scalar_dtype(value),
        "value": value,
    }
    reply = generic_msg(cmd="create0D", args=message_args)
    return create_pdarray(reply)


@typechecked
def _full_string(
    size: Union[int_scalars, str],
    fill_value: str,
) -> Strings:
    """
    Create a Strings object in which every element is ``fill_value``.

    Parameters
    ----------
    size: int_scalars
        Size of the array (only rank-1 arrays supported)
    fill_value: str
        Value with which the array will be filled

    Returns
    -------
    Strings
        array of the requested size filled with fill_value
    """
    message_args = {"size": size, "fill_value": fill_value}
    reply = generic_msg(cmd="segmentedFull", args=message_args)
    return Strings.from_return_msg(cast(str, reply))
@typechecked
def zeros_like(pda: pdarray) -> pdarray:
    """
    Create a zero-filled pdarray matching the size, dtype and max_bits of
    an existing pdarray.

    Parameters
    ----------
    pda : pdarray
        Array to use for size and dtype

    Returns
    -------
    pdarray
        Equivalent to ak.zeros(pda.size, pda.dtype, pda.max_bits)

    Raises
    ------
    TypeError
        Raised if the pda parameter is not a pdarray.

    See Also
    --------
    zeros, ones_like

    Examples
    --------
    >>> zeros = ak.zeros(5, dtype=ak.int64)
    >>> ak.zeros_like(zeros)
    array([0, 0, 0, 0, 0])

    >>> zeros = ak.zeros(5, dtype=ak.bool)
    >>> ak.zeros_like(zeros)
    array([False, False, False, False, False])
    """
    return zeros(size=pda.size, dtype=pda.dtype, max_bits=pda.max_bits)
@typechecked
def ones_like(pda: pdarray) -> pdarray:
    """
    Create a one-filled pdarray matching the size, dtype and max_bits of
    an existing pdarray.

    Parameters
    ----------
    pda : pdarray
        Array to use for size and dtype

    Returns
    -------
    pdarray
        Equivalent to ak.ones(pda.size, pda.dtype, pda.max_bits)

    Raises
    ------
    TypeError
        Raised if the pda parameter is not a pdarray.

    See Also
    --------
    ones, zeros_like

    Notes
    -----
    The work is delegated to ak.ones, so the supported dtypes are those
    accepted by ak.ones.

    Examples
    --------
    >>> ones = ak.ones(5, dtype=ak.int64)
    >>> ak.ones_like(ones)
    array([1, 1, 1, 1, 1])

    >>> ones = ak.ones(5, dtype=ak.bool)
    >>> ak.ones_like(ones)
    array([True, True, True, True, True])
    """
    return ones(size=pda.size, dtype=pda.dtype, max_bits=pda.max_bits)
@typechecked
def full_like(pda: pdarray, fill_value: numeric_scalars) -> pdarray:
    """
    Create a pdarray filled with fill_value, matching the size, dtype and
    max_bits of an existing pdarray.

    Parameters
    ----------
    pda: pdarray
        Array to use for size and dtype
    fill_value: numeric_scalars
        Value with which the array will be filled

    Returns
    -------
    pdarray
        Equivalent to ak.full(pda.size, fill_value, pda.dtype, pda.max_bits)

    Raises
    ------
    TypeError
        Raised if the pda parameter is not a pdarray.

    See Also
    --------
    ones_like, zeros_like

    Notes
    -----
    The work is delegated to ak.full, so the supported dtypes are those
    accepted by ak.full.

    Examples
    --------
    >>> full = ak.full(5, 7, dtype=ak.int64)
    >>> ak.full_like(full, 7)
    array([7, 7, 7, 7, 7])

    >>> full = ak.full(5, 9, dtype=ak.float64)
    >>> ak.full_like(full, 9)
    array([9, 9, 9, 9, 9])
    """
    return full(
        size=pda.size,
        fill_value=fill_value,
        dtype=pda.dtype,
        max_bits=pda.max_bits,
    )
def arange(*args, **kwargs) -> pdarray:
    """
    arange([start,] stop[, stride,] dtype=int64)

    Create a pdarray of consecutive integers within the interval [start, stop).
    If only one arg is given then arg is the stop parameter. If two args are
    given, then the first arg is start and second is stop. If three args are
    given, then the first arg is start, second is stop, third is stride.

    The return value is cast to type dtype

    Parameters
    ----------
    start: int_scalars, optional
        Starting value (inclusive)
    stop: int_scalars
        Stopping value (exclusive)
    stride: int_scalars, optional
        The difference between consecutive elements, the default stride is 1,
        if stride is specified then start must also be specified.
    dtype: np.dtype, type, or str
        The target dtype to cast values to (keyword only, default int64)
    max_bits: int
        Specifies the maximum number of bits; only used for bigint pdarrays
        (keyword only, default -1)

    Returns
    -------
    pdarray, dtype
        Integers from start (inclusive) to stop (exclusive) by stride

    Raises
    ------
    TypeError
        Raised if start, stop, or stride is not an int object, or if the
        number of positional arguments is not 1, 2, or 3
    ZeroDivisionError
        Raised if stride == 0

    See Also
    --------
    linspace, zeros, ones, randint

    Notes
    -----
    Negative strides result in decreasing values. For float64 arrays, use
    the linspace method.

    Examples
    --------
    >>> ak.arange(0, 5, 1)
    array([0, 1, 2, 3, 4])

    >>> ak.arange(5, 0, -1)
    array([5, 4, 3, 2, 1])

    >>> ak.arange(0, 10, 2)
    array([0, 2, 4, 6, 8])

    >>> ak.arange(-5, -10, -1)
    array([-5, -6, -7, -8, -9])
    """
    nargs = len(args)
    if nargs == 1:
        # if one arg is given then arg is stop
        start, stop, stride = 0, args[0], 1
    elif nargs == 2:
        # if two args are given then first arg is start and second is stop
        start, stop = args
        stride = 1
    elif nargs == 3:
        # if three args are given then first arg is start,
        # second is stop, third is stride
        start, stop, stride = args
        if stride == 0:
            raise ZeroDivisionError("division by zero")
    else:
        # Without this branch start/stop/stride would be unbound and the
        # caller would see an UnboundLocalError instead of a usable message.
        raise TypeError(f"arange expects 1 to 3 positional arguments, got {nargs}")

    dtype = kwargs.get("dtype", akint64)

    if not (isSupportedInt(start) and isSupportedInt(stop) and isSupportedInt(stride)):
        raise TypeError(
            f"start,stop,stride must be type int, np.int64, or np.uint64 {start} {stop} {stride}"
        )

    arg_dtypes = [resolve_scalar_dtype(arg) for arg in (start, stop, stride)]
    max_bits = kwargs.get("max_bits", -1)

    # Pick the dtype the server generates the range in: bigint wins if it is
    # requested or implied, then uint64, otherwise int64.
    arg_dtype = "int64"
    if dtype in ["bigint", bigint] or "bigint" in arg_dtypes or max_bits != -1:
        arg_dtype = "bigint"
    elif "uint64" in arg_dtypes:
        arg_dtype = "uint64"

    if stride < 0:
        # NOTE(review): the stop value sent to the server is shifted by 2 for
        # negative strides; presumably this matches how the server computes
        # descending ranges -- confirm against the server-side "arange" handler.
        stop = stop + 2

    repMsg = generic_msg(
        cmd="arange", args={"start": start, "stop": stop, "stride": stride, "dtype": arg_dtype}
    )
    return (
        create_pdarray(repMsg, max_bits=max_bits)
        if dtype == akint64
        else array(create_pdarray(repMsg), max_bits=max_bits, dtype=dtype)
    )
@typechecked
def linspace(start: numeric_scalars, stop: numeric_scalars, length: int_scalars) -> pdarray:
    """
    Create a pdarray of linearly-spaced floats in a closed interval.

    Parameters
    ----------
    start : numeric_scalars
        Start of interval (inclusive)
    stop : numeric_scalars
        End of interval (inclusive)
    length : int_scalars
        Number of points

    Returns
    -------
    pdarray, float64
        Array of evenly spaced float values along the interval

    Raises
    ------
    TypeError
        Raised if start or stop is not a float or int or if length is not an int

    See Also
    --------
    arange

    Notes
    -----
    If start is greater than stop, the pdarray values are generated
    in descending order.

    Examples
    --------
    >>> ak.linspace(0, 1, 5)
    array([0, 0.25, 0.5, 0.75, 1])

    >>> ak.linspace(start=1, stop=0, length=5)
    array([1, 0.75, 0.5, 0.25, 0])

    >>> ak.linspace(start=-5, stop=0, length=5)
    array([-5, -3.75, -2.5, -1.25, 0])
    """
    if not (isSupportedNumber(start) and isSupportedNumber(stop)):
        raise TypeError("both start and stop must be an int, np.int64, float, or np.float64")

    # NOTE(review): length is validated with the general number check although
    # the message speaks of ints; the int_scalars annotation enforced by
    # @typechecked is what actually restricts it to integers.
    length_ok = isSupportedNumber(length)
    if not length_ok:
        raise TypeError("length must be an int or int64")

    reply = generic_msg(cmd="linspace", args={"start": start, "stop": stop, "len": length})
    return create_pdarray(reply)
@typechecked
def randint(
    low: numeric_scalars,
    high: numeric_scalars,
    size: Union[int_scalars, Tuple[int_scalars, ...]] = 1,
    dtype=akint64,
    seed: Optional[int_scalars] = None,
) -> pdarray:
    """
    Generate a pdarray of randomized int, float, or bool values in a
    specified range bounded by the low and high parameters.

    This is a thin wrapper that forwards every argument to
    ``arkouda.random.randint``.

    Parameters
    ----------
    low : numeric_scalars
        The low value (inclusive) of the range
    high : numeric_scalars
        The high value (exclusive for int, inclusive for float) of the range
    size : int_scalars
        The length of the returned array
    dtype : Union[int64, float64, bool]
        The dtype of the array
    seed : int_scalars, optional
        Index for where to pull the first returned value

    Returns
    -------
    pdarray
        Values drawn uniformly from the specified range having the desired dtype

    Raises
    ------
    TypeError
        Raised if dtype.name not in DTypes, size is not an int, low or high is
        not an int or float, or seed is not an int
    ValueError
        Raised if size < 0 or if high < low

    Notes
    -----
    Calling randint with dtype=float64 will result in uniform non-integral
    floating point values.

    Ranges >= 2**64 in size is undefined behavior because
    it exceeds the maximum value that can be stored on the server (uint64)

    Examples
    --------
    >>> ak.randint(0, 10, 5)
    array([5, 7, 4, 8, 3])

    >>> ak.randint(0, 1, 3, dtype=ak.float64)
    array([0.92176432277231968, 0.083130710959903542, 0.68894208386667544])

    >>> ak.randint(1, 5, 10, seed=2)
    array([4, 3, 1, 3, 4, 4, 2, 4, 3, 2])
    """
    # Imported lazily and under an alias so it does not shadow this wrapper.
    from arkouda.random import randint as _random_randint

    return _random_randint(low=low, high=high, size=size, dtype=dtype, seed=seed)
@typechecked
def uniform(
    size: int_scalars,
    low: numeric_scalars = float(0.0),
    high: numeric_scalars = 1.0,
    seed: Union[None, int_scalars] = None,
) -> pdarray:
    """
    Generate a pdarray with uniformly distributed random float values
    in a specified range.

    Parameters
    ----------
    low : float_scalars
        The low value (inclusive) of the range, defaults to 0.0
    high : float_scalars
        The high value (inclusive) of the range, defaults to 1.0
    size : int_scalars
        The length of the returned array
    seed : int_scalars, optional
        Value used to initialize the random number generator

    Returns
    -------
    pdarray, float64
        Values drawn uniformly from the specified range

    Raises
    ------
    TypeError
        Raised if dtype.name not in DTypes, size is not an int, or if
        either low or high is not an int or float
    ValueError
        Raised if size < 0 or if high < low

    Notes
    -----
    The work is delegated to ak.randint, invoked with a dtype of "float64".

    Examples
    --------
    >>> ak.uniform(3)
    array([0.92176432277231968, 0.083130710959903542, 0.68894208386667544])

    >>> ak.uniform(size=3,low=0,high=5,seed=0)
    array([0.30013431967121934, 0.47383036230759112, 1.0441791878997098])
    """
    samples = randint(
        low=low,
        high=high,
        size=size,
        dtype="float64",
        seed=seed,
    )
    return samples
@typechecked
def standard_normal(size: int_scalars, seed: Union[None, int_scalars] = None) -> pdarray:
    """
    Draw real numbers from the standard normal distribution.

    This is a thin wrapper that forwards its arguments to
    ``arkouda.random.standard_normal``.

    Parameters
    ----------
    size : int_scalars
        The number of samples to draw (size of the returned array)
    seed : int_scalars
        Value used to initialize the random number generator

    Returns
    -------
    pdarray, float64
        The array of random numbers

    Raises
    ------
    TypeError
        Raised if size is not an int
    ValueError
        Raised if size < 0

    See Also
    --------
    randint

    Notes
    -----
    For random samples from :math:`N(\\mu, \\sigma^2)`, use:

    ``(sigma * standard_normal(size)) + mu``

    Examples
    --------
    >>> ak.standard_normal(3,1)
    array([-0.68586185091150265, 1.1723810583573375, 0.567584107142031])
    """
    # Imported lazily and under an alias so it does not shadow this wrapper.
    from arkouda.random import standard_normal as _random_standard_normal

    return _random_standard_normal(size=size, seed=seed)
@typechecked
def random_strings_uniform(
    minlen: int_scalars,
    maxlen: int_scalars,
    size: int_scalars,
    characters: str = "uppercase",
    seed: Union[None, int_scalars] = None,
) -> Strings:
    """
    Generate random strings with lengths uniformly distributed between
    minlen and maxlen, and with characters drawn from a specified set.

    Parameters
    ----------
    minlen : int_scalars
        The minimum allowed length of string
    maxlen : int_scalars
        The maximum allowed length of string
    size : int_scalars
        The number of strings to generate
    characters : (uppercase, lowercase, numeric, printable, binary)
        The set of characters to draw from
    seed : Union[None, int_scalars], optional
        Value used to initialize the random number generator

    Returns
    -------
    Strings
        The array of random strings

    Raises
    ------
    ValueError
        Raised if minlen < 0, maxlen <= minlen, or size < 0

    See Also
    --------
    random_strings_lognormal, randint

    Examples
    --------
    >>> ak.random_strings_uniform(minlen=1, maxlen=5, seed=1, size=5)
    array(['TVKJ', 'EWAB', 'CO', 'HFMD', 'U'])
    """
    arguments_invalid = minlen < 0 or maxlen <= minlen or size < 0
    if arguments_invalid:
        raise ValueError("Incompatible arguments: minlen < 0, maxlen <= minlen, or size < 0")

    # Integer arguments are rendered with the module's int64 format string
    # before being placed in the message.
    int_format = NUMBER_FORMAT_STRINGS["int64"]
    message_args = {
        "size": int_format.format(size),
        "dist": "uniform",
        "chars": characters,
        "arg1": int_format.format(minlen),
        "arg2": int_format.format(maxlen),
        "seed": seed,
    }
    reply = generic_msg(cmd="randomStrings", args=message_args)
    return Strings.from_return_msg(cast(str, reply))
@typechecked
def random_strings_lognormal(
    logmean: numeric_scalars,
    logstd: numeric_scalars,
    size: int_scalars,
    characters: str = "uppercase",
    seed: Optional[int_scalars] = None,
) -> Strings:
    """
    Generate random strings with log-normally distributed lengths and
    with characters drawn from a specified set.

    Parameters
    ----------
    logmean : numeric_scalars
        The log-mean of the length distribution
    logstd :  numeric_scalars
        The log-standard-deviation of the length distribution
    size : int_scalars
        The number of strings to generate
    characters : (uppercase, lowercase, numeric, printable, binary)
        The set of characters to draw from
    seed : int_scalars, optional
        Value used to initialize the random number generator

    Returns
    -------
    Strings
        The Strings object encapsulating a pdarray of random strings

    Raises
    ------
    TypeError
        Raised if logmean or logstd is not a supported number, size is not
        an int, or if characters is not a str
    ValueError
        Raised if logstd <= 0 or size < 0

    See Also
    --------
    random_strings_uniform, randint

    Notes
    -----
    The lengths of the generated strings are distributed $Lognormal(\\mu, \\sigma^2)$,
    with :math:`\\mu = logmean` and :math:`\\sigma = logstd`. Thus, the strings will
    have an average length of :math:`exp(\\mu + 0.5*\\sigma^2)`, a minimum length of
    zero, and a heavy tail towards longer strings.

    Examples
    --------
    >>> ak.random_strings_lognormal(2, 0.25, 5, seed=1)
    array(['TVKJTE', 'ABOCORHFM', 'LUDMMGTB', 'KWOQNPHZ', 'VSXRRL'])
    """
    if not (isSupportedNumber(logmean) and isSupportedNumber(logstd)):
        raise TypeError("both logmean and logstd must be an int, np.int64, float, or np.float64")

    arguments_invalid = logstd <= 0 or size < 0
    if arguments_invalid:
        raise ValueError("Incompatible arguments: logstd <= 0 or size < 0")

    # The count is rendered with the int64 format string and the two
    # distribution parameters with the float64 format string.
    float_format = NUMBER_FORMAT_STRINGS["float64"]
    message_args = {
        "size": NUMBER_FORMAT_STRINGS["int64"].format(size),
        "dist": "lognormal",
        "chars": characters,
        "arg1": float_format.format(logmean),
        "arg2": float_format.format(logstd),
        "seed": seed,
    }
    reply = generic_msg(cmd="randomStrings", args=message_args)
    return Strings.from_return_msg(cast(str, reply))