from __future__ import annotations
import numpy as np
import pandas as pd
from numpy import bool_, floating, integer, str_
from arkouda import (
Categorical,
DataFrame,
Index,
MultiIndex,
SegArray,
Series,
Strings,
array,
pdarray,
)
from arkouda.testing import (
assert_almost_equal,
assert_arkouda_array_equal,
assert_frame_equal,
assert_index_equal,
assert_series_equal,
)
# Debug switch for traceback visibility: every public asserter in this module
# sets ``__tracebackhide__ = not DEBUG`` (the frame-local flag pytest reads to
# hide a frame), so with DEBUG = True the asserter frames remain visible.
DEBUG = True
# Names exported by ``from <this module> import *``; the private helper
# ``_convert_to_arkouda`` is deliberately not listed.
__all__ = [
"assert_almost_equivalent",
"assert_arkouda_array_equivalent",
"assert_equivalent",
"assert_frame_equivalent",
"assert_index_equivalent",
"assert_series_equivalent",
]
def _convert_to_arkouda(obj):
    """
    Convert a NumPy or pandas object to an Arkouda object.

    Arkouda objects and scalars (NumPy scalars and Python ``bool``, ``int``,
    ``float``) are returned unchanged. pandas and NumPy containers are wrapped
    in the matching Arkouda constructor.

    Parameters
    ----------
    obj : object
        The object to convert. Must be one of np.ndarray, pd.Series,
        pd.DataFrame, pd.Index, pd.MultiIndex, pd.Categorical, an Arkouda
        equivalent, or a supported scalar.

    Returns
    -------
    object
        ``obj`` itself when it needs no conversion, otherwise an Arkouda object
        built from ``obj`` (MultiIndex, Index, Series, DataFrame, Categorical,
        or the result of ``array`` for an np.ndarray).

    Raises
    ------
    TypeError
        If the input object is not a recognized Arkouda, NumPy, or pandas type.

    Examples
    --------
    >>> import pandas as pd
    >>> import arkouda as ak
    >>> from arkouda.testing._equivalence_asserters import _convert_to_arkouda
    >>> _convert_to_arkouda(pd.Series([1, 2, 3]))
    0    1
    1    2
    2    3
    dtype: int64

    """
    # Already-Arkouda objects and scalars pass straight through. Python ``int``
    # is accepted alongside ``float``/``bool`` and the NumPy scalar types so that
    # plain integer scalars are not rejected with a TypeError.
    passthrough_types = (
        DataFrame,
        Series,
        Index,
        MultiIndex,
        SegArray,
        Categorical,
        Strings,
        pdarray,
        str_,
        integer,
        floating,
        bool_,
        bool,
        int,
        float,
    )
    if isinstance(obj, passthrough_types):
        return obj

    # pd.MultiIndex is checked before pd.Index because it is a subclass of it.
    if isinstance(obj, pd.MultiIndex):
        return MultiIndex(obj)
    if isinstance(obj, pd.Index):
        return Index(obj)
    if isinstance(obj, pd.Series):
        return Series(obj)
    if isinstance(obj, pd.DataFrame):
        return DataFrame(obj)
    if isinstance(obj, pd.Categorical):
        return Categorical(obj)
    if isinstance(obj, np.ndarray):
        # A C-contiguous buffer is required for some multi-dim cases.
        return array(obj if obj.flags.c_contiguous else np.ascontiguousarray(obj))

    raise TypeError(f"obj must be an arkouda, numpy or pandas object, but was type: {type(obj)}")
[docs]
def assert_almost_equivalent(
    left,
    right,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
) -> None:
    """
    Assert that two objects are equal up to a numeric tolerance.

    Both operands are first passed through ``_convert_to_arkouda`` (so pandas
    and NumPy inputs become Arkouda objects), and the converted pair is then
    handed to ``assert_almost_equal`` together with the tolerances.

    Parameters
    ----------
    left : object
        First object to compare.
    right : object
        Second object to compare.
    rtol : float
        Relative tolerance. Default is 1e-5.
    atol : float
        Absolute tolerance. Default is 1e-8.

    Raises
    ------
    TypeError
        If either input is not a supported numeric-like type.

    Warning
    -------
    This function cannot be used on pdarrays of size > ak.client.maxTransferBytes
    because it converts pdarrays to numpy arrays and calls np.allclose.

    See Also
    --------
    assert_almost_equal

    Examples
    --------
    >>> import arkouda as ak
    >>> from arkouda.testing import assert_almost_equivalent
    >>> assert_almost_equivalent(0.123456, 0.123457, rtol=1e-4)

    """
    __tracebackhide__ = not DEBUG

    ak_left = _convert_to_arkouda(left)
    ak_right = _convert_to_arkouda(right)
    assert_almost_equal(ak_left, ak_right, rtol=rtol, atol=atol)
[docs]
def assert_index_equivalent(
    left: Index | pd.Index,
    right: Index | pd.Index,
    exact: bool = True,
    check_names: bool = True,
    check_exact: bool = True,
    check_categorical: bool = True,
    check_order: bool = True,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    obj: str = "Index",
) -> None:
    """
    Assert that two Index objects are equal, accepting Arkouda or pandas inputs.

    After a type check on both operands, each one is converted with
    ``_convert_to_arkouda`` and the pair is compared by ``assert_index_equal``
    using the options given here.

    Parameters
    ----------
    left : Index or pd.Index
        First Index to compare.
    right : Index or pd.Index
        Second Index to compare.
    exact : bool
        Whether to check that class, dtype, and inferred type are identical. Default is True.
    check_names : bool
        Whether to check the names attribute. Default is True.
    check_exact : bool
        Whether to compare values exactly. Default is True.
    check_categorical : bool
        Whether to compare internal Categoricals exactly. Default is True.
    check_order : bool
        Whether to require identical order in index values. Default is True.
    rtol : float
        Relative tolerance used when check_exact is False. Default is 1e-5.
    atol : float
        Absolute tolerance used when check_exact is False. Default is 1e-8.
    obj : str
        Object name used in error messages. Default is "Index".

    Raises
    ------
    TypeError
        If either input is not an Index or pd.Index.

    See Also
    --------
    assert_index_equal

    Examples
    --------
    >>> import arkouda as ak
    >>> from arkouda import testing as tm
    >>> import pandas as pd
    >>> a = ak.Index([1, 2, 3])
    >>> b = pd.Index([1, 2, 3])
    >>> tm.assert_index_equivalent(a, b)

    """
    __tracebackhide__ = not DEBUG

    index_types = (Index, pd.Index)
    if not (isinstance(left, index_types) and isinstance(right, index_types)):
        raise TypeError(
            "left and right must be type arkouda.Index, or pandas.Index. "
            f"Instead types were {type(left)} and {type(right)}"
        )

    ak_left = _convert_to_arkouda(left)
    ak_right = _convert_to_arkouda(right)
    comparison_options = {
        "exact": exact,
        "check_names": check_names,
        "check_exact": check_exact,
        "check_categorical": check_categorical,
        "check_order": check_order,
        "rtol": rtol,
        "atol": atol,
        "obj": obj,
    }
    assert_index_equal(ak_left, ak_right, **comparison_options)
[docs]
def assert_arkouda_array_equivalent(
    left: pdarray | Strings | Categorical | SegArray | np.ndarray | pd.Categorical,
    right: pdarray | Strings | Categorical | SegArray | np.ndarray | pd.Categorical,
    check_dtype: bool = True,
    err_msg=None,
    check_same=None,
    obj: str = "pdarray",
    index_values=None,
) -> None:
    """
    Assert that two array-like objects are equal after conversion to Arkouda.

    Accepts NumPy arrays, pandas Categoricals and Arkouda arrays. Both operands
    are type-checked, converted with ``_convert_to_arkouda``, and then compared
    by ``assert_arkouda_array_equal``.

    Parameters
    ----------
    left : pdarray, Strings, Categorical, SegArray, np.ndarray, or pd.Categorical
        First array to compare.
    right : pdarray, Strings, Categorical, SegArray, np.ndarray, or pd.Categorical
        Second array to compare.
    check_dtype : bool
        Whether to verify that dtypes match. Default is True.
    err_msg : str or None
        Optional message to display on failure.
    check_same : None or {"copy", "same"}
        Whether to ensure identity or separation in memory. Default is None.
    obj : str
        Object label for error messages. Default is "pdarray".
    index_values : Index or pdarray, optional
        Shared index used in error output. Default is None.

    Raises
    ------
    TypeError
        If either input is not a supported array type.

    See Also
    --------
    assert_arkouda_array_equal

    Examples
    --------
    >>> import arkouda as ak
    >>> from arkouda import Strings
    >>> from arkouda.testing import assert_arkouda_array_equivalent
    >>> a = ak.array([1, 2, 3])
    >>> b = ak.array([1, 2, 3])
    >>> assert_arkouda_array_equivalent(a, b)
    >>> s1 = ak.array(['x', 'y'])
    >>> s2 = ak.array(['x', 'y'])
    >>> assert_arkouda_array_equivalent(s1, s2)

    """
    __tracebackhide__ = not DEBUG

    array_types = (np.ndarray, pd.Categorical, pdarray, Strings, Categorical, SegArray)
    if not (isinstance(left, array_types) and isinstance(right, array_types)):
        raise TypeError(
            "left and right must be type np.ndarray, pdarray, Strings, "
            "Categorical, or SegArray. "
            f"Instead types were {type(left)} and {type(right)}"
        )

    ak_left = _convert_to_arkouda(left)
    ak_right = _convert_to_arkouda(right)
    assert_arkouda_array_equal(
        ak_left,
        ak_right,
        check_dtype=check_dtype,
        err_msg=err_msg,
        check_same=check_same,
        obj=obj,
        index_values=index_values,
    )
[docs]
def assert_series_equivalent(
    left: Series | pd.Series,
    right: Series | pd.Series,
    check_dtype: bool = True,
    check_index_type: bool = True,
    check_series_type: bool = True,
    check_names: bool = True,
    check_exact: bool = False,
    check_categorical: bool = True,
    check_category_order: bool = True,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    obj: str = "Series",
    *,
    check_index: bool = True,
    check_like: bool = False,
) -> None:
    """
    Check that two Series are equal.

    Both operands are type-checked, pandas Series are converted to Arkouda
    equivalents with ``_convert_to_arkouda``, and the pair is then compared by
    ``assert_series_equal`` using the options given here.

    Parameters
    ----------
    left : Series or pd.Series
        First Series to compare.
    right : Series or pd.Series
        Second Series to compare.
    check_dtype : bool
        Whether to check that dtypes are identical. Default is True.
    check_index_type : bool
        Whether to check that index class, dtype, and inferred type are identical. Default is True.
    check_series_type : bool
        Whether to check that the Series class is identical. Default is True.
    check_names : bool
        Whether to check that the Series and Index name attributes are identical. Default is True.
    check_exact : bool
        Whether to compare numbers exactly. Default is False.
    check_categorical : bool
        Whether to compare internal Categoricals exactly. Default is True.
    check_category_order : bool
        Whether to compare category order in internal Categoricals. Default is True.
    rtol : float
        Relative tolerance used when check_exact is False. Default is 1e-5.
    atol : float
        Absolute tolerance used when check_exact is False. Default is 1e-8.
    obj : str
        Object name used in error messages. Default is "Series".
    check_index : bool
        Whether to check index equivalence. If False, only values are compared. Default is True.
    check_like : bool
        If True, ignore the order of the index. Must be False if check_index is False.
        Note: identical labels must still correspond to the same data. Default is False.

    Raises
    ------
    TypeError
        If either input is not a Series or pd.Series.

    See Also
    --------
    assert_series_equal

    Examples
    --------
    >>> import arkouda as ak
    >>> from arkouda import testing as tm
    >>> import pandas as pd
    >>> a = ak.Series([1, 2, 3, 4])
    >>> b = pd.Series([1, 2, 3, 4])
    >>> tm.assert_series_equivalent(a, b)

    """
    __tracebackhide__ = not DEBUG

    series_types = (Series, pd.Series)
    if not isinstance(left, series_types) or not isinstance(right, series_types):
        # The message names the two accepted classes; the pandas one is
        # ``pandas.Series`` (previously misspelled as "pandas.pandas.Series").
        raise TypeError(
            "left and right must be type arkouda.pandas.Series or pandas.Series. "
            f"Instead types were {type(left)} and {type(right)}."
        )

    assert_series_equal(
        _convert_to_arkouda(left),
        _convert_to_arkouda(right),
        check_dtype=check_dtype,
        check_index_type=check_index_type,
        check_series_type=check_series_type,
        check_names=check_names,
        check_exact=check_exact,
        check_categorical=check_categorical,
        check_category_order=check_category_order,
        rtol=rtol,
        atol=atol,
        obj=obj,
        check_index=check_index,
        check_like=check_like,
    )
[docs]
def assert_frame_equivalent(
    left: DataFrame | pd.DataFrame,
    right: DataFrame | pd.DataFrame,
    check_dtype: bool = True,
    check_index_type: bool = True,
    check_column_type: bool = True,
    check_frame_type: bool = True,
    check_names: bool = True,
    check_exact: bool = True,
    check_categorical: bool = True,
    check_like: bool = False,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    obj: str = "DataFrame",
) -> None:
    """
    Assert that two DataFrames are equal, accepting Arkouda or pandas inputs.

    Intended primarily for unit tests. Both operands are type-checked, pandas
    DataFrames are converted to Arkouda equivalents with ``_convert_to_arkouda``,
    and the pair is compared by ``assert_frame_equal``.

    Parameters
    ----------
    left : DataFrame or pd.DataFrame
        First DataFrame to compare.
    right : DataFrame or pd.DataFrame
        Second DataFrame to compare.
    check_dtype : bool
        Whether to check that dtypes are identical. Default is True.
    check_index_type : bool
        Whether to check that index class, dtype, and inferred type are identical. Default is True.
    check_column_type : bool
        Whether to check that column class, dtype, and inferred type are identical. Default is True.
    check_frame_type : bool
        Whether to check that the DataFrame class is identical. Default is True.
    check_names : bool
        Whether to check that the index and column names are identical. Default is True.
    check_exact : bool
        Whether to compare values exactly. Default is True.
    check_categorical : bool
        Whether to compare internal categoricals exactly. Default is True.
    check_like : bool
        Whether to ignore the order of index and columns. Labels must still match their data.
        Default is False.
    rtol : float
        Relative tolerance used when check_exact is False. Default is 1e-5.
    atol : float
        Absolute tolerance used when check_exact is False. Default is 1e-8.
    obj : str
        Object name used in error messages. Default is "DataFrame".

    Raises
    ------
    TypeError
        If either input is not a DataFrame or pd.DataFrame.

    See Also
    --------
    assert_frame_equal

    Examples
    --------
    >>> import arkouda as ak
    >>> import pandas as pd
    >>> from arkouda.testing import assert_frame_equivalent
    >>> df1 = ak.DataFrame({'a': [1, 2], 'b': [3, 4]})
    >>> df2 = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})

    Fails because dtypes are different:

    >>> assert_frame_equivalent(df1, df2)  # doctest: +SKIP

    """
    __tracebackhide__ = not DEBUG

    frame_types = (DataFrame, pd.DataFrame)
    if not (isinstance(left, frame_types) and isinstance(right, frame_types)):
        raise TypeError(
            "left and right must be type arkouda.pandas.DataFrame or pandas.DataFrame. "
            f"Instead types were {type(left)} and {type(right)}."
        )

    ak_left = _convert_to_arkouda(left)
    ak_right = _convert_to_arkouda(right)
    comparison_options = {
        "check_dtype": check_dtype,
        "check_index_type": check_index_type,
        "check_column_type": check_column_type,
        "check_frame_type": check_frame_type,
        "check_names": check_names,
        "check_exact": check_exact,
        "check_categorical": check_categorical,
        "check_like": check_like,
        "rtol": rtol,
        "atol": atol,
        "obj": obj,
    }
    assert_frame_equal(ak_left, ak_right, **comparison_options)
[docs]
def assert_equivalent(left, right, **kwargs) -> None:
    """
    Dispatch to the appropriate assertion function depending on object types.

    The type of ``left`` alone selects the comparison:

    - Index / pd.Index -> ``assert_index_equivalent``
    - Series / pd.Series -> ``assert_series_equivalent``
    - DataFrame / pd.DataFrame -> ``assert_frame_equivalent``
    - pdarray, np.ndarray, Strings, Categorical, pd.Categorical, SegArray
      -> ``assert_arkouda_array_equivalent``
    - str -> compared with ``==``
    - anything else -> ``assert_almost_equivalent`` with default tolerances

    Parameters
    ----------
    left : Any
        First object to compare. Type determines which assertion function is used.
    right : Any
        Second object to compare.
    **kwargs : dict
        Keyword arguments passed to the specific assertion function. Must be
        empty when ``left`` is a string or falls through to the scalar comparison.

    Raises
    ------
    AssertionError
        If values are not equivalent, or if keyword arguments are supplied for a
        comparison (string or scalar fallback) that accepts none.

    Examples
    --------
    >>> import arkouda as ak
    >>> import pandas as pd
    >>> from arkouda.testing import assert_equivalent
    >>> ak_series = ak.Series([1, 2, 3])
    >>> pd_series = pd.Series([1, 2, 3])
    >>> assert_equivalent(ak_series, pd_series)

    """
    __tracebackhide__ = not DEBUG

    if isinstance(left, (Index, pd.Index)):
        assert_index_equivalent(left, right, **kwargs)
    elif isinstance(left, (Series, pd.Series)):
        assert_series_equivalent(left, right, **kwargs)
    elif isinstance(left, (DataFrame, pd.DataFrame)):
        assert_frame_equivalent(left, right, **kwargs)
    elif isinstance(left, (pdarray, np.ndarray, Strings, Categorical, pd.Categorical, SegArray)):
        assert_arkouda_array_equivalent(left, right, **kwargs)
    else:
        # Explicit raises instead of bare ``assert`` statements: asserts are
        # removed under ``python -O``, which would make these checks pass silently.
        if kwargs:
            raise AssertionError(
                f"assert_equivalent accepts no keyword arguments when left is of type "
                f"{type(left)}, but got: {sorted(kwargs)}"
            )
        if isinstance(left, str):
            if not (left == right):
                raise AssertionError(f"Strings are not equal: {left!r} != {right!r}")
        else:
            assert_almost_equivalent(left, right)