Source code for arkouda.testing._equivalence_asserters

from __future__ import annotations

import numpy as np
import pandas as pd
from numpy import bool_, floating, integer, str_

from arkouda import (
    Categorical,
    DataFrame,
    Index,
    MultiIndex,
    SegArray,
    Series,
    Strings,
    array,
    pdarray,
)
from arkouda.testing import (
    assert_almost_equal,
    assert_arkouda_array_equal,
    assert_frame_equal,
    assert_index_equal,
    assert_series_equal,
)

# Every asserter in this module sets ``__tracebackhide__ = not DEBUG``.
# ``__tracebackhide__`` is the pytest convention for omitting a frame from
# failure tracebacks, so with DEBUG enabled the asserter frames stay visible.
DEBUG = True

# Public names of this module; the private ``_convert_to_arkouda`` helper is
# intentionally not listed.
__all__ = [
    "assert_almost_equivalent",
    "assert_arkouda_array_equivalent",
    "assert_equivalent",
    "assert_frame_equivalent",
    "assert_index_equivalent",
    "assert_series_equivalent",
]


def _convert_to_arkouda(obj):
    """
    Convert a numpy or pandas object to its arkouda counterpart.

    Arkouda objects and scalar values are returned unchanged, so callers can
    pass either side of a comparison through this helper unconditionally.

    Parameters
    ----------
    obj : object
        One of: an arkouda DataFrame, Series, Index, MultiIndex, SegArray,
        Categorical, Strings or pdarray; a numpy scalar (``str_``, ``integer``,
        ``floating``, ``bool_``); a Python ``bool``, ``int`` or ``float``; or a
        pandas.MultiIndex, pandas.Index, pandas.Series, pandas.DataFrame,
        pandas.Categorical or numpy.ndarray.

    Returns
    -------
    object
        ``obj`` itself when no conversion is needed, otherwise the result of
        passing ``obj`` to the matching arkouda constructor.

    Raises
    ------
    TypeError
        If ``obj`` is none of the supported types.
    """
    passthrough_types = (
        DataFrame,
        Series,
        Index,
        MultiIndex,
        SegArray,
        Categorical,
        Strings,
        pdarray,
        str_,
        integer,
        floating,
        bool_,
        bool,
        # A plain Python int is not an instance of numpy.integer; without this
        # entry integer scalars were rejected while bool and float were accepted.
        int,
        float,
    )
    if isinstance(obj, passthrough_types):
        return obj

    # pd.MultiIndex is a subclass of pd.Index, so it has to be tested first.
    if isinstance(obj, pd.MultiIndex):
        return MultiIndex(obj)
    if isinstance(obj, pd.Index):
        return Index(obj)
    if isinstance(obj, pd.Series):
        return Series(obj)
    if isinstance(obj, pd.DataFrame):
        return DataFrame(obj)
    if isinstance(obj, pd.Categorical):
        return Categorical(obj)
    if isinstance(obj, np.ndarray):
        return array(obj)

    raise TypeError(f"obj must be an arkouda, numpy or pandas object, but was type: {type(obj)}")


def assert_almost_equivalent(
    left,
    right,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
) -> None:
    """
    Assert that two objects are approximately equal after arkouda conversion.

    "Approximately equal" applies to numbers, or to objects containing
    numbers, that agree to within the requested tolerances.

    Both arguments are passed through ``_convert_to_arkouda`` (pandas and numpy
    objects become arkouda objects) and the converted pair is handed to
    ``assert_almost_equal``.

    Parameters
    ----------
    left : object
    right : object
    rtol : float, default 1e-5
        Relative tolerance.
    atol : float, default 1e-8
        Absolute tolerance.

    Warning
    -------
    This function cannot be used on pdarray of size > ak.client.maxTransferBytes
    because it converts pdarrays to numpy arrays and calls np.allclose.

    See Also
    --------
    assert_almost_equal
    """
    __tracebackhide__ = not DEBUG

    ak_left = _convert_to_arkouda(left)
    ak_right = _convert_to_arkouda(right)
    assert_almost_equal(ak_left, ak_right, rtol=rtol, atol=atol)


def assert_index_equivalent(
    left: Index | pd.Index,
    right: Index | pd.Index,
    exact: bool = True,
    check_names: bool = True,
    check_exact: bool = True,
    check_categorical: bool = True,
    check_order: bool = True,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    obj: str = "Index",
) -> None:
    """
    Check that left and right Index are equal.

    If the objects are pandas.Index, they are converted to arkouda.Index.
    Then assert_index_equal is applied to the result.

    Parameters
    ----------
    left : Index or pandas.Index
    right : Index or pandas.Index
    exact : bool, default True
        Whether to check the Index class, dtype and inferred_type
        are identical.
    check_names : bool, default True
        Whether to check the names attribute.
    check_exact : bool, default True
        Whether to compare number exactly.
    check_categorical : bool, default True
        Whether to compare internal Categorical exactly.
    check_order : bool, default True
        Whether to compare the order of index entries as well as their values.
        If True, both indexes must contain the same elements, in the same order.
        If False, both indexes must contain the same elements, but in any order.
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.
    obj : str, default 'Index'
        Specify object name being compared, internally used to show appropriate
        assertion message.

    Raises
    ------
    TypeError
        If either argument is neither an arkouda.Index nor a pandas.Index.

    See Also
    --------
    assert_index_equal

    Examples
    --------
    >>> import arkouda as ak
    >>> from arkouda import testing as tm
    >>> import pandas as pd
    >>> a = ak.Index([1, 2, 3])
    >>> b = pd.Index([1, 2, 3])
    >>> tm.assert_index_equivalent(a, b)
    """
    __tracebackhide__ = not DEBUG

    index_types = (Index, pd.Index)
    if not isinstance(left, index_types) or not isinstance(right, index_types):
        raise TypeError(
            "left and right must be type arkouda.Index, or pandas.Index. "
            f"Instead types were {type(left)} and {type(right)}"
        )

    assert_index_equal(
        _convert_to_arkouda(left),
        _convert_to_arkouda(right),
        exact=exact,
        check_names=check_names,
        check_exact=check_exact,
        check_categorical=check_categorical,
        check_order=check_order,
        rtol=rtol,
        atol=atol,
        obj=obj,
    )


def assert_arkouda_array_equivalent(
    left: pdarray | Strings | Categorical | SegArray | np.ndarray | pd.Categorical,
    right: pdarray | Strings | Categorical | SegArray | np.ndarray | pd.Categorical,
    check_dtype: bool = True,
    err_msg=None,
    check_same=None,
    obj: str = "pdarray",
    index_values=None,
) -> None:
    """
    Check that 'np.ndarray', 'pd.Categorical', 'ak.pdarray', 'ak.Strings',
    'ak.Categorical', or 'ak.SegArray' objects are equivalent.

    np.ndarray's and pd.Categorical's will be converted to the arkouda equivalent.
    Then assert_arkouda_array_equal will be applied to the result.

    Parameters
    ----------
    left, right : np.ndarray, pd.Categorical, arkouda.pdarray, arkouda.Strings,
        arkouda.Categorical or arkouda.SegArray
        The two arrays to be compared.
    check_dtype : bool, default True
        Check dtype if both left and right are ak.pdarray or np.ndarray.
    err_msg : str, default None
        If provided, used as assertion message.
    check_same : None|'copy'|'same', default None
        Ensure left and right refer/do not refer to the same memory area.
    obj : str, default 'pdarray'
        Specify object name being compared, internally used to show appropriate
        assertion message.
    index_values : Index | arkouda.pdarray, default None
        optional index (shared by both left and right), used in output.

    Raises
    ------
    TypeError
        If either argument is not one of the accepted array types.

    See Also
    --------
    assert_arkouda_array_equal
    """
    __tracebackhide__ = not DEBUG

    array_types = (np.ndarray, pd.Categorical, pdarray, Strings, Categorical, SegArray)
    if not isinstance(left, array_types) or not isinstance(right, array_types):
        # The message lists every accepted type, including pd.Categorical,
        # so it agrees with the check above.
        raise TypeError(
            "left and right must be type np.ndarray, pd.Categorical, pdarray, Strings, "
            "Categorical, or SegArray.  "
            f"Instead types were {type(left)} and {type(right)}"
        )

    assert_arkouda_array_equal(
        _convert_to_arkouda(left),
        _convert_to_arkouda(right),
        check_dtype=check_dtype,
        err_msg=err_msg,
        check_same=check_same,
        obj=obj,
        index_values=index_values,
    )


def assert_series_equivalent(
    left: Series | pd.Series,
    right: Series | pd.Series,
    check_dtype: bool = True,
    check_index_type: bool = True,
    check_series_type: bool = True,
    check_names: bool = True,
    check_exact: bool = False,
    check_categorical: bool = True,
    check_category_order: bool = True,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    obj: str = "Series",
    *,
    check_index: bool = True,
    check_like: bool = False,
) -> None:
    """
    Assert that two Series are equal, accepting arkouda or pandas inputs.

    Each argument is passed through ``_convert_to_arkouda`` (a pd.Series
    becomes an arkouda Series) and the converted pair is compared with
    ``assert_series_equal``, forwarding every option unchanged.

    Parameters
    ----------
    left : Series or pd.Series
    right : Series or pd.Series
    check_dtype : bool, default True
        Whether to check the Series dtype is identical.
    check_index_type : bool, default True
        Whether to check the Index class, dtype and inferred_type
        are identical.
    check_series_type : bool, default True
        Whether to check the Series class is identical.
    check_names : bool, default True
        Whether to check the Series and Index names attribute.
    check_exact : bool, default False
        Whether to compare number exactly.
    check_categorical : bool, default True
        Whether to compare internal Categorical exactly.
    check_category_order : bool, default True
        Whether to compare category order of internal Categoricals.
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.
    obj : str, default 'Series'
        Specify object name being compared, internally used to show appropriate
        assertion message.
    check_index : bool, default True
        Whether to check index equivalence. If False, then compare only values.
    check_like : bool, default False
        If True, ignore the order of the index. Must be False if check_index is False.
        Note: same labels must be with the same data.

    Raises
    ------
    TypeError
        If either argument is neither an arkouda.Series nor a pandas.Series.

    See Also
    --------
    assert_series_equal

    Examples
    --------
    >>> import arkouda as ak
    >>> from arkouda import testing as tm
    >>> import pandas as pd
    >>> a = ak.Series([1, 2, 3, 4])
    >>> b = pd.Series([1, 2, 3, 4])
    >>> tm.assert_series_equivalent(a, b)
    """
    __tracebackhide__ = not DEBUG

    series_types = (Series, pd.Series)
    if not (isinstance(left, series_types) and isinstance(right, series_types)):
        raise TypeError(
            "left and right must be type arkouda.Series or pandas.Series.  "
            f"Instead types were {type(left)} and {type(right)}."
        )

    ak_left = _convert_to_arkouda(left)
    ak_right = _convert_to_arkouda(right)
    assert_series_equal(
        ak_left,
        ak_right,
        check_dtype=check_dtype,
        check_index_type=check_index_type,
        check_series_type=check_series_type,
        check_names=check_names,
        check_exact=check_exact,
        check_categorical=check_categorical,
        check_category_order=check_category_order,
        rtol=rtol,
        atol=atol,
        obj=obj,
        check_index=check_index,
        check_like=check_like,
    )


def assert_frame_equivalent(
    left: DataFrame | pd.DataFrame,
    right: DataFrame | pd.DataFrame,
    check_dtype: bool = True,
    check_index_type: bool = True,
    check_column_type: bool = True,
    check_frame_type: bool = True,
    check_names: bool = True,
    check_exact: bool = True,
    check_categorical: bool = True,
    check_like: bool = False,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    obj: str = "DataFrame",
) -> None:
    """
    Assert that two DataFrames are equal, accepting arkouda or pandas inputs.

    Intended mainly for unit tests: it compares two DataFrames and reports any
    differences. The keyword options vary how strict the comparison is.

    Each argument is passed through ``_convert_to_arkouda`` (a pd.DataFrame
    becomes an arkouda DataFrame) and the converted pair is compared with
    ``assert_frame_equal``, forwarding every option unchanged.

    Parameters
    ----------
    left : DataFrame or pd.DataFrame
        First DataFrame to compare.
    right : DataFrame or pd.DataFrame
        Second DataFrame to compare.
    check_dtype : bool, default True
        Whether to check the DataFrame dtype is identical.
    check_index_type : bool, default True
        Whether to check the Index class, dtype and inferred_type
        are identical.
    check_column_type : bool, default True
        Whether to check the columns class, dtype and inferred_type
        are identical. Is passed as the ``exact`` argument of
        :func:`assert_index_equal`.
    check_frame_type : bool, default True
        Whether to check the DataFrame class is identical.
    check_names : bool, default True
        Whether to check that the `names` attribute for both the `index`
        and `column` attributes of the DataFrame is identical.
    check_exact : bool, default True
        Whether to compare number exactly.
    check_categorical : bool, default True
        Whether to compare internal Categorical exactly.
    check_like : bool, default False
        If True, ignore the order of index & columns.
        Note: index labels must match their respective rows
        (same as in columns) - same labels must be with the same data.
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.
    obj : str, default 'DataFrame'
        Specify object name being compared, internally used to show appropriate
        assertion message.

    Raises
    ------
    TypeError
        If either argument is neither an arkouda.DataFrame nor a
        pandas.DataFrame.

    See Also
    --------
    assert_frame_equal

    Examples
    --------
    ``df1`` and ``df2`` hold equal values, but column ``b`` is integer in one
    and float in the other, so with the default ``check_dtype=True`` the two
    are not expected to compare as equivalent. A DataFrame compared with
    itself passes.

    >>> import arkouda as ak
    >>> from arkouda.testing import assert_frame_equivalent
    >>> import pandas as pd
    >>> df1 = ak.DataFrame({'a': [1, 2], 'b': [3, 4]})
    >>> df2 = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})
    >>> assert_frame_equivalent(df1, df1)
    """
    __tracebackhide__ = not DEBUG

    frame_types = (DataFrame, pd.DataFrame)
    if not (isinstance(left, frame_types) and isinstance(right, frame_types)):
        raise TypeError(
            "left and right must be type arkouda.DataFrame or pandas.DataFrame.  "
            f"Instead types were {type(left)} and {type(right)}."
        )

    ak_left = _convert_to_arkouda(left)
    ak_right = _convert_to_arkouda(right)
    assert_frame_equal(
        ak_left,
        ak_right,
        check_dtype=check_dtype,
        check_index_type=check_index_type,
        check_column_type=check_column_type,
        check_frame_type=check_frame_type,
        check_names=check_names,
        check_exact=check_exact,
        check_categorical=check_categorical,
        check_like=check_like,
        rtol=rtol,
        atol=atol,
        obj=obj,
    )


def assert_equivalent(left, right, **kwargs) -> None:
    """
    Dispatch to the appropriate tm.assert_*_equivalent function.

    The choice is made from the type of ``left`` alone:

    * Index / pd.Index -> assert_index_equivalent
    * Series / pd.Series -> assert_series_equivalent
    * DataFrame / pd.DataFrame -> assert_frame_equivalent
    * pdarray, np.ndarray, Strings, Categorical, pd.Categorical, SegArray
      -> assert_arkouda_array_equivalent
    * str -> compared with ``==``
    * anything else -> assert_almost_equivalent with default tolerances

    Parameters
    ----------
    left, right : Index, pd.Index, Series, pd.Series, DataFrame, pd.DataFrame,
        Strings, Categorical, pd.Categorical, SegArray, pdarray, np.ndarray,
        The two items to be compared.
    **kwargs
        Keyword arguments passed through to the selected
        assert_*_equivalent function. For the ``str`` and fallback cases no
        keyword arguments are accepted; supplying any fails an assertion.
    """
    __tracebackhide__ = not DEBUG

    # Checked in order; the first entry whose types match ``left`` wins.
    dispatch_table = (
        ((Index, pd.Index), assert_index_equivalent),
        ((Series, pd.Series), assert_series_equivalent),
        ((DataFrame, pd.DataFrame), assert_frame_equivalent),
        (
            (pdarray, np.ndarray, Strings, Categorical, pd.Categorical, SegArray),
            assert_arkouda_array_equivalent,
        ),
    )
    for accepted_types, asserter in dispatch_table:
        if isinstance(left, accepted_types):
            asserter(left, right, **kwargs)
            return

    # Neither remaining comparison takes options.
    assert kwargs == {}
    if isinstance(left, str):
        assert left == right
    else:
        assert_almost_equivalent(left, right)