def plot_dist(b, h, log=True, xlabel=None, newfig=True):
    """
    Draw a histogram's distribution next to its cumulative distribution.

    Two side-by-side panels are drawn with matplotlib: the left panel plots
    ``h`` against ``b``; the right panel plots the running sum of ``h``
    divided by the total of ``h``, with its y axis fixed to ``(0, 1)``.

    Parameters
    ----------
    b : np.ndarray
        Bin edges
    h : np.ndarray
        Histogram data
    log : bool
        When True, log-scale the y axis of the distribution panel
    xlabel : str
        Label for the x axis of both panels; no label is set when None
    newfig : bool
        When True, create a new 12x5 figure before plotting

    Notes
    -----
    This function neither returns nor shows the plot. The caller needs
    matplotlib imported alongside arkouda and must display the figure
    explicitly, as in the example below.

    Examples
    --------
    >>> import arkouda as ak
    >>> from matplotlib import pyplot as plt
    >>> b, h = ak.histogram(ak.arange(10), 3)
    >>> ak.plot_dist(b, h.to_ndarray())
    >>> # to show the plot
    >>> plt.show()
    """
    if newfig:
        plt.figure(figsize=(12, 5))
    has_label = xlabel is not None

    # Left panel: the raw distribution.
    dist_ax = plt.subplot(1, 2, 1)
    dist_ax.plot(b, h, marker=".", linestyle="solid")
    if log:
        dist_ax.set_yscale("log")
    if has_label:
        dist_ax.set_xlabel(xlabel, fontsize=14)
    dist_ax.set_title("distribution")

    # Right panel: cumulative share of the total.
    cdf_ax = plt.subplot(1, 2, 2)
    cumulative = np.cumsum(h) / np.sum(h)
    cdf_ax.plot(b, cumulative, marker=None, linestyle="solid")
    cdf_ax.set_ylim((0, 1))
    cdf_ax.set_title("cumulative distribution")
    if has_label:
        cdf_ax.set_xlabel(xlabel, fontsize=14)
def _drop_nan_and_skew(values):
    """Return ``(values, n, g1)``: NaN-filtered float64 values, their length and skew."""
    if values.dtype == "float64":
        values = values[~isnan(values)]
    return values, len(values), skew(values)


def hist_all(ak_df: DataFrame, cols: list = []):
    """
    Create a grid plot histogramming columns of an Arkouda DataFrame.

    One subplot is drawn per column. The number of bins for each column is
    chosen with Doane's formula. Columns for which the numeric path raises
    ``ValueError`` are histogrammed over integer group labels obtained from
    a ``GroupBy`` of the column instead.

    Parameters
    ----------
    ak_df : ak.DataFrame
        Full Arkouda DataFrame containing data to be visualized
    cols : list
        (Optional) A specified list of columns to be plotted. When empty,
        every column in ``ak_df.columns`` is plotted.

    Notes
    -----
    The figure is created with ``plt.subplots``; this function does not call
    ``plt.show()`` itself and does not return the figure.

    Each plotted column needs at least three values after NaN removal:
    the Doane computation divides by a term that is zero for two values
    and takes the square root of a negative number for fewer.

    Examples
    --------
    >>> import arkouda as ak
    >>> import numpy as np
    >>> from arkouda.plotting import hist_all
    >>> ak_df = ak.DataFrame({"a": ak.array(np.random.randn(100)),
    ...                       "b": ak.array(np.random.randn(100)),
    ...                       "c": ak.array(np.random.randn(100)),
    ...                       "d": ak.array(np.random.randn(100))
    ...                       })
    >>> hist_all(ak_df)
    """
    # ``cols`` is only read, never mutated, so the shared default list is safe.
    if len(cols) == 0:
        cols = ak_df.columns

    # Near-square grid with enough cells for every requested column.
    num_rows = int(math.ceil(len(cols) ** 0.5))
    num_cols = (len(cols) + num_rows - 1) // num_rows
    fig, axes = plt.subplots(num_rows, num_cols, figsize=(10, 10))
    fig.tight_layout(pad=2.0)

    # plt.subplots returns a bare Axes for a 1x1 grid and an array otherwise;
    # normalise both cases to a flat, indexable sequence.
    if num_rows > 1:
        axes = axes.flatten()
    else:
        axes = [axes]

    # Index by position rather than ``cols.index(col)`` so repeated column
    # names each get their own subplot and no per-column search is needed.
    for position, col in enumerate(cols):
        ax = axes[position]
        try:
            x, n, g1 = _drop_nan_and_skew(ak_df[col])
        except ValueError:
            # Fall back to histogramming integer labels of the column's groups.
            GB_df = GroupBy(ak_df[col])
            new_labels = arange(GB_df.unique_keys.size)
            newcol = GB_df.broadcast(new_labels)
            x, n, g1 = _drop_nan_and_skew(newcol[: ak_df.size])

        sigma_g1 = math.sqrt(6 * (n - 2) / ((n + 1) * (n + 3)))

        # Doane's Formula
        num_bins = int(1 + math.log2(n) + math.log2(1 + abs(g1) / sigma_g1))

        # Compute histogram counts in arkouda
        h = histogram(x, num_bins)

        # Compute bins in numpy
        if isinstance(x, Datetime):
            # Matplotlib has trouble plotting np.datetime64 and np.timedelta64
            bins = date_range(x.min(), x.max(), periods=num_bins).to_ndarray().astype("int")
        elif isinstance(x, Timedelta):
            bins = timedelta_range(x.min(), x.max(), periods=num_bins).to_ndarray().astype("int")
        else:
            # Left edges of ``num_bins`` equal-width bins spanning [min, max].
            bins = np.linspace(x.min(), x.max(), num_bins + 1)[:-1]

        # The second element of the histogram result supplies the bar heights.
        ax.bar(bins, h[1].to_ndarray(), width=bins[1] - bins[0])
        ax.set_title(col, size=8)
        if x.max() > 100 * x.min():
            ax.set_yscale("log")