whylogs.api.pyspark.experimental.profiler#

Module Contents#

Functions#

Attributes#

whylogs.api.pyspark.experimental.profiler.logger#
whylogs.api.pyspark.experimental.profiler.SparkDataFrame#
whylogs.api.pyspark.experimental.profiler.COL_NAME_FIELD = 'col_name'#
whylogs.api.pyspark.experimental.profiler.COL_PROFILE_FIELD = 'col_profile'#
whylogs.api.pyspark.experimental.profiler.whylogs_pandas_map_profiler(pdf_iterator: Iterable[whylogs.core.stubs.pd.DataFrame], schema: Optional[whylogs.core.DatasetSchema] = None) → Iterable[whylogs.core.stubs.pd.DataFrame]#
Parameters
  • pdf_iterator (Iterable[whylogs.core.stubs.pd.DataFrame]) –

  • schema (Optional[whylogs.core.DatasetSchema]) –

Return type

Iterable[whylogs.core.stubs.pd.DataFrame]

whylogs.api.pyspark.experimental.profiler.column_profile_bytes_aggregator(group_by_cols: Tuple[str], profiles_df: whylogs.core.stubs.pd.DataFrame) → whylogs.core.stubs.pd.DataFrame#
Parameters
  • group_by_cols (Tuple[str]) –

  • profiles_df (whylogs.core.stubs.pd.DataFrame) –

Return type

whylogs.core.stubs.pd.DataFrame

whylogs.api.pyspark.experimental.profiler.collect_column_profile_views(input_df: pyspark.sql.DataFrame, schema: Optional[whylogs.core.DatasetSchema] = None) → Dict[str, whylogs.core.view.column_profile_view.ColumnProfileView]#
Parameters
  • input_df (pyspark.sql.DataFrame) –

  • schema (Optional[whylogs.core.DatasetSchema]) –

Return type

Dict[str, whylogs.core.view.column_profile_view.ColumnProfileView]

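whylogs.api.pyspark.experimental.profiler.collect_dataset_profile_view(input_df: pyspark.sql.DataFrame, dataset_timestamp: Optional[datetime.datetime] = None, creation_timestamp: Optional[datetime.datetime] = None, schema: Optional[whylogs.core.DatasetSchema] = None) → whylogs.core.view.dataset_profile_view.DatasetProfileView#
Parameters
  • input_df (pyspark.sql.DataFrame) –

  • dataset_timestamp (Optional[datetime.datetime]) –

  • creation_timestamp (Optional[datetime.datetime]) –

  • schema (Optional[whylogs.core.DatasetSchema]) –

Return type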

whylogs.core.view.dataset_profile_view.DatasetProfileView