whylogs.core.dataset_profile#

Module Contents#

Classes#

DatasetProfile

Dataset profile represents a collection of in-memory profiling stats for a dataset.

Attributes#

whylogs.core.dataset_profile.logger#
class whylogs.core.dataset_profile.DatasetProfile(schema: Optional[whylogs.core.schema.DatasetSchema] = None, dataset_timestamp: Optional[datetime.datetime] = None, creation_timestamp: Optional[datetime.datetime] = None, metrics: Optional[Dict[str, Union[whylogs.core.metrics.Metric, Any]]] = None, metadata: Optional[Dict[str, str]] = None)#

Bases: whylogs.api.writer.writer.Writable

Dataset profile represents a collection of in-memory profiling stats for a dataset.

Parameters
  • schema (Optional[whylogs.core.schema.DatasetSchema]) – DatasetSchema, optional. An object that represents the data column names and types.

  • dataset_timestamp (Optional[datetime.datetime]) – datetime, optional. A timestamp that best represents the date tied to the dataset generation, e.g. a January 1st 2019 sales dataset would use 2019-01-01 00:00:00 UTC (1546300800000 as an epoch timestamp in milliseconds). If None is provided, the current timestamp is used as the default.

  • creation_timestamp (Optional[datetime.datetime]) – datetime, optional. The timestamp tied to the exact moment when the DatasetProfile is created. If None is provided, the current timestamp is used as the default.

  • metrics (Optional[Dict[str, Union[whylogs.core.metrics.Metric, Any]]]) –

  • metadata (Optional[Dict[str, str]]) –

property creation_timestamp: datetime.datetime#
Return type

datetime.datetime

property dataset_timestamp: datetime.datetime#
Return type

datetime.datetime

property is_active: bool#

Returns True if the profile tracking code is currently running.

Return type

bool

property is_empty: bool#

Returns True if the profile is empty, i.e. no data has been tracked in it yet.

Return type

bool

property metadata: Dict[str, str]#
Return type

Dict[str, str]

property model_performance_metrics: whylogs.core.model_performance_metrics.model_performance_metrics.ModelPerformanceMetrics#
Return type

whylogs.core.model_performance_metrics.model_performance_metrics.ModelPerformanceMetrics

set_dataset_timestamp(dataset_timestamp: datetime.datetime) None#
Parameters

dataset_timestamp (datetime.datetime) –

Return type

None

add_metric(col_name: str, metric: whylogs.core.metrics.Metric) None#
Parameters
  • col_name (str) –

  • metric (whylogs.core.metrics.Metric) –

Return type

None

add_dataset_metric(name: str, metric: whylogs.core.metrics.Metric) None#
Parameters
  • name (str) –

  • metric (whylogs.core.metrics.Metric) –

Return type

None

add_model_performance_metrics(metric: whylogs.core.model_performance_metrics.model_performance_metrics.ModelPerformanceMetrics) None#
Parameters

metric (whylogs.core.model_performance_metrics.model_performance_metrics.ModelPerformanceMetrics) –

Return type

None

track(obj: Any = None, *, pandas: Optional[whylogs.core.stubs.pd.DataFrame] = None, row: Optional[Mapping[str, Any]] = None, execute_udfs: bool = True) None#
Parameters
  • obj (Any) –

  • pandas (Optional[whylogs.core.stubs.pd.DataFrame]) –

  • row (Optional[Mapping[str, Any]]) –

  • execute_udfs (bool) –

Return type

None

view() whylogs.core.view.DatasetProfileView#
Return type

whylogs.core.view.DatasetProfileView

flush() None#
Return type

None

static get_default_path(path) str#
Return type

str

write(path: Optional[str] = None, **kwargs: Any) Tuple[bool, str]#
Parameters
  • path (Optional[str]) –

  • kwargs (Any) –

Return type

Tuple[bool, str]

classmethod read(input_path: str) whylogs.core.view.DatasetProfileView#
Parameters

input_path (str) –

Return type

whylogs.core.view.DatasetProfileView