whylogs.core#

Subpackages#

Submodules#

Package Contents#

Classes#

ColumnProfile

DatasetProfile

Dataset profile represents a collection of in-memory profiling stats for a dataset.

TypeMapper

Helper class that provides a standard way to create an ABC using inheritance.

MetricGetter

ProfileGetter

MetricConfig

CardinalityThresholds

ModelPerformanceMetrics

Container class for various model-related performance metrics

Predicate

Resolver

A resolver maps from a column name and a data type to trackers.

ColumnSchema

Schema of a column.

DatasetSchema

Defines the schema for tracking metrics in whylogs.

Segment

SegmentationPartition

ColumnProfileView

DatasetProfileView

A Writable is an object that contains data to write to a file or files.

Functions#

Not(→ Predicate)

Require(→ Predicate)

Attributes#

class whylogs.core.ColumnProfile(name: str, schema: whylogs.core.schema.ColumnSchema, cache_size: int)#

Bases: object

Parameters
  • name (str) –

  • schema (whylogs.core.schema.ColumnSchema) –

  • cache_size (int) –

add_metric(metric: whylogs.core.metrics.Metric) None#
Parameters

metric (whylogs.core.metrics.Metric) –

Return type

None

track(row: Dict[str, Any]) None#
Parameters

row (Dict[str, Any]) –

Return type

None

flush() None#

Force the cache to be emptied and update the internal metrics.

Return type

None

track_column(series: Any, identity_values: Optional[Any] = None) None#
Parameters
  • series (Any) –

  • identity_values (Optional[Any]) –

Return type

None

to_protobuf() whylogs.core.proto.ColumnMessage#
Return type

whylogs.core.proto.ColumnMessage

view() whylogs.core.view.ColumnProfileView#
Return type

whylogs.core.view.ColumnProfileView

class whylogs.core.DatasetProfile(schema: Optional[whylogs.core.schema.DatasetSchema] = None, dataset_timestamp: Optional[datetime.datetime] = None, creation_timestamp: Optional[datetime.datetime] = None, metrics: Optional[Dict[str, Union[whylogs.core.metrics.Metric, Any]]] = None, metadata: Optional[Dict[str, str]] = None)#

Bases: whylogs.api.writer.writer._Writable

Dataset profile represents a collection of in-memory profiling stats for a dataset.

Parameters
  • schema (Optional[whylogs.core.schema.DatasetSchema]) – DatasetSchema, optional An object that represents the data column names and types

  • dataset_timestamp (Optional[datetime.datetime]) – int, optional A timestamp integer that best represents the date tied to the dataset generation, e.g. a January 1st 2019 Sales Dataset will have 1546300800000 as the timestamp in milliseconds (UTC). If None is provided, it will take the current timestamp as default

  • creation_timestamp (Optional[datetime.datetime]) – int, optional The timestamp tied to the exact moment when the DatasetProfile is created. If None is provided, it will take the current timestamp as default

  • metrics (Optional[Dict[str, Union[whylogs.core.metrics.Metric, Any]]]) –

  • metadata (Optional[Dict[str, str]]) –

property creation_timestamp: datetime.datetime#
Return type

datetime.datetime

property dataset_timestamp: datetime.datetime#
Return type

datetime.datetime

property is_active: bool#

Returns True if the profile tracking code is currently running.

Return type

bool

property is_empty: bool#

Returns True if the profile is empty, i.e. no data has been tracked yet.

Return type

bool

property metadata: Dict[str, str]#
Return type

Dict[str, str]

property model_performance_metrics: whylogs.core.model_performance_metrics.model_performance_metrics.ModelPerformanceMetrics#
Return type

whylogs.core.model_performance_metrics.model_performance_metrics.ModelPerformanceMetrics

set_dataset_timestamp(dataset_timestamp: datetime.datetime) None#
Parameters

dataset_timestamp (datetime.datetime) –

Return type

None

add_metric(col_name: str, metric: whylogs.core.metrics.Metric) None#
Parameters
  • col_name (str) –

  • metric (whylogs.core.metrics.Metric) –

Return type

None

add_dataset_metric(name: str, metric: whylogs.core.metrics.Metric) None#
Parameters
  • name (str) –

  • metric (whylogs.core.metrics.Metric) –

Return type

None

add_model_performance_metrics(metric: whylogs.core.model_performance_metrics.model_performance_metrics.ModelPerformanceMetrics) None#
Parameters

metric (whylogs.core.model_performance_metrics.model_performance_metrics.ModelPerformanceMetrics) –

Return type

None

track(obj: Any = None, *, pandas: Optional[whylogs.core.stubs.pd.DataFrame] = None, row: Optional[Mapping[str, Any]] = None, execute_udfs: bool = True) None#
Parameters
  • obj (Any) –

  • pandas (Optional[whylogs.core.stubs.pd.DataFrame]) –

  • row (Optional[Mapping[str, Any]]) –

  • execute_udfs (bool) –

Return type

None

view() whylogs.core.view.DatasetProfileView#
Return type

whylogs.core.view.DatasetProfileView

flush() None#
Return type

None

write(path: Optional[str] = None, **kwargs: Any) Tuple[bool, str]#
Parameters
  • path (Optional[str]) –

  • kwargs (Any) –

Return type

Tuple[bool, str]

classmethod read(input_path: str) whylogs.core.view.DatasetProfileView#
Parameters

input_path (str) –

Return type

whylogs.core.view.DatasetProfileView

writer(name: str = 'local', **kwargs: Any) WriterWrapper#

Utility method to create a Writer of the specified type

Parameters
  • name (str) –

  • kwargs (Any) –

Return type

WriterWrapper

class whylogs.core.TypeMapper#

Bases: abc.ABC

Helper class that provides a standard way to create an ABC using inheritance.

class whylogs.core.MetricGetter(metric: whylogs.core.metrics.metrics.Metric, path: str)#

Bases: whylogs.core.relations.ValueGetter

Parameters
  • metric (whylogs.core.metrics.metrics.Metric) –

  • path (str) –

serialize() str#
Return type

str

class whylogs.core.ProfileGetter(profile: Union[whylogs.core.dataset_profile.DatasetProfile, whylogs.core.view.dataset_profile_view.DatasetProfileView], column_name: str, path: str)#

Bases: whylogs.core.relations.ValueGetter

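Parameters
  • profile (Union[whylogs.core.dataset_profile.DatasetProfile, whylogs.core.view.dataset_profile_view.DatasetProfileView]) –

  • column_name (str) –

  • path (str) –
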
serialize() str#
Return type

str

class whylogs.core.MetricConfig#
hll_lg_k: int#
kll_k: int#
fi_lg_max_k: int#
fi_disabled: bool#
track_unicode_ranges: bool#
large_kll_k: bool#
kll_k_large: int#
unicode_ranges: Dict[str, Tuple[int, int]]#
lower_case: bool#
normalize: bool#
max_frequent_item_size: int#
identity_column: Optional[str]#
class whylogs.core.CardinalityThresholds#
few: int = 50#
proportionately_few: float = 0.01#
class whylogs.core.ModelPerformanceMetrics(confusion_matrix: Optional[whylogs.core.model_performance_metrics.confusion_matrix.ConfusionMatrix] = None, regression_metrics: Optional[whylogs.core.model_performance_metrics.regression_metrics.RegressionMetrics] = None, metrics: Optional[Dict[str, whylogs.core.metrics.metrics.Metric]] = None, field_metadata: Optional[Dict[str, Set[str]]] = None)#

Container class for various model-related performance metrics

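Parameters
  • confusion_matrix (Optional[whylogs.core.model_performance_metrics.confusion_matrix.ConfusionMatrix]) –

  • regression_metrics (Optional[whylogs.core.model_performance_metrics.regression_metrics.RegressionMetrics]) –

  • metrics (Optional[Dict[str, whylogs.core.metrics.metrics.Metric]]) –

  • field_metadata (Optional[Dict[str, Set[str]]]) –
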
confusion_matrix#

ConfusionMatrix which keeps track of counts with NumberTracker

Type

ConfusionMatrix

regression_metrics#

RegressionMetrics keeps track of common regression metrics in case the targets are continuous.

Type

RegressionMetrics

property output_fields: Optional[List[str]]#
Return type

Optional[List[str]]

to_protobuf() whylogs.core.proto.v0.ModelProfileMessage#
Return type

whylogs.core.proto.v0.ModelProfileMessage

classmethod from_protobuf(message: whylogs.core.proto.v0.ModelProfileMessage) ModelPerformanceMetrics#
Parameters

message (whylogs.core.proto.v0.ModelProfileMessage) –

Return type

ModelPerformanceMetrics

compute_confusion_matrix(predictions: List[Union[str, int, bool, float]], targets: List[Union[str, int, bool, float]], scores: Optional[List[float]] = None)#

Computes the confusion matrix; if one is already present, the new result is merged into the existing one.

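Parameters
  • predictions (List[Union[str, int, bool, float]]) –

  • targets (List[Union[str, int, bool, float]]) –

  • scores (Optional[List[float]]) –
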
compute_regression_metrics(predictions: List[Union[float, int]], targets: List[Union[float, int]])#
Parameters
  • predictions (List[Union[float, int]]) –

  • targets (List[Union[float, int]]) –

add_metadata_to_field(column_name: str, categories: Set[str]) None#
Parameters
  • column_name (str) –

  • categories (Set[str]) –

Return type

None

specify_output_fields(column_names: Union[str, Set[str]]) None#
Parameters

column_names (Union[str, Set[str]]) –

Return type

None

merge(other) ModelPerformanceMetrics#
Return type

ModelPerformanceMetrics

whylogs.core.Not(p: Predicate) Predicate#
Parameters

p (Predicate) –

Return type

Predicate

class whylogs.core.Predicate(op: Relation = Relation.no_op, value: Union[str, int, float, ValueGetter] = 0, udf: Optional[Callable[[Any], bool]] = None, left: Optional[Predicate] = None, right: Optional[Predicate] = None, component: Optional[str] = None)#
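Parameters
  • op (Relation) –

  • value (Union[str, int, float, ValueGetter]) –

  • udf (Optional[Callable[[Any], bool]]) –

  • left (Optional[Predicate]) –

  • right (Optional[Predicate]) –

  • component (Optional[str]) –
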
property not_: Predicate#
Return type

Predicate

matches(value: Union[str, int, float, ValueGetter]) Predicate#
Parameters

value (Union[str, int, float, ValueGetter]) –

Return type

Predicate

fullmatch(value: Union[str, int, float, ValueGetter]) Predicate#
Parameters

value (Union[str, int, float, ValueGetter]) –

Return type

Predicate

search(value: Union[str, int, float, ValueGetter]) Predicate#
Parameters

value (Union[str, int, float, ValueGetter]) –

Return type

Predicate

equals(value: Union[str, int, float, ValueGetter]) Predicate#
Parameters

value (Union[str, int, float, ValueGetter]) –

Return type

Predicate

less_than(value: Union[str, int, float, ValueGetter]) Predicate#
Parameters

value (Union[str, int, float, ValueGetter]) –

Return type

Predicate

less_or_equals(value: Union[str, int, float, ValueGetter]) Predicate#
Parameters

value (Union[str, int, float, ValueGetter]) –

Return type

Predicate

greater_than(value: Union[str, int, float, ValueGetter]) Predicate#
Parameters

value (Union[str, int, float, ValueGetter]) –

Return type

Predicate

greater_or_equals(value: Union[str, int, float, ValueGetter]) Predicate#
Parameters

value (Union[str, int, float, ValueGetter]) –

Return type

Predicate

not_equal(value: Union[str, int, float, ValueGetter]) Predicate#
Parameters

value (Union[str, int, float, ValueGetter]) –

Return type

Predicate

and_(right: Predicate) Predicate#
Parameters

right (Predicate) –

Return type

Predicate

or_(right: Predicate) Predicate#
Parameters

right (Predicate) –

Return type

Predicate

is_(udf: Callable[[Any], bool]) Predicate#
Parameters

udf (Callable[[Any], bool]) –

Return type

Predicate

serialize() str#
Return type

str

whylogs.core.Require(component: Optional[str] = None) Predicate#
Parameters

component (Optional[str]) –

Return type

Predicate

class whylogs.core.Resolver#

Bases: abc.ABC

A resolver maps from a column name and a data type to trackers.

Note that the key of the result dictionaries defines the namespaces of the metrics in the serialized form.

abstract resolve(name: str, why_type: whylogs.core.datatypes.DataType, column_schema: ColumnSchema) Dict[str, whylogs.core.metrics.metrics.Metric]#
Parameters
  • name (str) –

  • why_type (whylogs.core.datatypes.DataType) –

  • column_schema (ColumnSchema) –

Return type

Dict[str, whylogs.core.metrics.metrics.Metric]

class whylogs.core.ColumnSchema#

Schema of a column.

The main goal is to specify the data type. On top of that, users can configure their own tracker resolution logic (mapping a type to a list of tracker factories) and any additional trackers here.

dtype: Any#
cfg: whylogs.core.metrics.metrics.MetricConfig#
type_mapper: whylogs.core.datatypes.TypeMapper#
resolver: whylogs.core.resolvers.Resolver#
validators: Dict[str, List[whylogs.core.validators.validator.Validator]]#
get_metrics(name: str) Dict[str, whylogs.core.metrics.metrics.Metric]#
Parameters

name (str) –

Return type

Dict[str, whylogs.core.metrics.metrics.Metric]

get_validators(name: str) List[Optional[whylogs.core.validators.validator.Validator]]#
Parameters

name (str) –

Return type

List[Optional[whylogs.core.validators.validator.Validator]]

class whylogs.core.DatasetSchema(types: Optional[Dict[str, Any]] = None, default_configs: Optional[whylogs.core.metrics.metrics.MetricConfig] = None, type_mapper: Optional[whylogs.core.datatypes.TypeMapper] = None, resolvers: Optional[whylogs.core.resolvers.Resolver] = None, cache_size: int = 1024, schema_based_automerge: bool = False, segments: Optional[Dict[str, whylogs.core.segmentation_partition.SegmentationPartition]] = None, validators: Optional[Dict[str, List[whylogs.core.validators.validator.Validator]]] = None, metadata: Optional[Dict[str, str]] = None)#

Defines the schema for tracking metrics in whylogs.

In order to customize your tracking, you can extend this class to specify your own column schema or your own type resolution. Otherwise, you can just use the default DatasetSchema object.

Schema objects are also used to group datasets together.

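Parameters
  • types (Optional[Dict[str, Any]]) –

  • default_configs (Optional[whylogs.core.metrics.metrics.MetricConfig]) –

  • type_mapper (Optional[whylogs.core.datatypes.TypeMapper]) –

  • resolvers (Optional[whylogs.core.resolvers.Resolver]) –

  • cache_size (int) –

  • schema_based_automerge (bool) –

  • segments (Optional[Dict[str, whylogs.core.segmentation_partition.SegmentationPartition]]) –

  • validators (Optional[Dict[str, List[whylogs.core.validators.validator.Validator]]]) –

  • metadata (Optional[Dict[str, str]]) –
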
types#

Required. A dictionary mapping each column name to its Python type.

default_configs#

Optional. Options to configure various behaviors of whylogs.

type_mapper#

Optional. A mapper that translates the Python type to a standardized whylogs DataType object.

resolvers#

Optional. An object that defines how to map from a column name, a whylogs DataType and a schema to metrics.

Examples

>>> import pandas as pd
>>> import numpy as np
>>> from whylogs.core import DatasetSchema, DatasetProfile
>>> from whylogs.core.resolvers import Resolver, StandardResolver
>>>
>>> class MyResolver(StandardResolver):
...    pass
>>>
>>> schema = DatasetSchema(
...    types={
...        "col1": str,
...        "col2": np.int32,
...        "col3": pd.CategoricalDtype(categories=('foo', 'bar'), ordered=True)
...    },
...    resolvers=MyResolver()
... )
>>> prof = DatasetProfile(schema)
>>> df = pd.DataFrame({"col1": ['foo'], "col2": np.array([1], dtype=np.int32), "col3": ['bar']})
>>> prof.track(pandas=df)
copy() DatasetSchema#

Returns a new instance of the same underlying schema

Return type

DatasetSchema

resolve(*, pandas: Optional[whylogs.core.stubs.pd.DataFrame] = None, row: Optional[Mapping[str, Any]] = None) bool#
Parameters
  • pandas (Optional[whylogs.core.stubs.pd.DataFrame]) –

  • row (Optional[Mapping[str, Any]]) –

Return type

bool

get_col_names() tuple#
Return type

tuple

get(name: str) Optional[ColumnSchema]#
Parameters

name (str) –

Return type

Optional[ColumnSchema]

class whylogs.core.Segment#
key: Tuple[str, Ellipsis]#
parent_id: str#
class whylogs.core.SegmentationPartition#
property simple: bool#
Return type

bool

property filter_id: str#
Return type

str

name: str#
mapper: Optional[ColumnMapperFunction]#
id: str#
filter: Optional[SegmentFilter]#
whylogs.core.WHYLOGS_MAGIC_HEADER = 'WHY1'#
class whylogs.core.ColumnProfileView(metrics: Dict[str, METRIC], success_count: int = 0, failure_count: int = 0)#

Bases: object

Parameters
  • metrics (Dict[str, METRIC]) –

  • success_count (int) –

  • failure_count (int) –

merge(other: ColumnProfileView) ColumnProfileView#
Parameters

other (ColumnProfileView) –

Return type

ColumnProfileView

serialize() bytes#
Return type

bytes

classmethod deserialize(serialized_profile: bytes) ColumnProfileView#
Parameters

serialized_profile (bytes) –

Return type

ColumnProfileView

get_metric(m_name: str) Optional[METRIC]#
Parameters

m_name (str) –

Return type

Optional[METRIC]

to_protobuf() whylogs.core.proto.ColumnMessage#
Return type

whylogs.core.proto.ColumnMessage

get_metric_component_paths() List[str]#
Return type

List[str]

get_metric_names() List[str]#
Return type

List[str]

get_metrics() List[whylogs.core.metrics.metrics.Metric]#
Return type

List[whylogs.core.metrics.metrics.Metric]

to_summary_dict(*, column_metric: Optional[str] = None, cfg: Optional[whylogs.core.configs.SummaryConfig] = None) Dict[str, Any]#
Parameters
  • column_metric (Optional[str]) –

  • cfg (Optional[whylogs.core.configs.SummaryConfig]) –

Return type

Dict[str, Any]

classmethod zero(msg: whylogs.core.proto.ColumnMessage) ColumnProfileView#
Parameters

msg (whylogs.core.proto.ColumnMessage) –

Return type

ColumnProfileView

classmethod from_protobuf(msg: whylogs.core.proto.ColumnMessage) ColumnProfileView#
Parameters

msg (whylogs.core.proto.ColumnMessage) –

Return type

ColumnProfileView

classmethod from_bytes(data: bytes) ColumnProfileView#
Parameters

data (bytes) –

Return type

ColumnProfileView

class whylogs.core.DatasetProfileView(*, columns: Dict[str, whylogs.core.view.column_profile_view.ColumnProfileView], dataset_timestamp: Optional[datetime.datetime], creation_timestamp: Optional[datetime.datetime], metrics: Optional[Dict[str, Any]] = None, metadata: Optional[Dict[str, str]] = None)#

Bases: whylogs.api.writer.writer._Writable

A Writable is an object that contains data to write to a file or files. These might be temporary files intended to be passed on to another consumer (e.g., WhyLabs servers) via a Writer.

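Parameters
  • columns (Dict[str, whylogs.core.view.column_profile_view.ColumnProfileView]) –

  • dataset_timestamp (Optional[datetime.datetime]) –

  • creation_timestamp (Optional[datetime.datetime]) –

  • metrics (Optional[Dict[str, Any]]) –

  • metadata (Optional[Dict[str, str]]) –
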
property dataset_timestamp: Optional[datetime.datetime]#
Return type

Optional[datetime.datetime]

property creation_timestamp: Optional[datetime.datetime]#
Return type

Optional[datetime.datetime]

property metadata: Dict[str, str]#
Return type

Dict[str, str]

property model_performance_metrics: Any#
Return type

Any

set_dataset_timestamp(dataset_timestamp: datetime.datetime) None#
Parameters

dataset_timestamp (datetime.datetime) –

Return type

None

add_model_performance_metrics(metric: Any) None#
Parameters

metric (Any) –

Return type

None

merge(other: DatasetProfileView) DatasetProfileView#
Parameters

other (DatasetProfileView) –

Return type

DatasetProfileView

get_column(col_name: str) Optional[whylogs.core.view.column_profile_view.ColumnProfileView]#
Parameters

col_name (str) –

Return type

Optional[whylogs.core.view.column_profile_view.ColumnProfileView]

get_columns(col_names: Optional[List[str]] = None) Dict[str, whylogs.core.view.column_profile_view.ColumnProfileView]#
Parameters

col_names (Optional[List[str]]) –

Return type

Dict[str, whylogs.core.view.column_profile_view.ColumnProfileView]

write(path: Optional[str] = None, **kwargs: Any) Tuple[bool, str]#
Parameters
  • path (Optional[str]) –

  • kwargs (Any) –

Return type

Tuple[bool, str]

serialize() bytes#
Return type

bytes

classmethod zero() DatasetProfileView#
Return type

DatasetProfileView

classmethod deserialize(data: bytes) DatasetProfileView#
Parameters

data (bytes) –

Return type

DatasetProfileView

classmethod read(path: str) DatasetProfileView#
Parameters

path (str) –

Return type

DatasetProfileView

to_pandas(column_metric: Optional[str] = None, cfg: Optional[whylogs.core.configs.SummaryConfig] = None) whylogs.core.stubs.pd.DataFrame#
Parameters
  • column_metric (Optional[str]) –

  • cfg (Optional[whylogs.core.configs.SummaryConfig]) –

Return type

whylogs.core.stubs.pd.DataFrame

writer(name: str = 'local', **kwargs: Any) WriterWrapper#

Utility method to create a Writer of the specified type

Parameters
  • name (str) –

  • kwargs (Any) –

Return type

WriterWrapper