whylogs.experimental.api.logger
#
Package Contents#
Classes#
A holder object for profiling results. |
|
A holder object for profiling results. |
|
Defines the schema for tracking metrics in whylogs. |
|
Functions#
|
|
Log ranking metrics for a batch of data. |
Attributes#
- whylogs.experimental.api.logger.log(obj: Any = None, *, pandas: Optional[whylogs.core.stubs.pd.DataFrame] = None, row: Optional[Dict[str, Any]] = None, schema: Optional[whylogs.core.DatasetSchema] = None, name: Optional[str] = None, multiple: Optional[Dict[str, Loggable]] = None, dataset_timestamp: Optional[datetime.datetime] = None, trace_id: Optional[str] = None, tags: Optional[List[str]] = None, segment_key_values: Optional[Dict[str, str]] = None, debug_event: Optional[Dict[str, Any]] = None) result_set.ResultSet #
- Parameters
obj (Any) –
pandas (Optional[whylogs.core.stubs.pd.DataFrame]) –
row (Optional[Dict[str, Any]]) –
schema (Optional[whylogs.core.DatasetSchema]) –
name (Optional[str]) –
multiple (Optional[Dict[str, Loggable]]) –
dataset_timestamp (Optional[datetime.datetime]) –
trace_id (Optional[str]) –
tags (Optional[List[str]]) –
segment_key_values (Optional[Dict[str, str]]) –
debug_event (Optional[Dict[str, Any]]) –
- Return type
- class whylogs.experimental.api.logger.SegmentedResultSet(segments: Dict[str, Dict[whylogs.core.Segment, Union[whylogs.core.DatasetProfile, whylogs.core.DatasetProfileView]]], partitions: Optional[List[whylogs.core.segmentation_partition.SegmentationPartition]] = None, metrics: Optional[Dict[str, Any]] = None, properties: Optional[Dict[str, Any]] = None)#
Bases:
ResultSet
A holder object for profiling results.
A whylogs.log call can result in more than one profile. This wrapper class simplifies the navigation among these profiles.
Note that currently we only hold one profile but we’re planning to add other kinds of profiles such as segmented profiles here.
- Parameters
segments (Dict[str, Dict[whylogs.core.Segment, Union[whylogs.core.DatasetProfile, whylogs.core.DatasetProfileView]]]) –
partitions (Optional[List[whylogs.core.segmentation_partition.SegmentationPartition]]) –
metrics (Optional[Dict[str, Any]]) –
properties (Optional[Dict[str, Any]]) –
- property partitions: Optional[List[whylogs.core.segmentation_partition.SegmentationPartition]]#
- Return type
Optional[List[whylogs.core.segmentation_partition.SegmentationPartition]]
- property model_performance_metric: Optional[whylogs.core.model_performance_metrics.ModelPerformanceMetrics]#
- Return type
Optional[whylogs.core.model_performance_metrics.ModelPerformanceMetrics]
- property performance_metrics: Optional[whylogs.core.model_performance_metrics.ModelPerformanceMetrics]#
- Return type
Optional[whylogs.core.model_performance_metrics.ModelPerformanceMetrics]
- profile(segment: Optional[whylogs.core.Segment] = None) Optional[Union[whylogs.core.DatasetProfile, whylogs.core.DatasetProfileView]] #
- Parameters
segment (Optional[whylogs.core.Segment]) –
- Return type
Optional[Union[whylogs.core.DatasetProfile, whylogs.core.DatasetProfileView]]
- get_writables() Optional[List[whylogs.api.writer.writer._Writable]] #
- Return type
Optional[List[whylogs.api.writer.writer._Writable]]
- get_whylabs_tags() List[whylabs_client.model.segment_tag.SegmentTag] #
- Return type
List[whylabs_client.model.segment_tag.SegmentTag]
- get_timestamps() List[Optional[datetime.datetime]] #
- Return type
List[Optional[datetime.datetime]]
- set_dataset_timestamp(dataset_timestamp: datetime.datetime) None #
- Parameters
dataset_timestamp (datetime.datetime) –
- Return type
- segments(restrict_to_parition_id: Optional[str] = None) Optional[List[whylogs.core.Segment]] #
- Parameters
restrict_to_parition_id (Optional[str]) –
- Return type
Optional[List[whylogs.core.Segment]]
- segments_in_partition(partition: whylogs.core.segmentation_partition.SegmentationPartition) Optional[Dict[whylogs.core.Segment, Union[whylogs.core.DatasetProfile, whylogs.core.DatasetProfileView]]] #
- Parameters
partition (whylogs.core.segmentation_partition.SegmentationPartition) –
- Return type
Optional[Dict[whylogs.core.Segment, Union[whylogs.core.DatasetProfile, whylogs.core.DatasetProfileView]]]
- view(segment: Optional[whylogs.core.Segment] = None) Optional[whylogs.core.DatasetProfileView] #
- Parameters
segment (Optional[whylogs.core.Segment]) –
- Return type
Optional[whylogs.core.DatasetProfileView]
- get_model_performance_metrics_for_segment(segment: whylogs.core.Segment) Optional[whylogs.core.model_performance_metrics.ModelPerformanceMetrics] #
- Parameters
segment (whylogs.core.Segment) –
- Return type
Optional[whylogs.core.model_performance_metrics.ModelPerformanceMetrics]
- add_metrics_for_segment(metrics: whylogs.core.model_performance_metrics.ModelPerformanceMetrics, segment: whylogs.core.Segment) None #
- Parameters
metrics (whylogs.core.model_performance_metrics.ModelPerformanceMetrics) –
segment (whylogs.core.Segment) –
- Return type
- static zero() SegmentedResultSet #
- Return type
- add_model_performance_metrics(metrics: whylogs.core.model_performance_metrics.ModelPerformanceMetrics) None #
- Parameters
metrics (whylogs.core.model_performance_metrics.ModelPerformanceMetrics) –
- Return type
- add_metric(name: str, metric: whylogs.core.metrics.metrics.Metric) None #
- Parameters
name (str) –
metric (whylogs.core.metrics.metrics.Metric) –
- Return type
- merge(other: ResultSet) SegmentedResultSet #
- Parameters
other (ResultSet) –
- Return type
- class whylogs.experimental.api.logger.ViewResultSet(view: whylogs.core.DatasetProfileView)#
Bases:
ResultSet
A holder object for profiling results.
A whylogs.log call can result in more than one profile. This wrapper class simplifies the navigation among these profiles.
Note that currently we only hold one profile but we’re planning to add other kinds of profiles such as segmented profiles here.
- Parameters
view (whylogs.core.DatasetProfileView) –
- property performance_metrics: Optional[whylogs.core.model_performance_metrics.ModelPerformanceMetrics]#
- Return type
Optional[whylogs.core.model_performance_metrics.ModelPerformanceMetrics]
- profile() Optional[whylogs.core.DatasetProfile] #
- Return type
Optional[whylogs.core.DatasetProfile]
- view() Optional[whylogs.core.DatasetProfileView] #
- Return type
Optional[whylogs.core.DatasetProfileView]
- static zero() ViewResultSet #
- Return type
- merge(other: ResultSet) ViewResultSet #
- Parameters
other (ResultSet) –
- Return type
- set_dataset_timestamp(dataset_timestamp: datetime.datetime) None #
- Parameters
dataset_timestamp (datetime.datetime) –
- Return type
- get_writables() Optional[List[whylogs.api.writer.writer._Writable]] #
- Return type
Optional[List[whylogs.api.writer.writer._Writable]]
- add_model_performance_metrics(metrics: whylogs.core.model_performance_metrics.ModelPerformanceMetrics) None #
- Parameters
metrics (whylogs.core.model_performance_metrics.ModelPerformanceMetrics) –
- Return type
- add_metric(name: str, metric: whylogs.core.metrics.metrics.Metric) None #
- Parameters
name (str) –
metric (whylogs.core.metrics.metrics.Metric) –
- Return type
- class whylogs.experimental.api.logger.DatasetSchema(types: Optional[Dict[str, Any]] = None, default_configs: Optional[whylogs.core.metrics.metrics.MetricConfig] = None, type_mapper: Optional[whylogs.core.datatypes.TypeMapper] = None, resolvers: Optional[whylogs.core.resolvers.Resolver] = None, cache_size: int = 1024, schema_based_automerge: bool = False, segments: Optional[Dict[str, whylogs.core.segmentation_partition.SegmentationPartition]] = None, validators: Optional[Dict[str, List[whylogs.core.validators.validator.Validator]]] = None, metadata: Optional[Dict[str, str]] = None)#
Defines the schema for tracking metrics in whylogs.
In order to customize your tracking, you can extend this class to specify your own column schema or your own type resolution. Otherwise, you can just use the default DatasetSchema object.
Schema objects are also used to group datasets together.
- Parameters
types (Optional[Dict[str, Any]]) –
default_configs (Optional[whylogs.core.metrics.metrics.MetricConfig]) –
type_mapper (Optional[whylogs.core.datatypes.TypeMapper]) –
resolvers (Optional[whylogs.core.resolvers.Resolver]) –
cache_size (int) –
schema_based_automerge (bool) –
segments (Optional[Dict[str, whylogs.core.segmentation_partition.SegmentationPartition]]) –
validators (Optional[Dict[str, List[whylogs.core.validators.validator.Validator]]]) –
metadata (Optional[Dict[str, str]]) –
- types#
Required. A dictionary mapping each column name to its Python type.
- default_configs#
optional. Options to configure various behavior of whylogs.
- type_mapper#
Optional. A mapper that translates the Python type to a standardized whylogs DataType object.
- resolvers#
Optional. An object that defines how to map from a column name, a whylogs DataType, and a schema to metrics.
Examples
>>> import pandas as pd
>>> import numpy as np
>>> from whylogs.core import DatasetSchema, DatasetProfile
>>> from whylogs.core.resolvers import Resolver, StandardResolver
>>>
>>> class MyResolver(StandardResolver):
...     pass
>>>
>>> schema = DatasetSchema(
...     types={
...         "col1": str,
...         "col2": np.int32,
...         "col3": pd.CategoricalDtype(categories=('foo', 'bar'), ordered=True)
...     },
...     resolvers=MyResolver()
... )
>>> prof = DatasetProfile(schema)
>>> df = pd.DataFrame({"col1": ['foo'], "col2": np.array([1], dtype=np.int32), "col3": ['bar']})
>>> prof.track(pandas=df)
- copy() DatasetSchema #
Returns a new instance of the same underlying schema
- Return type
- resolve(*, pandas: Optional[whylogs.core.stubs.pd.DataFrame] = None, row: Optional[Mapping[str, Any]] = None) bool #
- get(name: str) Optional[ColumnSchema] #
- Parameters
name (str) –
- Return type
Optional[ColumnSchema]
- whylogs.experimental.api.logger.diagnostic_logger#
- class whylogs.experimental.api.logger.RowWiseMetrics(target_column: str, prediction_column: str, convert_non_numeric: bool = False)#
- whylogs.experimental.api.logger.log_batch_ranking_metrics(data: whylogs.core.stubs.pd.core.frame.DataFrame, prediction_column: Optional[str] = None, target_column: Optional[str] = None, score_column: Optional[str] = None, k: Optional[int] = None, schema: Union[whylogs.core.DatasetSchema, None] = None, log_full_data: bool = False) whylogs.api.logger.result_set.ViewResultSet #
Log ranking metrics for a batch of data.
- You can call the function several ways:
- Pass both prediction_column and target_column.
The named columns contain lists of strings. In this case, the prediction column contains the items the model has predicted are relevant, and the target column contains the items that are actually relevant. In this case, relevance is boolean.
The prediction column contains lists of integers and the target column contains lists of numbers or booleans. The value at the i-th position in the predicted list is the predicted rank of the i-th element of the domain. The value at the i-th position in the target list is the true relevance score of the i-th element of the domain. The score can be numeric or boolean. Higher scores indicate higher relevance.
Pass both target_column and score_column. The value at the i-th position in the target list is the true relevance of the i-th element of the domain (represented as a number, higher being more relevant; or boolean). The value at the i-th position in the score list is the model output for the i-th element of the domain.
Pass only target_column. The target column contains lists of numbers or booleans. The list entries are the true relevance of the items predicted by the model in prediction order.
- Parameters
data (pd.core.frame.DataFrame) – Dataframe with the data to log.
prediction_column (Optional[str], optional) – Column name for the predicted values. If not provided, the score_column and target_column must be provided, by default None
target_column (Optional[str], optional) – Column name for the relevance scores. If not provided, relevance must be encoded within prediction column, by default None
score_column (Optional[str], optional) – Column name for the scores. Can either be probabilities, confidence values, or other continuous measures. If not passed, prediction_column must be passed, by default None
k (Optional[int], optional) – Consider the top k ranks for metrics calculation. If None, use all outputs, by default None
schema (Union[DatasetSchema, None], optional) – Defines the schema for tracking metrics in whylogs, by default None
log_full_data (bool, optional) – Whether to log the complete dataframe or not. If True, the complete DF will be logged in addition to the ranking metrics. If False, only the calculated ranking metrics will be logged. In a typical production use case, the ground truth might not be available at the time the remaining data is generated. In order to prevent double profiling the input features, consider leaving this as False. By default False
- Returns
- Return type
Examples
import pandas as pd
from whylogs.experimental.api.logger import log_batch_ranking_metrics

# 1st and 2nd recommended items are relevant - 3rd is not
df = pd.DataFrame({"targets": [[1, 0, 1]], "predictions": [[2,3,1]]})
results = log_batch_ranking_metrics(
    data=df,
    prediction_column="predictions",
    target_column="targets",
    k=3,
)
non_numerical_df = pd.DataFrame(
    {
        "raw_predictions": [
            ["cat", "pig", "elephant"],
            ["horse", "donkey", "robin"],
        ],
        "raw_targets": [
            ["cat", "elephant"],
            ["dog"],
        ],
    }
)

# 1st query:
# Recommended items: [cat, pig, elephant]
# Relevant items: [cat, elephant]

# 2nd query:
# Recommended items: [horse, donkey, robin]
# Relevant items: [dog]

results = log_batch_ranking_metrics(
    k=2,
    data=non_numerical_df,
    prediction_column="raw_predictions",
    target_column="raw_targets",
    convert_non_numeric=True
)
binary_single_df = pd.DataFrame(
    {
        "raw_targets": [
            [True, False, True],  # First recommended item: Relevant, Second: Not relevant, Third: Relevant
            [False, False, False],  # None of the recommended items are relevant
            [True, True, False],  # First and second recommended items are relevant
        ]
    }
)

result = log_batch_ranking_metrics(data=binary_single_df, target_column="raw_targets", k=3)