Custom Metrics

Open in Colab

[ ]:
# Note: you may need to restart the kernel to use updated packages.
%pip install whylogs

If all of the state of the metric can be represented by subclasses of MetricComponent, it’s very simple to create a new metric. There are a number of standard metric components in metric_components.py. You can also create new components by subclassing CustomComponent.

from dataclasses import dataclass
from typing import Any, Dict, List
import pickle

import whylogs_sketching as ds  # type: ignore
from whylogs.core.configs import SummaryConfig
from whylogs.core.dataset_profile import DatasetProfile
from whylogs.core.datatypes import DataType
from whylogs.core.metrics.metric_components import KllComponent
from whylogs.core.metrics.metrics import CustomMetricBase, Metric, MetricConfig, OperationResult
from whylogs.core.preprocessing import PreprocessedColumn
from whylogs.core.resolvers import Resolver
from whylogs.core.schema import DatasetSchema
from whylogs.core.proto import MetricMessage, MetricComponentMessage

# Metric classes should be decorated with @dataclass: the annotated fields
# become the constructor arguments that zero() passes to cls(...).
@dataclass
class HistogramMetric(Metric):
    """Example custom metric that keeps a column's numeric values in a KLL sketch.

    The whole state of the metric lives in a single MetricComponent field.
    """

    histogram: KllComponent  # All the fields are subclasses of MetricComponent

    # you must implement namespace returning a unique string to identify your metric
    # NOTE(review): whylogs declares Metric.namespace as a property; confirm
    # against the installed whylogs version.
    @property
    def namespace(self) -> str:
        """Return the unique name identifying this metric."""
        return "histogram"

    # you must implement to_summary_dict returning a summary of your metric
    def to_summary_dict(self, cfg: SummaryConfig) -> Dict[str, Any]:
        """Summarize the sketch as count, extrema and five quantiles.

        Args:
            cfg: Summary configuration; not consulted by this metric.

        Returns:
            A dict with keys ``n``, ``max``, ``min``, ``q_10``, ``q_25``,
            ``median``, ``q_75`` and ``q_90``. The quantile entries are
            ``None`` when the sketch has seen no values.
        """
        sketch = self.histogram.value
        if sketch.get_n() == 0:
            # Nothing logged yet: report placeholders instead of querying
            # quantiles from an empty sketch.
            quantiles = [None, None, None, None, None]
        else:
            quantiles = sketch.get_quantiles([0.1, 0.25, 0.5, 0.75, 0.9])
        return {
            "n": sketch.get_n(),
            "max": sketch.get_max_value(),
            "min": sketch.get_min_value(),
            "q_10": quantiles[0],
            "q_25": quantiles[1],
            "median": quantiles[2],
            "q_75": quantiles[3],
            "q_90": quantiles[4],
        }

    # columnar_update updates your metric as data is logged
    def columnar_update(self, data: PreprocessedColumn) -> OperationResult:
        """Feed the numeric values of a preprocessed column into the sketch.

        Args:
            data: Preprocessed column; its numpy float/int arrays and its
                list int/float values are consumed.

        Returns:
            An ``OperationResult`` whose success count is the number of
            values added to the sketch.
        """
        successes = 0

        # NOTE(review): update() / update_list() are assumed to be the
        # whylogs_sketching kll_doubles_sketch entry points for numpy arrays
        # and Python lists respectively; confirm against the installed version.
        if data.numpy.len > 0:
            for arr in [data.numpy.floats, data.numpy.ints]:
                if arr is not None:
                    self.histogram.value.update(arr)
                    successes += len(arr)

        for lst in [data.list.ints, data.list.floats]:
            if lst is not None and len(lst) > 0:
                self.histogram.value.update_list(lst)
                successes += len(lst)

        return OperationResult.ok(successes)

    # The zero method returns an "empty" instance of your metric ready to start tracking data
    # If your metric needs configuration, create a subclass of MetricConfig containing your
    # parameters.
    @classmethod
    def zero(cls, config: MetricConfig) -> "HistogramMetric":
        """Build an empty metric whose sketch is sized by ``config.kll_k``."""
        return cls(histogram=KllComponent(ds.kll_doubles_sketch(k=config.kll_k)))

If you prefer not to use MetricComponent fields for your metric, you can instead make your metric a subclass of CustomMetricBase. All fields whose names don’t start with _ will be included in the metric summary and serialized via protobuf.

# Subclasses of CustomMetricBase must be decorated with @dataclass so the
# annotated fields become constructor arguments; it is left unfrozen because
# columnar_update reassigns x and s in place.
@dataclass
class StructMetric(CustomMetricBase):
    """Example custom metric whose state is plain fields rather than MetricComponents."""

    x: int
    s: str
    _private: float = 3.14159  # excluded from summary and protobuf

    # NOTE(review): whylogs declares Metric.namespace as a property; confirm
    # against the installed whylogs version.
    @property
    def namespace(self) -> str:
        """Return the unique name identifying this metric."""
        return "struct"

    # you must implement your own merge method
    def merge(self, other: "StructMetric") -> "StructMetric":
        """Return a new metric combining both operands.

        ``x`` values are added and ``s`` values are concatenated; neither
        operand is modified.
        """
        return StructMetric(self.x + other.x, self.s + other.s)

    def columnar_update(self, data: PreprocessedColumn) -> OperationResult:
        """Record one update: increment ``x`` and append ``"a"`` to ``s``.

        The content of ``data`` is not inspected.

        Returns:
            An ``OperationResult`` reporting one success.
        """
        self.x += 1
        self.s += "a"
        return OperationResult.ok(1)

    @classmethod
    def zero(cls, config: MetricConfig) -> "StructMetric":
        """Build an empty metric (``x == 0``, ``s == ""``); ``config`` is unused."""
        return cls(0, "")

Using Your Metric

You will need to create a Resolver and DatasetSchema in order to use your metric.

from whylogs.core import ColumnSchema

class TestResolver(Resolver):
    """Resolver that gives a column the two example metrics from this page."""

    def resolve(self, name: str, why_type: DataType, column_schema: ColumnSchema) -> Dict[str, Metric]:
        """Return fresh metric instances keyed by the names used in the summary.

        The column name and its whylogs type are ignored; only the metric
        configuration carried by ``column_schema`` is used, to size the
        histogram's sketch.
        """
        metrics: Dict[str, Metric] = {}
        metrics["histogram"] = HistogramMetric.zero(column_schema.cfg)
        metrics["struct"] = StructMetric(0, "")
        return metrics

# Build a schema whose columns get their metrics from TestResolver, then
# profile a single row with it.
schema = DatasetSchema(types={"col1": float}, resolvers=TestResolver())
prof = DatasetProfile(schema)
row = {"col1": 1.2}

# Log the row; without this the metrics are never updated.
# NOTE(review): the sample output lists struct/x == 2 alongside
# histogram/n == 1; verify against the upstream notebook how the row is logged.
prof.track(row=row)

# Render the per-column metric summaries as a DataFrame (this is the table
# displayed as the cell's output).
prof.view().to_pandas()
histogram/n histogram/max histogram/min histogram/q_10 histogram/q_25 histogram/median histogram/q_75 histogram/q_90 struct/x struct/s type
col1 1 1.2 1.2 1.2 1.2 1.2 1.2 1.2 2 aa SummaryType.COLUMN