Source code for vis4d.eval.common.binary

"""Binary occupancy evaluator."""

from __future__ import annotations

import numpy as np

from vis4d.common.array import array_to_numpy
from vis4d.common.typing import (
from vis4d.eval.base import Evaluator

def threshold_and_flatten(
    prediction: NDArrayNumber,
    target: NDArrayNumber,
    threshold_value: float,
) -> tuple[NDArrayBool, NDArrayBool]:
    """Binarize the prediction and flatten both arrays to boolean vectors.

    An element of the prediction is considered positive when it is greater
    than or equal to ``threshold_value``. Both the binarized prediction and
    the target are then flattened to one dimension and cast to ``bool``.

    Args:
        prediction: Prediction array with continuous values.
        target: Ground truth array; it is cast to bool, so any non-zero
            entry counts as positive.
        threshold_value: Cut-off used to turn the continuous prediction
            into a binary one (inclusive).

    Returns:
        Tuple of two flat boolean arrays: (prediction, target).
    """
    binary_prediction: NDArrayBool = (
        (prediction >= threshold_value).ravel().astype(bool)
    )
    binary_target: NDArrayBool = target.ravel().astype(bool)
    return binary_prediction, binary_target
class BinaryEvaluator(Evaluator):
    """Evaluator for binary predictions.

    Confusion-matrix counts (TP, FP, TN, FN) are accumulated per processed
    batch. ``evaluate`` derives IoU, accuracy, precision, recall and F1 from
    the totals over all batches seen since the last ``reset``.
    """

    METRIC_BINARY = "BinaryCls"

    KEY_IOU = "IoU"
    KEY_ACCURACY = "Accuracy"
    KEY_F1 = "F1"
    KEY_PRECISION = "Precision"
    KEY_RECALL = "Recall"

    def __init__(
        self,
        threshold: float = 0.5,
    ) -> None:
        """Creates a new binary evaluator.

        Args:
            threshold (float): Threshold used to convert predictions to
                binary. All predictions greater than or equal to this value
                are assigned the 'True' label.
        """
        super().__init__()
        self.threshold = threshold
        self.reset()

        # One entry per processed batch; the counts are plain integers.
        self.true_positives: list[int] = []
        self.false_positives: list[int] = []
        self.true_negatives: list[int] = []
        self.false_negatives: list[int] = []
        self.n_samples: list[int] = []

        self.has_samples = False

    def _calc_confusion_matrix(
        self, prediction: NDArrayBool, target: NDArrayBool
    ) -> None:
        """Calculates the confusion matrix and stores it in the attributes.

        One entry is appended to each of the per-batch count lists.

        Args:
            prediction: The binary prediction.
            target: The binary ground truth, same shape as ``prediction``.
        """
        tp = int(np.sum(np.logical_and(prediction == 1, target == 1)))
        fp = int(np.sum(np.logical_and(prediction == 1, target == 0)))
        tn = int(np.sum(np.logical_and(prediction == 0, target == 0)))
        fn = int(np.sum(np.logical_and(prediction == 0, target == 1)))

        self.true_positives.append(tp)
        self.false_positives.append(fp)
        self.true_negatives.append(tn)
        self.false_negatives.append(fn)
        self.n_samples.append(tp + fp + tn + fn)

    @property
    def metrics(self) -> list[str]:
        """Supported metrics."""
        return [self.METRIC_BINARY]

    def reset(self) -> None:
        """Reset the saved predictions to start a new round of evaluation."""
        self.true_positives = []
        self.false_positives = []
        self.true_negatives = []
        self.false_negatives = []
        self.n_samples = []
        # Without this, evaluate() after reset() would pass its guard and
        # divide by zero instead of reporting that no data is available.
        self.has_samples = False

    def process_batch(  # type: ignore # pylint: disable=arguments-differ
        self,
        prediction: ArrayLike,
        groundtruth: ArrayLike,
    ) -> None:
        """Processes a new (batch) of predictions.

        Thresholds the prediction, computes the confusion-matrix counts of
        the batch and caches them internally.

        Args:
            prediction: The prediction (continuous values or binary).
            groundtruth: The binary ground truth, same shape as
                ``prediction``.
        """
        pred, gt = threshold_and_flatten(
            array_to_numpy(prediction, n_dims=None, dtype=np.float32),
            array_to_numpy(groundtruth, n_dims=None, dtype=np.bool_),
            self.threshold,
        )

        # Confusion Matrix
        self._calc_confusion_matrix(pred, gt)
        self.has_samples = True

    def evaluate(self, metric: str) -> tuple[MetricLogs, str]:
        """Evaluate predictions.

        Returns a dict containing the raw data and a short description
        string containing a readable result.

        Args:
            metric (str): Metric to use. See @property metrics.

        Returns:
            metric_data, description
            tuple containing the metric data (dict with metric name and
            value) as well as a short string with shortened information.

        Raises:
            RuntimeError: if no data has been registered to be evaluated, or
                the registered batches contain no elements.
            ValueError: if metric is not supported.
        """
        if not self.has_samples:
            raise RuntimeError(
                "No data registered to calculate metric. "
                "Register data using .process_batch() first!"
            )

        if metric != self.METRIC_BINARY:
            raise ValueError(
                f"Unsupported metric: {metric}"
            )  # pragma: no cover

        # Aggregate the per-batch counts once.
        tp = sum(self.true_positives)
        fp = sum(self.false_positives)
        tn = sum(self.true_negatives)
        fn = sum(self.false_negatives)
        n_total = sum(self.n_samples)

        if n_total == 0:
            # Only empty batches were processed; accuracy is undefined.
            raise RuntimeError(
                "Registered data contains no elements, "
                "cannot calculate metric."
            )

        metric_data: MetricLogs = {}
        short_description = ""

        # IoU = TP / (TP + FP + FN); the epsilon guards all-negative data.
        iou = tp / (n_total - tn + 1e-6)
        metric_data[self.KEY_IOU] = iou
        short_description += f"IoU: {iou:.3f}\n"

        # Accuracy
        acc = (tp + tn) / n_total
        metric_data[self.KEY_ACCURACY] = acc
        short_description += f"Accuracy: {acc:.3f}\n"

        # Precision, defined as 1 when nothing was predicted positive.
        tp_fp = tp + fp
        precision = tp / tp_fp if tp_fp != 0 else 1.0
        metric_data[self.KEY_PRECISION] = precision
        short_description += f"Precision: {precision:.3f}\n"

        # Recall, defined as 1 when there are no positive targets.
        tp_fn = tp + fn
        recall = tp / tp_fn if tp_fn != 0 else 1.0
        metric_data[self.KEY_RECALL] = recall
        short_description += f"Recall: {recall:.3f}\n"

        # F1
        f1 = 2 * precision * recall / (precision + recall + 1e-8)
        metric_data[self.KEY_F1] = f1
        short_description += f"F1: {f1:.3f}\n"

        return metric_data, short_description