Source code for vis4d.data.transforms.random_erasing

"""Random erasing data augmentation."""

import numpy as np

from vis4d.common.typing import NDArrayNumber
from vis4d.data.const import CommonKeys as K

from .base import Transform


@Transform(in_keys=K.images, out_keys=K.images)
class RandomErasing:
    """Randomly erase a rectangular region in an image tensor.

    Each image of a batch shaped [N, H, W, C] gets at most one rectangle
    overwritten with ``mean``. The arrays are modified in place and also
    returned. See `https://arxiv.org/abs/1708.04896`.
    """

    def __init__(
        self,
        min_area: float = 0.02,
        max_area: float = 0.4,
        min_aspect_ratio: float = 0.3,
        max_aspect_ratio: float = 1 / 0.3,
        mean: tuple[float, float, float] = (0.0, 0.0, 0.0),
        num_attempt: int = 10,
    ):
        """Creates an instance of RandomErasing.

        Recommended to use this transform after normalization. The erased
        region will be filled with the mean value.

        Args:
            min_area (float, optional): Minimum area of the erased region,
                as a fraction of the image area. Defaults to 0.02.
            max_area (float, optional): Maximum area of the erased region,
                as a fraction of the image area. Defaults to 0.4.
            min_aspect_ratio (float, optional): Minimum aspect ratio
                (height / width) of the erased region. Defaults to 0.3.
            max_aspect_ratio (float, optional): Maximum aspect ratio
                (height / width) of the erased region. Defaults to 1 / 0.3.
            mean (tuple[float, float, float], optional): Mean of the dataset,
                used as the fill value. Defaults to (0.0, 0.0, 0.0).
            num_attempt (int, optional): Number of maximum attempts to find a
                valid erased region. This is used to avoid infinite attempts
                of resampling the region, though such cases are very unlikely
                to happen. Defaults to 10.
        """
        self.min_area = min_area
        self.max_area = max_area
        self.min_aspect_ratio = min_aspect_ratio
        self.max_aspect_ratio = max_aspect_ratio
        self.mean = mean
        self.num_attempt = num_attempt

    def do_erasing(self, images: NDArrayNumber) -> NDArrayNumber:
        """Execute the random erasing.

        For every image along the first axis, a rectangle is sampled up to
        ``num_attempt`` times. The first candidate that is strictly smaller
        than the image in both height and width is filled with ``mean``. If
        no candidate fits, the image is left untouched.

        Args:
            images (NDArrayNumber): Batch of images, shape [N, H, W, C].
                It is modified in place.

        Returns:
            NDArrayNumber: The same array that was passed in.
        """
        fill = np.array(self.mean)
        # Iterating a NumPy array yields views along axis 0, so writing to
        # ``image`` updates ``images`` directly.
        for image in images:
            h, w = image.shape[0:2]
            area = h * w
            for _ in range(self.num_attempt):
                target_area = (
                    np.random.uniform(self.min_area, self.max_area) * area
                )
                aspect_ratio = np.random.uniform(
                    self.min_aspect_ratio, self.max_aspect_ratio
                )
                h_erase = int(round(np.sqrt(target_area * aspect_ratio)))
                w_erase = int(round(np.sqrt(target_area / aspect_ratio)))
                if w_erase < w and h_erase < h:
                    # randint's upper bound is exclusive; the +1 makes the
                    # largest valid offset reachable so the rectangle can
                    # also touch the right / bottom border of the image.
                    x_erase = np.random.randint(0, w - w_erase + 1)
                    y_erase = np.random.randint(0, h - h_erase + 1)
                    image[
                        y_erase : y_erase + h_erase,
                        x_erase : x_erase + w_erase,
                        :,
                    ] = fill
                    break
        return images

    def __call__(
        self, images_list: list[NDArrayNumber]
    ) -> list[NDArrayNumber]:
        """Execute the transform.

        Args:
            images_list (list[NDArrayNumber]): Image batches; each entry is
                processed by ``do_erasing`` and written back to its slot.

        Returns:
            list[NDArrayNumber]: The same list, holding the erased batches.
        """
        for i, images in enumerate(images_list):
            images_list[i] = self.do_erasing(images)
        return images_list