"""Affine transformation.
Modified from mmdetection (https://github.com/open-mmlab/mmdetection).
"""
from __future__ import annotations
import math
import random
from typing import TypedDict
import numpy as np
import torch
from vis4d.common.imports import OPENCV_AVAILABLE
from vis4d.common.typing import NDArrayF32, NDArrayI64
from vis4d.data.const import CommonKeys as K
from vis4d.op.box.box2d import bbox_clip, bbox_project
from .base import Transform
from .crop import _get_keep_mask
if OPENCV_AVAILABLE:
import cv2
else:
raise ImportError("Please install opencv-python to use this module.")
class AffineParam(TypedDict):
"""Parameters for Affine."""
warp_matrix: NDArrayF32
height: int
width: int
def get_rotation_matrix(rotate_degrees: float) -> NDArrayF32:
"""Generate rotation matrix.
Args:
rotate_degrees (float): Rotation degrees.
"""
radian = math.radians(rotate_degrees)
rotation_matrix = np.array(
[
[np.cos(radian), -np.sin(radian), 0.0],
[np.sin(radian), np.cos(radian), 0.0],
[0.0, 0.0, 1.0],
],
dtype=np.float32,
)
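    # For example, rotate_degrees=90.0 gives, up to floating point rounding,
    # [[0., -1., 0.], [1., 0., 0.], [0., 0., 1.]].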
return rotation_matrix
def get_scaling_matrix(scale_ratio: float) -> NDArrayF32:
"""Generate scaling matrix.
Args:
scale_ratio (float): Scale ratio.
"""
scaling_matrix = np.array(
[[scale_ratio, 0.0, 0.0], [0.0, scale_ratio, 0.0], [0.0, 0.0, 1.0]],
dtype=np.float32,
)
return scaling_matrix
def get_shear_matrix(
x_shear_degrees: float, y_shear_degrees: float
) -> NDArrayF32:
"""Generate shear matrix.
Args:
x_shear_degrees (float): X shear degrees.
y_shear_degrees (float): Y shear degrees.
"""
x_radian = math.radians(x_shear_degrees)
y_radian = math.radians(y_shear_degrees)
shear_matrix = np.array(
[
[1, np.tan(x_radian), 0.0],
[np.tan(y_radian), 1, 0.0],
[0.0, 0.0, 1.0],
],
dtype=np.float32,
)
return shear_matrix
def get_translation_matrix(x_trans: float, y_trans: float) -> NDArrayF32:
"""Generate translation matrix.
Args:
x_trans (float): X translation.
y_trans (float): Y translation.
"""
translation_matrix = np.array(
[[1, 0.0, x_trans], [0.0, 1, y_trans], [0.0, 0.0, 1.0]],
dtype=np.float32,
)
return translation_matrix
@Transform(K.input_hw, ["transforms.affine"])
class GenAffineParameters:
"""Random affine transform data augmentation.
This operation randomly generates affine transform matrix which including
rotation, translation, shear, and scaling transforms.
"""
def __init__(
self,
max_rotate_degree: float = 10.0,
max_translate_ratio: float = 0.1,
scaling_ratio_range: tuple[float, float] = (0.5, 1.5),
max_shear_degree: float = 2.0,
border: tuple[int, int] = (0, 0),
) -> None:
"""Creates an instance of the class.
Args:
max_rotate_degree (float): Maximum degrees of rotation transform.
Defaults to 10.
max_translate_ratio (float): Maximum ratio of translation.
Defaults to 0.1.
            scaling_ratio_range (tuple[float, float]): Min and max ratio of
                scaling transform. Defaults to (0.5, 1.5).
max_shear_degree (float): Maximum degrees of shear
transform. Defaults to 2.
border (tuple[int, int]): Distance from height and width sides of
input image to adjust output shape. Only used in mosaic
dataset. Defaults to (0, 0).
"""
assert 0 <= max_translate_ratio <= 1
assert scaling_ratio_range[0] <= scaling_ratio_range[1]
assert scaling_ratio_range[0] > 0
self.max_rotate_degree = max_rotate_degree
self.max_translate_ratio = max_translate_ratio
self.scaling_ratio_range = scaling_ratio_range
self.max_shear_degree = max_shear_degree
        self.border = border

def _get_random_homography_matrix(
self, height: int, width: int
) -> NDArrayF32:
"""Generate random homography matrix."""
# Rotation
rotation_degree = random.uniform(
-self.max_rotate_degree, self.max_rotate_degree
)
rotation_matrix = get_rotation_matrix(rotation_degree)
# Scaling
scaling_ratio = random.uniform(
self.scaling_ratio_range[0], self.scaling_ratio_range[1]
)
scaling_matrix = get_scaling_matrix(scaling_ratio)
# Shear
x_degree = random.uniform(
-self.max_shear_degree, self.max_shear_degree
)
y_degree = random.uniform(
-self.max_shear_degree, self.max_shear_degree
)
shear_matrix = get_shear_matrix(x_degree, y_degree)
# Translation
trans_x = (
random.uniform(-self.max_translate_ratio, self.max_translate_ratio)
* width
)
trans_y = (
random.uniform(-self.max_translate_ratio, self.max_translate_ratio)
* height
)
translate_matrix = get_translation_matrix(trans_x, trans_y)
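        # Compose the final warp. Matrices apply right-to-left, so the image
        # is scaled first, then rotated, sheared, and finally translated.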
warp_matrix = (
translate_matrix @ shear_matrix @ rotation_matrix @ scaling_matrix
)
return warp_matrix
def __call__(self, input_hw: list[tuple[int, int]]) -> list[AffineParam]:
"""Compute the parameters and put them in the data dict."""
img_shape = input_hw[0]
height = img_shape[0] + self.border[0] * 2
width = img_shape[1] + self.border[1] * 2
warp_matrix = self._get_random_homography_matrix(height, width)
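        # The same warp matrix and padded output size are replicated for every
        # input view, so all images of a sample share one affine transform.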
return [
AffineParam(warp_matrix=warp_matrix, height=height, width=width)
] * len(input_hw)
@Transform(
[
K.images,
"transforms.affine.warp_matrix",
"transforms.affine.height",
"transforms.affine.width",
],
[K.images, K.input_hw],
)
class AffineImages:
"""Affine Images."""
def __init__(
self,
border_val: tuple[int, int, int] = (114, 114, 114),
as_int: bool = False,
) -> None:
"""Creates an instance of the class.
Args:
border_val (tuple[int, int, int]): Border padding values of 3
channels. Defaults to (114, 114, 114).
as_int (bool): Whether to convert the image to int. Defaults to
False.
"""
self.border_val = border_val
self.as_int = as_int
def __call__(
self,
images: list[NDArrayF32],
warp_matrix_list: list[NDArrayF32],
height_list: list[int],
width_list: list[int],
) -> tuple[list[NDArrayF32], list[tuple[int, int]]]:
"""Affine a list of image of dimensions [N, H, W, C]."""
input_hw_list = []
for i, (image, warp_matrix, height, width) in enumerate(
zip(images, warp_matrix_list, height_list, width_list)
):
image = image[0].astype(np.uint8) if self.as_int else image[0]
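            # cv2.warpPerspective applies the 3x3 warp to the HxWxC image;
            # dsize is given as (width, height) and regions mapped from
            # outside the source are filled with the configured border value.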
image = cv2.warpPerspective( # pylint: disable=no-member, unsubscriptable-object, line-too-long
image,
warp_matrix,
dsize=(width, height),
borderValue=self.border_val,
)[
None, ...
].astype(
np.float32
)
images[i] = image
input_hw_list.append((height, width))
return images, input_hw_list
@Transform(
in_keys=[
K.boxes2d,
K.boxes2d_classes,
K.boxes2d_track_ids,
"transforms.affine.warp_matrix",
"transforms.affine.height",
"transforms.affine.width",
],
out_keys=[K.boxes2d, K.boxes2d_classes, K.boxes2d_track_ids],
)
class AffineBoxes2D:
"""Apply Affine to a list of 2D bounding boxes."""
def __init__(self, bbox_clip_border: bool = True) -> None:
"""Creates an instance of the class.
Args:
bbox_clip_border (bool, optional): Whether to clip the objects
outside the border of the image. In some dataset like MOT17,
the gt bboxes are allowed to cross the border of images.
Therefore, we don't need to clip the gt bboxes in these cases.
Defaults to True.
"""
self.bbox_clip_border = bbox_clip_border
def __call__(
self,
boxes: list[NDArrayF32],
classes: list[NDArrayI64],
track_ids: list[NDArrayI64] | None,
warp_matrix_list: list[NDArrayF32],
height_list: list[int],
width_list: list[int],
) -> tuple[list[NDArrayF32], list[NDArrayI64], list[NDArrayI64] | None]:
"""Apply Affine to 2D bounding boxes."""
for i, (box, class_, warp_matrix, height, width) in enumerate(
zip(
boxes,
classes,
warp_matrix_list,
height_list,
width_list,
)
):
box_ = bbox_project(
torch.from_numpy(box), torch.from_numpy(warp_matrix)
)
if self.bbox_clip_border:
box_ = bbox_clip(box_, (height, width))
boxes[i] = box_.numpy()
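            # Drop boxes that no longer fall inside the [0, 0, width, height]
            # region (same helper as the crop transform) and filter classes
            # and track ids with the same mask to keep them aligned.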
keep_mask = _get_keep_mask(
boxes[i], np.array([0, 0, width, height])
)
boxes[i] = boxes[i][keep_mask]
classes[i] = class_[keep_mask]
if track_ids is not None:
track_ids[i] = track_ids[i][keep_mask]
return boxes, classes, track_ids
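

if __name__ == "__main__":
    # Minimal usage sketch (illustrative, not part of the vis4d API surface):
    # build a fixed warp from the matrix helpers above and apply it to a dummy
    # image, mirroring what AffineImages does with the randomly generated
    # matrix from GenAffineParameters.
    dummy_image = np.full((1, 100, 200, 3), 255, dtype=np.float32)
    example_warp = (
        get_translation_matrix(10.0, 5.0)
        @ get_shear_matrix(1.0, 0.0)
        @ get_rotation_matrix(15.0)
        @ get_scaling_matrix(1.2)
    )
    warped = cv2.warpPerspective(
        dummy_image[0],
        example_warp,
        dsize=(200, 100),
        borderValue=(114, 114, 114),
    )[None, ...].astype(np.float32)
    print(warped.shape)  # (1, 100, 200, 3)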