Source code for vis4d.op.box.encoder.delta_xywh

"""XYWH Delta coder for 2D boxes.

Modified from mmdetection (https://github.com/open-mmlab/mmdetection).
"""

from __future__ import annotations

import math

import torch
from torch import Tensor


class DeltaXYWHBBoxEncoder:
    """Encoder turning 2D boxes into (dx, dy, dw, dh) regression deltas.

    Uses the box parameterization introduced by
    `R-CNN <https://arxiv.org/abs/1311.2524>`_: a target box given as
    (x1, y1, x2, y2) is described relative to a source box by a center
    offset (dx, dy) and a log size ratio (dw, dh).
    """

    def __init__(
        self,
        target_means: tuple[float, float, float, float] = (0.0, 0.0, 0.0, 0.0),
        target_stds: tuple[float, float, float, float] = (1.0, 1.0, 1.0, 1.0),
    ) -> None:
        """Store the normalization statistics applied to the deltas.

        Args:
            target_means (tuple, optional): Per-coordinate means forwarded
                to :func:`bbox2delta`, which subtracts them from the raw
                (dx, dy, dw, dh) deltas. Defaults to (0.0, 0.0, 0.0, 0.0).
            target_stds (tuple, optional): Per-coordinate standard
                deviations forwarded to :func:`bbox2delta`, which divides
                the mean-shifted deltas by them. Defaults to
                (1.0, 1.0, 1.0, 1.0).
        """
        self.stds = target_stds
        self.means = target_means

    def __call__(self, boxes: Tensor, targets: Tensor) -> Tensor:
        """Compute the regression deltas mapping ``boxes`` onto ``targets``.

        Args:
            boxes (Tensor): Source boxes, e.g. object proposals. The last
                dimension must have size 4.
            targets (Tensor): Boxes the sources should be regressed to,
                e.g. ground-truth boxes. Must agree with ``boxes`` in the
                first dimension and have a last dimension of size 4.

        Returns:
            Tensor: Normalized box deltas as produced by
            :func:`bbox2delta`.
        """
        # Both inputs must pair up one-to-one and be 4-coordinate boxes.
        assert boxes.shape[0] == targets.shape[0]
        assert boxes.shape[-1] == 4 and targets.shape[-1] == 4
        return bbox2delta(boxes, targets, self.means, self.stds)
class DeltaXYWHBBoxDecoder:
    """Decoder turning (dx, dy, dw, dh) deltas back into 2D boxes.

    Inverts the box parameterization introduced by
    `R-CNN <https://arxiv.org/abs/1311.2524>`_: given reference boxes and
    predicted deltas it recovers boxes in (x1, y1, x2, y2) format.
    """

    def __init__(
        self,
        target_means: tuple[float, float, float, float] = (0.0, 0.0, 0.0, 0.0),
        target_stds: tuple[float, float, float, float] = (1.0, 1.0, 1.0, 1.0),
        wh_ratio_clip: float = 16 / 1000,
    ) -> None:
        """Store the denormalization statistics and the size clamp.

        Args:
            target_means (tuple, optional): Per-coordinate means forwarded
                to :func:`delta2bbox`, which adds them back to the deltas.
                Defaults to (0.0, 0.0, 0.0, 0.0).
            target_stds (tuple, optional): Per-coordinate standard
                deviations forwarded to :func:`delta2bbox`, which
                multiplies the deltas by them. Defaults to
                (1.0, 1.0, 1.0, 1.0).
            wh_ratio_clip (float, optional): Forwarded to
                :func:`delta2bbox`, which clamps dw and dh to
                ``+/- abs(log(wh_ratio_clip))`` before exponentiating.
                Defaults to 16/1000.
        """
        self.wh_ratio_clip = wh_ratio_clip
        self.stds = target_stds
        self.means = target_means

    def __call__(self, boxes: Tensor, box_deltas: Tensor) -> Tensor:
        """Apply the encoded offsets ``box_deltas`` to ``boxes``.

        Args:
            boxes (Tensor): Reference boxes, e.g. anchors or proposals,
                in (x1, y1, x2, y2) format.
            box_deltas (Tensor): Encoded offsets relative to each
                reference box, with 4 or ``num_classes * 4`` values per
                box. Must agree with ``boxes`` in the first dimension.
                The accepted tensor layouts are those supported by
                :func:`delta2bbox`.

        Returns:
            Tensor: Decoded boxes as produced by :func:`delta2bbox`.
        """
        # Deltas and reference boxes must pair up along the first axis.
        assert boxes.shape[0] == box_deltas.shape[0]
        return delta2bbox(
            boxes, box_deltas, self.means, self.stds, self.wh_ratio_clip
        )
def bbox2delta(
    proposals: torch.Tensor,
    gt_boxes: torch.Tensor,
    means: tuple[float, float, float, float] = (0.0, 0.0, 0.0, 0.0),
    stds: tuple[float, float, float, float] = (1.0, 1.0, 1.0, 1.0),
) -> Tensor:
    """Encode ground-truth boxes as deltas relative to proposals.

    For every (proposal, gt) pair the center offset is expressed in units
    of the proposal's width/height and the size change as the log of the
    width/height ratio. The result is then normalized as
    ``(delta - mean) / std``. This is the inverse of :func:`delta2bbox`.

    Args:
        proposals (Tensor): Boxes to be transformed, shape (N, ..., 4),
            in (x1, y1, x2, y2) format.
        gt_boxes (Tensor): Boxes to regress to, same shape as
            ``proposals``.
        means (Sequence[float]): Means subtracted from the raw deltas.
        stds (Sequence[float]): Standard deviations the mean-shifted
            deltas are divided by.

    Returns:
        Tensor: Float deltas with the same leading dimensions as the
        inputs and a last dimension of 4 holding dx, dy, dw, dh.
    """
    assert proposals.shape == gt_boxes.shape

    src = proposals.float()
    dst = gt_boxes.float()
    src_x1, src_y1, src_x2, src_y2 = (src[..., i] for i in range(4))
    dst_x1, dst_y1, dst_x2, dst_y2 = (dst[..., i] for i in range(4))

    # Width / height of the proposals; every delta is relative to these.
    src_w = src_x2 - src_x1
    src_h = src_y2 - src_y1

    # Center shift, measured in proposal widths / heights.
    dx = ((dst_x1 + dst_x2) * 0.5 - (src_x1 + src_x2) * 0.5) / src_w
    dy = ((dst_y1 + dst_y2) * 0.5 - (src_y1 + src_y2) * 0.5) / src_h
    # Size change, as log ratio of target size to proposal size.
    dw = torch.log((dst_x2 - dst_x1) / src_w)
    dh = torch.log((dst_y2 - dst_y1) / src_h)

    deltas = torch.stack([dx, dy, dw, dh], dim=-1)

    shift = torch.tensor(means, dtype=deltas.dtype, device=deltas.device)
    scale = torch.tensor(stds, dtype=deltas.dtype, device=deltas.device)
    # ``deltas`` was freshly created by ``stack``, so normalizing it in
    # place does not touch any caller-owned tensor.
    deltas.sub_(shift.view(1, -1))
    deltas.div_(scale.view(1, -1))
    return deltas
def delta2bbox(
    rois: torch.Tensor,
    deltas: torch.Tensor,
    means: tuple[float, float, float, float] = (0.0, 0.0, 0.0, 0.0),
    stds: tuple[float, float, float, float] = (1.0, 1.0, 1.0, 1.0),
    wh_ratio_clip: float = 16 / 1000,
) -> Tensor:
    """Apply deltas to shift/scale base boxes.

    Typically the rois are anchor or proposed bounding boxes and the
    deltas are network outputs used to shift/scale those boxes. This is
    the inverse function of :func:`bbox2delta`.

    Any number of leading dimensions is supported: the class count is
    read from the last dimension of ``deltas`` and every roi is paired
    with each of its per-class deltas, so batched (B, N, ...) inputs are
    decoded the same way as flat (N, ...) inputs.

    Args:
        rois (Tensor): Boxes to be transformed, in (x1, y1, x2, y2)
            format. Has shape (..., 4), with leading dimensions that
            match (or broadcast to) those of ``deltas``.
        deltas (Tensor): Encoded offsets relative to each roi. Has shape
            (..., num_classes * 4) or (..., 4), e.g. (N, 4),
            (N, num_classes * 4) or (B, N, num_classes * 4). Note
            N = num_base_anchors * W * H, when rois is a grid of anchors.
            Offset encoding follows [1]_.
        means (Sequence[float]): Denormalizing means for delta
            coordinates. Default (0., 0., 0., 0.).
        stds (Sequence[float]): Denormalizing standard deviation for
            delta coordinates. Default (1., 1., 1., 1.).
        wh_ratio_clip (float): Bound on the size change. dw and dh are
            clamped to ``+/- abs(log(wh_ratio_clip))`` before being
            exponentiated. Default 16 / 1000.

    Returns:
        Tensor: Boxes with the same shape as ``deltas``, where each group
        of 4 values represents tl_x, tl_y, br_x, br_y. An empty
        ``deltas`` tensor is returned unchanged.

    References:
        .. [1] https://arxiv.org/abs/1311.2524
    """
    # Nothing to decode; hand the empty tensor back unchanged.
    if deltas.numel() == 0:
        return deltas

    out_shape = deltas.shape
    # Read the class count from the LAST dimension so that leading batch
    # dimensions are never mistaken for the per-box delta dimension.
    num_classes = out_shape[-1] // 4

    flat_deltas = deltas.reshape(-1, 4)
    mean_tensor = torch.tensor(
        means, dtype=flat_deltas.dtype, device=flat_deltas.device
    )
    std_tensor = torch.tensor(
        stds, dtype=flat_deltas.dtype, device=flat_deltas.device
    )
    denorm_deltas = flat_deltas * std_tensor.view(1, -1) + mean_tensor.view(
        1, -1
    )
    dxy = denorm_deltas[:, :2]
    dwh = denorm_deltas[:, 2:]

    # Replicate each roi once per class so that row i of ``rois_`` is the
    # reference box for row i of ``flat_deltas``. For 2D inputs this is
    # equivalent to ``rois.repeat(1, num_classes).reshape(-1, 4)``.
    rois_ = (
        rois.unsqueeze(-2)
        .expand(*out_shape[:-1], num_classes, 4)
        .reshape(-1, 4)
    )

    # Center and width/height of each roi.
    pxy = (rois_[:, :2] + rois_[:, 2:]) * 0.5
    pwh = rois_[:, 2:] - rois_[:, :2]

    # Center offsets are expressed in units of the roi width/height.
    dxy_wh = pwh * dxy

    # Bound the log-scale deltas so exp() cannot blow up the box size.
    max_ratio = abs(math.log(wh_ratio_clip))
    dwh = dwh.clamp(min=-max_ratio, max=max_ratio)

    gxy = pxy + dxy_wh
    gwh = pwh * dwh.exp()
    x1y1 = gxy - (gwh * 0.5)
    x2y2 = gxy + (gwh * 0.5)

    boxes = torch.cat([x1y1, x2y2], dim=-1)
    return boxes.reshape(out_shape)