Source code for vis4d.op.base.dla

"""DLA base model."""

from __future__ import annotations

import torch
from torch import Tensor, nn

from .base import BaseModel

BN_MOMENTUM = 0.1
DLA_MODEL_PREFIX = "http://dl.yf.io/dla/models/"
DLA_MODEL_MAPPING = {
    "dla34": "dla34-ba72cf86.pth",
    "dla46_c": "dla46_c-2bfd52c3.pth",
    "dla46x_c": "dla46x_c-d761bae7.pth",
    "dla60x_c": "dla60x_c-b870c45c.pth",
    "dla60": "dla60-24839fc4.pth",
    "dla60x": "dla60x-d15cacda.pth",
    "dla102": "dla102-d94d9790.pth",
    "dla102x": "dla102x-ad62be81.pth",
    "dla102x2": "dla102x2-262837b6.pth",
    "dla169": "dla169-0914e092.pth",
}
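
# A "dla://<name>" weights specifier resolves to a download URL of the form
# f"{DLA_MODEL_PREFIX}{style}/{DLA_MODEL_MAPPING[name]}" (see DLA.__init__
# below), e.g. for "dla34" with the default "imagenet" style:
# http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth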
DLA_ARCH_SETTINGS = {  # pylint: disable=consider-using-namedtuple-or-dataclass
    "dla34": (
        (1, 1, 1, 2, 2, 1),
        (16, 32, 64, 128, 256, 512),
        False,
        "BasicBlock",
    ),
    "dla46_c": (
        (1, 1, 1, 2, 2, 1),
        (16, 32, 64, 64, 128, 256),
        False,
        "Bottleneck",
    ),
    "dla46x_c": (
        (1, 1, 1, 2, 2, 1),
        (16, 32, 64, 64, 128, 256),
        False,
        "BottleneckX",
    ),
    "dla60x_c": (
        (1, 1, 1, 2, 3, 1),
        (16, 32, 64, 64, 128, 256),
        False,
        "BottleneckX",
    ),
    "dla60": (
        (1, 1, 1, 2, 3, 1),
        (16, 32, 128, 256, 512, 1024),
        False,
        "Bottleneck",
    ),
    "dla60x": (
        (1, 1, 1, 2, 3, 1),
        (16, 32, 128, 256, 512, 1024),
        False,
        "BottleneckX",
    ),
    "dla102": (
        (1, 1, 1, 3, 4, 1),
        (16, 32, 128, 256, 512, 1024),
        True,
        "Bottleneck",
    ),
    "dla102x": (
        (1, 1, 1, 3, 4, 1),
        (16, 32, 128, 256, 512, 1024),
        True,
        "BottleneckX",
    ),
    "dla102x2": (
        (1, 1, 1, 3, 4, 1),
        (16, 32, 128, 256, 512, 1024),
        True,
        "BottleneckX",
    ),
    "dla169": (
        (1, 1, 2, 3, 5, 1),
        (16, 32, 128, 256, 512, 1024),
        True,
        "Bottleneck",
    ),
}
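
# Each entry maps an architecture name to (levels, channels, residual_root,
# block); DLA.__init__ unpacks these when ``name`` is given. A minimal sketch
# of the lookup:
#     >>> levels, channels, residual_root, block = DLA_ARCH_SETTINGS["dla34"]
#     >>> block, residual_root
#     ('BasicBlock', False)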


class BasicBlock(nn.Module):
    """BasicBlock."""

    def __init__(
        self, inplanes: int, planes: int, stride: int = 1, dilation: int = 1
    ) -> None:
        """Creates an instance of the class."""
        super().__init__()
        self.conv1 = nn.Conv2d(
            inplanes,
            planes,
            kernel_size=3,
            stride=stride,
            padding=dilation,
            bias=False,
            dilation=dilation,
        )
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(
            planes,
            planes,
            kernel_size=3,
            stride=1,
            padding=dilation,
            bias=False,
            dilation=dilation,
        )
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.stride = stride

    def forward(
        self, input_x: Tensor, residual: None | Tensor = None
    ) -> Tensor:
        """Forward."""
        if residual is None:
            residual = input_x

        out = self.conv1(input_x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += residual
        out = self.relu(out)

        return out
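
# A minimal shape check for the residual block (illustrative only; the
# optional ``residual`` argument is how ``Tree`` injects a projected skip
# connection):
#     >>> block = BasicBlock(16, 16)
#     >>> block(torch.rand(1, 16, 8, 8)).shape
#     torch.Size([1, 16, 8, 8])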


class Bottleneck(nn.Module):
    """Bottleneck."""

    expansion = 2

    def __init__(
        self, inplanes: int, planes: int, stride: int = 1, dilation: int = 1
    ) -> None:
        """Creates an instance of the class."""
        super().__init__()
        expansion = Bottleneck.expansion
        bottle_planes = planes // expansion
        self.conv1 = nn.Conv2d(
            inplanes, bottle_planes, kernel_size=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(
            bottle_planes,
            bottle_planes,
            kernel_size=3,
            stride=stride,
            padding=dilation,
            bias=False,
            dilation=dilation,
        )
        self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(
            bottle_planes, planes, kernel_size=1, bias=False
        )
        self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.stride = stride

    def forward(
        self, input_x: Tensor, residual: None | Tensor = None
    ) -> Tensor:
        """Forward."""
        if residual is None:
            residual = input_x

        out = self.conv1(input_x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out += residual
        out = self.relu(out)

        return out


class BottleneckX(nn.Module):
    """BottleneckX."""

    expansion = 2
    cardinality = 32

    def __init__(
        self, inplanes: int, planes: int, stride: int = 1, dilation: int = 1
    ) -> None:
        """Creates an instance of the class."""
        super().__init__()
        cardinality = BottleneckX.cardinality
        bottle_planes = planes * cardinality // 32
        self.conv1 = nn.Conv2d(
            inplanes, bottle_planes, kernel_size=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(
            bottle_planes,
            bottle_planes,
            kernel_size=3,
            stride=stride,
            padding=dilation,
            bias=False,
            dilation=dilation,
            groups=cardinality,
        )
        self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(
            bottle_planes, planes, kernel_size=1, bias=False
        )
        self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.stride = stride

    def forward(
        self, input_x: Tensor, residual: None | Tensor = None
    ) -> Tensor:
        """Forward."""
        if residual is None:
            residual = input_x

        out = self.conv1(input_x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out += residual
        out = self.relu(out)

        return out


class Root(nn.Module):
    """Root."""

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        residual: bool,
    ) -> None:
        """Creates an instance of the class."""
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            1,
            stride=1,
            bias=False,
            padding=(kernel_size - 1) // 2,
        )
        self.bn1 = nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.residual = residual

    def forward(self, *input_x: Tensor) -> Tensor:
        """Forward."""
        children = input_x
        feats = self.conv(torch.cat(input_x, 1))
        feats = self.bn1(feats)
        if self.residual:
            feats += children[0]
        feats = self.relu(feats)

        return feats
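
# Root concatenates its children along the channel axis and fuses them with
# a 1x1 convolution; a small illustrative check (shapes chosen arbitrarily):
#     >>> root = Root(64, 32, 1, residual=False)
#     >>> root(torch.rand(1, 32, 8, 8), torch.rand(1, 32, 8, 8)).shape
#     torch.Size([1, 32, 8, 8])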


class Tree(nn.Module):
    """Tree."""

    def __init__(  # pylint: disable=too-many-arguments
        self,
        levels: int,
        block: str,
        in_channels: int,
        out_channels: int,
        stride: int = 1,
        level_root: bool = False,
        root_dim: int = 0,
        root_kernel_size: int = 1,
        dilation: int = 1,
        root_residual: bool = False,
    ) -> None:
        """Creates an instance of the class."""
        super().__init__()
        if block == "BasicBlock":
            block_c = BasicBlock
        elif block == "Bottleneck":
            block_c = Bottleneck  # type: ignore
        elif block == "BottleneckX":
            block_c = BottleneckX  # type: ignore
        else:
            raise ValueError(f"Block={block} not yet supported in DLA!")

        if root_dim == 0:
            root_dim = 2 * out_channels
        if level_root:
            root_dim += in_channels
        if levels == 1:
            self.tree1: Tree | BasicBlock = block_c(
                in_channels, out_channels, stride, dilation=dilation
            )
            self.tree2: Tree | BasicBlock = block_c(
                out_channels, out_channels, 1, dilation=dilation
            )
            self.root = Root(
                root_dim, out_channels, root_kernel_size, root_residual
            )
        else:
            self.tree1 = Tree(
                levels - 1,
                block,
                in_channels,
                out_channels,
                stride,
                root_dim=0,
                root_kernel_size=root_kernel_size,
                dilation=dilation,
                root_residual=root_residual,
            )
            self.tree2 = Tree(
                levels - 1,
                block,
                out_channels,
                out_channels,
                root_dim=root_dim + out_channels,
                root_kernel_size=root_kernel_size,
                dilation=dilation,
                root_residual=root_residual,
            )
        self.level_root = level_root
        self.root_dim = root_dim
        self.downsample = None
        self.project = None
        self.levels = levels
        if stride > 1:
            self.downsample = nn.MaxPool2d(stride, stride=stride)
        if in_channels != out_channels and levels == 1:
            # NOTE the official impl/weights have project layers in levels > 1
            # case that are never used, hence 'levels == 1' is added but
            # pretrained models will need strict=False while loading.
            self.project = nn.Sequential(
                nn.Conv2d(
                    in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=1,
                    bias=False,
                ),
                nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM),
            )

    def forward(
        self,
        input_x: Tensor,
        residual: None | Tensor = None,
        children: None | list[Tensor] = None,
    ) -> Tensor:
        """Forward."""
        children = [] if children is None else children
        bottom = self.downsample(input_x) if self.downsample else input_x
        residual = self.project(bottom) if self.project else bottom
        if self.level_root:
            children.append(bottom)
        input_x1 = self.tree1(input_x, residual)
        if self.levels == 1:
            input_x2 = self.tree2(input_x1)
            input_x = self.root(input_x2, input_x1, *children)
        else:
            children.append(input_x1)
            input_x = self.tree2(input_x1, children=children)
        return input_x
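
# Illustrative shape check for a single-level tree: with stride 2 the input
# is downsampled once and projected to ``out_channels`` for the residual,
# then the two blocks' outputs are fused by ``Root``:
#     >>> tree = Tree(1, "BasicBlock", 16, 32, stride=2)
#     >>> tree(torch.rand(1, 16, 32, 32)).shape
#     torch.Size([1, 32, 16, 16])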


class DLA(BaseModel):
    """DLA base model."""

    def __init__(
        self,
        name: None | str = None,
        levels: tuple[int, int, int, int, int, int] = (1, 1, 1, 2, 2, 1),
        channels: tuple[int, int, int, int, int, int] = (
            16,
            32,
            64,
            128,
            256,
            512,
        ),
        block: str = "BasicBlock",
        residual_root: bool = False,
        cardinality: int = 32,
        weights: None | str = None,
        style: str = "imagenet",
    ) -> None:
        """Creates an instance of the class."""
        super().__init__()
        if name is not None:
            assert name in DLA_ARCH_SETTINGS
            arch_setting = DLA_ARCH_SETTINGS[name]
            levels, channels, residual_root, block = arch_setting
            if name == "dla102x2":  # pragma: no cover
                BottleneckX.cardinality = 64
        else:
            BottleneckX.cardinality = cardinality
        self.base_layer = nn.Sequential(
            nn.Conv2d(
                3, channels[0], kernel_size=7, stride=1, padding=3, bias=False
            ),
            nn.BatchNorm2d(channels[0], momentum=BN_MOMENTUM),
            nn.ReLU(inplace=True),
        )
        self.level0 = self._make_conv_level(
            channels[0], channels[0], levels[0]
        )
        self.level1 = self._make_conv_level(
            channels[0], channels[1], levels[1], stride=2
        )
        self.level2 = Tree(
            levels[2],
            block,
            channels[1],
            channels[2],
            2,
            level_root=False,
            root_residual=residual_root,
        )
        self.level3 = Tree(
            levels[3],
            block,
            channels[2],
            channels[3],
            2,
            level_root=True,
            root_residual=residual_root,
        )
        self.level4 = Tree(
            levels[4],
            block,
            channels[3],
            channels[4],
            2,
            level_root=True,
            root_residual=residual_root,
        )
        self.level5 = Tree(
            levels[5],
            block,
            channels[4],
            channels[5],
            2,
            level_root=True,
            root_residual=residual_root,
        )
        self._out_channels = list(channels)

        if weights is not None:  # pragma: no cover
            if weights.startswith("dla://"):
                weights_name = weights.split("dla://")[-1]
                assert weights_name in DLA_MODEL_MAPPING
                weights = (
                    f"{DLA_MODEL_PREFIX}{style}/"
                    f"{DLA_MODEL_MAPPING[weights_name]}"
                )
            self.load_pretrained_model(weights)

    @staticmethod
    def _make_conv_level(
        inplanes: int,
        planes: int,
        convs: int,
        stride: int = 1,
        dilation: int = 1,
    ) -> nn.Sequential:
        """Build convolutional level."""
        modules = []
        for i in range(convs):
            modules.extend(
                [
                    nn.Conv2d(
                        inplanes,
                        planes,
                        kernel_size=3,
                        stride=stride if i == 0 else 1,
                        padding=dilation,
                        bias=False,
                        dilation=dilation,
                    ),
                    nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),
                    nn.ReLU(inplace=True),
                ]
            )
            inplanes = planes
        return nn.Sequential(*modules)

    def load_pretrained_model(self, weights: str) -> None:
        """Load pretrained weights."""
        if weights.startswith("http://") or weights.startswith("https://"):
            model_weights = torch.hub.load_state_dict_from_url(weights)
        else:  # pragma: no cover
            model_weights = torch.load(weights)
        self.load_state_dict(model_weights, strict=False)

    def forward(self, images: Tensor) -> list[Tensor]:
        """DLA forward.

        Args:
            images (Tensor[N, C, H, W]): Image input to process. Expected to
                be of type float32 with values ranging from 0 to 255.

        Returns:
            fp (list[Tensor]): The output feature pyramid. The list index
                represents the level, which has a downsampling ratio of
                2^index. fp[0] is a feature map at the input image
                resolution, rather than the original image itself.
        """
        input_x = self.base_layer(images)
        outs: list[Tensor] = []
        for i in range(6):
            input_x = getattr(self, f"level{i}")(input_x)
            outs.append(input_x)
        return outs

    @property
    def out_channels(self) -> list[int]:
        """Get the number of channels for each level of the feature pyramid.

        Returns:
            list[int]: Number of channels per level.
        """
        return self._out_channels
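
# Usage sketch (no pretrained weights are loaded when ``weights`` is None;
# the spatial sizes follow from the five stride-2 stages after base_layer):
#     >>> model = DLA(name="dla34")
#     >>> feats = model(torch.rand(1, 3, 64, 64) * 255)
#     >>> [f.shape[1] for f in feats]
#     [16, 32, 64, 128, 256, 512]
#     >>> [f.shape[2] for f in feats]
#     [64, 32, 16, 8, 4, 2]
# To fetch the reference ImageNet weights instead, pass e.g.
# ``weights="dla://dla34"``.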