# Source code for vis4d.zoo.yolox.data

# pylint: disable=duplicate-code
"""COCO data loading config for YOLOX object detection."""
from __future__ import annotations

from collections.abc import Sequence

from ml_collections import ConfigDict

from vis4d.config import class_config
from vis4d.config.typing import DataConfig
from vis4d.data.const import CommonKeys as K
from vis4d.data.data_pipe import DataPipe, MultiSampleDataPipe
from vis4d.data.datasets.coco import COCO
from vis4d.data.io import DataBackend
from vis4d.data.loader import build_train_dataloader, default_collate
from vis4d.data.transforms.affine import (
    AffineBoxes2D,
    AffineImages,
    GenAffineParameters,
)
from vis4d.data.transforms.base import RandomApply, compose
from vis4d.data.transforms.flip import FlipBoxes2D, FlipImages
from vis4d.data.transforms.mixup import (
    GenMixupParameters,
    MixupBoxes2D,
    MixupImages,
)
from vis4d.data.transforms.mosaic import (
    GenMosaicParameters,
    MosaicBoxes2D,
    MosaicImages,
)
from vis4d.data.transforms.pad import PadImages
from vis4d.data.transforms.photometric import RandomHSV
from vis4d.data.transforms.post_process import PostProcessBoxes2D
from vis4d.data.transforms.resize import (
    GenResizeParameters,
    ResizeBoxes2D,
    ResizeImages,
)
from vis4d.data.transforms.to_tensor import ToTensor
from vis4d.engine.connectors import data_key, pred_key
from vis4d.zoo.base import get_inference_dataloaders_cfg
from vis4d.zoo.base.callable import get_callable_cfg

# Data connector mapping for COCO bounding-box evaluation: keys are the
# argument names the COCO evaluator expects; values pull either from the
# data sample (image ids via K.sample_names) or from model predictions.
CONN_COCO_BBOX_EVAL = {
    "coco_image_id": data_key(K.sample_names),
    "pred_boxes": pred_key("boxes"),
    "pred_scores": pred_key("scores"),
    "pred_classes": pred_key("class_ids"),
}

# Data connector mapping for COCO instance-mask evaluation. Box outputs are
# nested under the "boxes" prediction field (hence the dotted keys), while
# masks come from a separate "masks" prediction field.
CONN_COCO_MASK_EVAL = {
    "coco_image_id": data_key(K.sample_names),
    "pred_boxes": pred_key("boxes.boxes"),
    "pred_scores": pred_key("boxes.scores"),
    "pred_classes": pred_key("boxes.class_ids"),
    "pred_masks": pred_key("masks"),
}


def get_train_dataloader(
    data_root: str,
    split: str,
    keys_to_load: Sequence[str],
    data_backend: None | DataBackend,
    image_size: tuple[int, int],
    scaling_ratio_range: tuple[float, float],
    use_mixup: bool,
    samples_per_gpu: int,
    workers_per_gpu: int,
) -> ConfigDict:
    """Get the default train dataloader for COCO detection.

    Args:
        data_root: Root folder of the COCO dataset.
        split: Dataset split to load (e.g. ``train2017``).
        keys_to_load: Data keys to load from the dataset.
        data_backend: Optional data backend config for file access.
        image_size: Target (height, width) of the training images.
        scaling_ratio_range: Scale range used by the random affine transform.
        use_mixup: Whether to append the mixup transform group.
        samples_per_gpu: Batch size per GPU.
        workers_per_gpu: Number of dataloader workers per GPU.

    Returns:
        ConfigDict: Config for the training dataloader.
    """
    # Training dataset. Images are kept in BGR to match the cv2-backed
    # transforms below; empty samples are retained (remove_empty=False).
    dataset_cfg = class_config(
        COCO,
        keys_to_load=keys_to_load,
        data_root=data_root,
        split=split,
        remove_empty=False,
        image_channel_mode="BGR",
        data_backend=data_backend,
    )

    # Multi-sample preprocessing groups for MultiSampleDataPipe:
    # mosaic -> random affine (-> optional mixup) -> box post-processing.
    transform_groups = [
        [
            class_config(GenMosaicParameters, out_shape=image_size),
            class_config(MosaicImages, imresize_backend="cv2"),
            class_config(MosaicBoxes2D),
        ],
        [
            class_config(
                GenAffineParameters,
                scaling_ratio_range=scaling_ratio_range,
                border=(-image_size[0] // 2, -image_size[1] // 2),
            ),
            class_config(AffineImages, as_int=True),
            class_config(AffineBoxes2D),
        ],
    ]

    if use_mixup:
        transform_groups.append(
            [
                class_config(
                    GenMixupParameters,
                    out_shape=image_size,
                    mixup_ratio_dist="const",
                    scale_range=(0.8, 1.6),
                    pad_value=114.0,
                ),
                class_config(MixupImages, imresize_backend="cv2"),
                class_config(MixupBoxes2D),
            ]
        )

    # Drop degenerate boxes produced by the geometric augmentations.
    transform_groups.append(
        [class_config(PostProcessBoxes2D, min_area=1.0)]
    )

    # Per-batch transforms: HSV jitter, random flip, keep-ratio resize,
    # pad to square with value 114, then convert to tensors.
    batchprocess_cfg = class_config(
        compose,
        transforms=[
            class_config(RandomHSV, same_on_batch=False),
            class_config(
                RandomApply,
                transforms=[
                    class_config(FlipImages),
                    class_config(FlipBoxes2D),
                ],
                probability=0.5,
                same_on_batch=False,
            ),
            class_config(
                GenResizeParameters,
                shape=image_size,
                keep_ratio=True,
                same_on_batch=False,
            ),
            class_config(ResizeImages, imresize_backend="cv2"),
            class_config(ResizeBoxes2D),
            class_config(PadImages, value=114.0, pad2square=True),
            class_config(ToTensor),
        ],
    )

    return class_config(
        build_train_dataloader,
        dataset=class_config(
            MultiSampleDataPipe,
            datasets=dataset_cfg,
            preprocess_fn=transform_groups,
        ),
        samples_per_gpu=samples_per_gpu,
        workers_per_gpu=workers_per_gpu,
        batchprocess_fn=batchprocess_cfg,
        collate_fn=get_callable_cfg(default_collate),
        pin_memory=True,
        shuffle=True,
    )
def get_test_dataloader(
    data_root: str,
    split: str,
    keys_to_load: Sequence[str],
    data_backend: None | DataBackend,
    image_size: tuple[int, int],
    samples_per_gpu: int,
    workers_per_gpu: int,
) -> ConfigDict:
    """Get the default test dataloader for COCO detection.

    Args:
        data_root: Root folder of the COCO dataset.
        split: Dataset split to load (e.g. ``val2017``).
        keys_to_load: Data keys to load from the dataset.
        data_backend: Optional data backend config for file access.
        image_size: Target (height, width) of the test images.
        samples_per_gpu: Batch size per GPU.
        workers_per_gpu: Number of dataloader workers per GPU.

    Returns:
        ConfigDict: Config for the inference dataloader.
    """
    # Test dataset, loaded in BGR to match the cv2 resize backend.
    dataset_cfg = class_config(
        COCO,
        keys_to_load=keys_to_load,
        data_root=data_root,
        split=split,
        image_channel_mode="BGR",
        data_backend=data_backend,
    )

    # Per-sample preprocessing: keep-ratio resize to the test resolution.
    preprocess_cfg = class_config(
        compose,
        transforms=[
            class_config(
                GenResizeParameters, shape=image_size, keep_ratio=True
            ),
            class_config(ResizeImages, imresize_backend="cv2"),
        ],
    )

    # Per-batch processing: pad to square (value 114), then to tensors.
    batchprocess_cfg = class_config(
        compose,
        transforms=[
            class_config(PadImages, value=114.0, pad2square=True),
            class_config(ToTensor),
        ],
    )

    datapipe_cfg = class_config(
        DataPipe, datasets=dataset_cfg, preprocess_fn=preprocess_cfg
    )

    return get_inference_dataloaders_cfg(
        datasets_cfg=datapipe_cfg,
        batchprocess_cfg=batchprocess_cfg,
        samples_per_gpu=samples_per_gpu,
        workers_per_gpu=workers_per_gpu,
    )
def get_coco_yolox_cfg(
    data_root: str = "data/coco",
    train_split: str = "train2017",
    train_keys_to_load: Sequence[str] = (
        K.images,
        K.boxes2d,
        K.boxes2d_classes,
    ),
    test_split: str = "val2017",
    test_keys_to_load: Sequence[str] = (K.images, K.original_images),
    data_backend: None | ConfigDict = None,
    train_image_size: tuple[int, int] = (640, 640),
    scaling_ratio_range: tuple[float, float] = (0.1, 2.0),
    use_mixup: bool = True,
    test_image_size: tuple[int, int] = (640, 640),
    samples_per_gpu: int = 2,
    workers_per_gpu: int = 2,
) -> DataConfig:
    """Get the default config for COCO detection.

    Args:
        data_root: Root folder of the COCO dataset.
        train_split: Training split name.
        train_keys_to_load: Data keys loaded for training.
        test_split: Test split name.
        test_keys_to_load: Data keys loaded for testing.
        data_backend: Optional data backend config for file access.
        train_image_size: Target (height, width) for training images.
        scaling_ratio_range: Scale range for the random affine transform.
        use_mixup: Whether to enable the mixup augmentation.
        test_image_size: Target (height, width) for test images.
        samples_per_gpu: Training batch size per GPU.
        workers_per_gpu: Number of dataloader workers per GPU.

    Returns:
        DataConfig: Combined train/test dataloader configuration.
    """
    cfg = DataConfig()

    cfg.train_dataloader = get_train_dataloader(
        data_root=data_root,
        split=train_split,
        keys_to_load=train_keys_to_load,
        data_backend=data_backend,
        image_size=train_image_size,
        scaling_ratio_range=scaling_ratio_range,
        use_mixup=use_mixup,
        samples_per_gpu=samples_per_gpu,
        workers_per_gpu=workers_per_gpu,
    )

    # Inference always runs with a batch size of one per GPU.
    cfg.test_dataloader = get_test_dataloader(
        data_root=data_root,
        split=test_split,
        keys_to_load=test_keys_to_load,
        data_backend=data_backend,
        image_size=test_image_size,
        samples_per_gpu=1,
        workers_per_gpu=workers_per_gpu,
    )

    return cfg