Source code for vis4d.data.datasets.s3dis

"""Stanford 3D indoor dataset."""

from __future__ import annotations

import copy
import glob
import os
from collections.abc import Sequence
from io import BytesIO

import numpy as np
import pandas as pd
import torch

from vis4d.common.typing import ArgsType, DictStrAny
from vis4d.data.const import CommonKeys as K
from vis4d.data.typing import DictData

from .base import Dataset
from .util import CacheMappingMixin


class S3DIS(CacheMappingMixin, Dataset):
    """S3DIS dataset class."""

    DESCRIPTION = """S3DIS is a large-scale indoor pointcloud dataset."""
    HOMEPAGE = "https://buildingparser.stanford.edu/dataset.html"
    PAPER = (
        "https://openaccess.thecvf.com/content_cvpr_2016/papers/"
        "Armeni_3D_Semantic_Parsing_CVPR_2016_paper.pdf"
    )
    LICENSE = "CC BY-NC-SA 4.0"

    KEYS = [K.points3d, K.colors3d, K.semantics3d, K.instances3d]

    CLASS_NAME_TO_IDX = {
        "ceiling": 0,
        "floor": 1,
        "wall": 2,
        "beam": 3,
        "column": 4,
        "window": 5,
        "door": 6,
        "chair": 7,
        "table": 8,
        "bookcase": 9,
        "sofa": 10,
        "board": 11,
        "clutter": 12,
    }

    CLASS_COUNTS = torch.Tensor(
        [
            3370714,
            2856755,
            4919229,
            318158,
            375640,
            478001,
            974733,
            650464,
            791496,
            88727,
            1284130,
            229758,
            2272837,
        ]
    )

    AVAILABLE_KEYS: Sequence[str] = (
        K.points3d,
        K.colors3d,
        K.semantics3d,
        K.instances3d,
    )

    COLOR_MAPPING = torch.tensor(
        [
            [152, 223, 138],
            [31, 119, 180],
            [188, 189, 34],
            [140, 86, 75],
            [255, 152, 150],
            [214, 39, 40],
            [197, 176, 213],
            [23, 190, 207],
            [178, 76, 76],
            [247, 182, 210],
            [66, 188, 102],
            [219, 219, 141],
            [140, 57, 197],
            [202, 185, 52],
        ]
    )

    def __init__(
        self,
        data_root: str,
        split: str = "trainNoArea5",
        keys_to_load: Sequence[str] = (
            K.points3d,
            K.colors3d,
            K.semantics3d,
            K.instances3d,
        ),
        cache_points: bool = True,
        cache_as_binary: bool = False,
        cached_file_path: str | None = None,
        **kwargs: ArgsType,
    ) -> None:
        """Creates a new S3DIS dataset.

        Args:
            data_root (str): Path to the S3DIS folder.
            split (str): Which split to load. Must be either
                trainNoArea[1-6] or testArea[1-6], e.g. trainNoArea5 loads
                all areas except area 5 and testArea5 loads only area 5.
            keys_to_load (Sequence[str]): Which data to load (e.g. colors,
                xyz, semantics, ...).
            cache_points (bool): If True, caches loaded points instead of
                reading them from disk every time. Default: True.
            cache_as_binary (bool): Whether to cache the dataset as binary.
                Default: False.
            cached_file_path (str | None): Path to a cached file. If the
                cached file exists, it is loaded instead of generating the
                data mapping. Default: None.

        Raises:
            ValueError: If the requested split is malformed.
        """
        super().__init__(**kwargs)
        self.data_root = data_root
        self.split = split
        self.areas: list[str] = [
            "Area_1",
            "Area_2",
            "Area_3",
            "Area_4",
            "Area_5",
            "Area_6",
        ]

        area_number = int(self.split.split("Area")[-1])
        if "trainNoArea" in self.split:
            self.areas.remove(self.areas[area_number - 1])
        elif "testArea" in self.split:
            self.areas = [self.areas[area_number - 1]]
        else:
            raise ValueError(f"Unknown split: {self.split}")

        self.data, _ = self._load_mapping(
            self._generate_data_mapping,
            cache_as_binary=cache_as_binary,
            cached_file_path=cached_file_path,
        )
        self.keys_to_load = keys_to_load

        # Cache
        self.cache_points = cache_points
        self._cache: dict[int, DictData] = {}

    @property
    def num_classes(self) -> int:
        """The number of classes in the dataset."""
        return len(S3DIS.CLASS_NAME_TO_IDX)
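    # Split convention, illustrated (the "data/s3dis" root is a hypothetical
    # example path):
    #     S3DIS("data/s3dis", split="trainNoArea5").areas
    #     # -> ["Area_1", "Area_2", "Area_3", "Area_4", "Area_6"]
    #     S3DIS("data/s3dis", split="testArea5").areas
    #     # -> ["Area_5"]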
    def __repr__(self) -> str:
        """Concise representation of the dataset."""
        return f"S3DIS(root={self.data_root}, split={self.split})"
    def _generate_data_mapping(self) -> list[DictStrAny]:
        """Generate the S3DIS dataset mapping.

        Returns:
            list[DictStrAny]: One entry per room, mapping each annotated
                instance to its class label and annotation file path.
        """
        data: list[DictStrAny] = []
        for area in self.areas:
            for room_path in glob.glob(
                os.path.join(self.data_root, area + "/*")
            ):
                room_data: DictStrAny = {}
                if not os.path.isdir(room_path):
                    continue
                for anns in glob.glob(
                    os.path.join(room_path, "Annotations/*.txt")
                ):
                    instance_id = os.path.basename(anns.replace(".txt", ""))
                    sem_name = instance_id.split("_")[0]
                    room_data[instance_id] = {
                        # Unknown class names fall back to "clutter" (12).
                        "class_label": S3DIS.CLASS_NAME_TO_IDX.get(
                            sem_name, 12
                        ),
                        "path": anns,
                    }
                data.append(room_data)
        return data
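    # Sketch of one room entry in the generated mapping, assuming the
    # standard S3DIS layout (the room and path shown are hypothetical):
    #     {
    #         "chair_1": {
    #             "class_label": 7,
    #             "path": "<data_root>/Area_1/office_1/Annotations/chair_1.txt",
    #         },
    #         ...
    #     }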
    def __len__(self) -> int:
        """Length of the dataset."""
        return len(self.data)
    def __getitem__(self, idx: int) -> DictData:
        """Transform an S3DIS sample to the vis4d input format.

        Returns:
            coordinates: 3D point coordinates, shape (n, 3).
            colors: 3D point colors, shape (n, 3).
            semantic classes: 3D point class indices, shape (n,).

        Raises:
            ValueError: If a requested key does not exist in this dataset.
        """
        data = self.data[idx]

        # Return a copy of the cached sample if it was loaded before.
        if self.cache_points and idx in self._cache:
            return copy.deepcopy(self._cache[idx])

        coords = np.zeros((0, 3), dtype=np.float32)
        color = np.zeros((0, 3), dtype=np.float32)
        semantic_ids = np.zeros((0, 1), dtype=int)
        instance_ids = np.zeros((0, 1), dtype=int)

        for values in data.values():
            data_path = values["path"]
            instance_id = int(
                values["path"].split("_")[-1].replace(".txt", "")
            )
            np_data = pd.read_csv(
                BytesIO(self.data_backend.get(data_path)),
                header=None,
                delimiter=" ",
            ).values.astype(np.float32)

            if K.points3d in self.keys_to_load:
                coords = np.vstack([coords, np_data[:, :3]])
            if K.colors3d in self.keys_to_load:
                color = np.vstack([color, np_data[:, 3:]])
            if K.semantics3d in self.keys_to_load:
                semantic_ids = np.vstack(
                    [
                        semantic_ids,
                        np.ones((np_data.shape[0], 1), dtype=int)
                        * values["class_label"],
                    ]
                )
            if K.instances3d in self.keys_to_load:
                instance_ids = np.vstack(
                    [
                        instance_ids,
                        np.ones((np_data.shape[0], 1), dtype=int)
                        * instance_id,
                    ]
                )

        data = {}
        for key in self.keys_to_load:
            if key == K.points3d:
                data[key] = coords
            elif key == K.colors3d:
                data[key] = color / 255.0
            elif key == K.semantics3d:
                data[key] = semantic_ids.squeeze(-1)
            elif key == K.instances3d:
                data[key] = instance_ids.squeeze(-1)
            else:
                raise ValueError(f"Cannot load data for key: {key}")

        if self.cache_points:
            self._cache[idx] = copy.deepcopy(data)

        return data
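

# Minimal usage sketch, assuming an S3DIS copy extracted under the
# hypothetical path "data/s3dis" with the standard
# Area_[1-6]/<room>/Annotations/*.txt layout:
if __name__ == "__main__":
    dataset = S3DIS(data_root="data/s3dis", split="trainNoArea5")
    print(len(dataset), "rooms")
    sample = dataset[0]
    print(sample[K.points3d].shape)  # (n, 3) float32 xyz coordinates
    print(sample[K.colors3d].shape)  # (n, 3) colors scaled to [0, 1]
    print(sample[K.semantics3d].shape)  # (n,) class ids in [0, 12]
    print(sample[K.instances3d].shape)  # (n,) per-room instance ids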