Source code for vis4d.op.base.pointnetpp

"""Pointnet++ implementation.

based on https://github.com/yanx27/Pointnet_Pointnet2_pytorch
Added typing and named tuples for convenience.

#TODO write tests
"""

from __future__ import annotations

from collections.abc import Callable
from typing import NamedTuple

import torch
import torch.nn.functional as F
from torch import Tensor, nn


class PointNetSetAbstractionOut(NamedTuple):
    """Output of PointNet set abstraction.

    Returned by ``PointNetSetAbstraction.forward``. Both tensors are laid out
    channels-first, with ``S`` being the number of sampled center points.
    """

    # Coordinates of the sampled center points, [B, C, S].
    coordinates: Tensor  # [B, C, S]
    # Max-pooled features of each local group, [B, D', S].
    features: Tensor  # [B, D', S]
def square_distance(src: Tensor, dst: Tensor) -> Tensor:
    """Compute pairwise squared Euclidean distances between two point sets.

    Uses the expansion ``|a - b|^2 = |a|^2 + |b|^2 - 2 * a^T b`` so the whole
    distance matrix is obtained from a single batched matrix product.

    Args:
        src: source points, [B, N, C]
        dst: target points, [B, M, C]

    Returns:
        Squared distance between every source and target point, [B, N, M].
    """
    batch_size, n_src, _ = src.shape
    _, n_dst, _ = dst.shape

    # Cross term a^T b for every (source, target) pair.
    cross = torch.matmul(src, dst.permute(0, 2, 1))
    # Squared norms, shaped so that they broadcast over the pair matrix.
    src_sq = (src**2).sum(dim=-1).view(batch_size, n_src, 1)
    dst_sq = (dst**2).sum(dim=-1).view(batch_size, 1, n_dst)

    return -2 * cross + src_sq + dst_sq
def index_points(points: Tensor, idx: Tensor) -> Tensor:
    """Gather points per batch element using an index tensor.

    Args:
        points: input points data, [B, N, C]
        idx: sample index data, [B, S] (any number of trailing index
            dimensions is supported, e.g. [B, S, K])

    Returns:
        Indexed points data with the shape of ``idx`` plus a trailing channel
        dimension, e.g. [B, S, C].
    """
    batch_size = points.shape[0]
    # One singleton dimension for every index dimension after the batch one.
    trailing_ones = [1] * (idx.dim() - 1)

    # Batch selector with the same shape as ``idx``: entry [b, ...] == b.
    batch_sel = torch.arange(
        batch_size, dtype=torch.long, device=points.device
    )
    batch_sel = batch_sel.view([idx.shape[0], *trailing_ones]).expand_as(idx)

    return points[batch_sel, idx, :]
def farthest_point_sample(xyz: Tensor, npoint: int) -> Tensor:
    """Farthest point sampling.

    Iteratively picks the point that is farthest away from all points chosen
    so far. The first point of every batch element is drawn at random, so the
    result is only reproducible under a fixed torch seed.

    Args:
        xyz: pointcloud data, [B, N, C] (C is usually 3, but any coordinate
            dimension is supported)
        npoint: number of samples

    Returns:
        Sampled pointcloud indices (dtype long), [B, npoint].
    """
    device = xyz.device
    bs, n_pts, _ = xyz.shape

    # Allocate directly on the target device instead of host + copy.
    centroids = torch.zeros(bs, npoint, dtype=torch.long, device=device)
    # Running distance of every point to its closest selected centroid.
    # It uses the dtype of the input so that float64 / float16 clouds work.
    distance = torch.full(
        (bs, n_pts), float("inf"), dtype=xyz.dtype, device=device
    )
    # Drawn on the CPU generator (as before) to keep seeded runs comparable.
    farthest = torch.randint(0, n_pts, (bs,), dtype=torch.long).to(device)
    batch_indices = torch.arange(bs, dtype=torch.long, device=device)

    for i in range(npoint):
        centroids[:, i] = farthest
        # [B, 1, C]; no hard-coded coordinate dimension.
        centroid = xyz[batch_indices, farthest, :].unsqueeze(1)
        dist = torch.sum((xyz - centroid) ** 2, -1)
        # Keep the smaller of the old and the new distance per point.
        distance = torch.where(dist < distance, dist, distance)
        farthest = torch.max(distance, -1)[1]
    return centroids
def query_ball_point(
    radius: float, nsample: int, xyz: Tensor, new_xyz: Tensor
) -> Tensor:
    """Query around a ball with given radius.

    For every query point, returns the indices of up to ``nsample`` points of
    ``xyz`` that lie within ``radius``. If fewer than ``nsample`` points are
    inside the ball, the remaining slots are filled with the first hit. If
    the ball is empty (e.g. due to round-off in the distance computation),
    the nearest point is used instead, so the returned indices are always
    valid.

    Args:
        radius: local region radius
        nsample: max sample number in local region
        xyz: all points, [B, N, 3]
        new_xyz: query points, [B, S, 3]

    Returns:
        Grouped points index, [B, S, nsample].
    """
    device = xyz.device
    bs, n_pts_in, _ = xyz.shape
    _, n_pts_out, _ = new_xyz.shape

    sqrdists = square_distance(new_xyz, xyz)

    group_idx = (
        torch.arange(n_pts_in, dtype=torch.long, device=device)
        .view(1, 1, n_pts_in)
        .repeat([bs, n_pts_out, 1])
    )
    # Mark points outside the ball with the out-of-range sentinel N so that
    # sorting moves them to the end.
    group_idx[sqrdists > radius**2] = n_pts_in
    group_idx = group_idx.sort(dim=-1)[0][:, :, :nsample]

    # With nsample > N there are fewer than nsample columns; pad with the
    # sentinel so that the output shape is always [B, S, nsample].
    missing = nsample - group_idx.shape[-1]
    if missing > 0:
        padding = group_idx.new_full((bs, n_pts_out, missing), n_pts_in)
        group_idx = torch.cat([group_idx, padding], dim=-1)

    # First hit of every ball; fall back to the nearest point if the ball is
    # empty so that the sentinel never leaks into the result.
    group_first = group_idx[:, :, :1]
    nearest = sqrdists.argmin(dim=-1, keepdim=True)
    group_first = torch.where(group_first == n_pts_in, nearest, group_first)

    # Replace all remaining sentinels with the first hit.
    group_idx = torch.where(group_idx == n_pts_in, group_first, group_idx)
    return group_idx
def sample_and_group(
    npoint: int,
    radius: float,
    nsample: int,
    xyz: Tensor,
    points: Tensor,
) -> tuple[Tensor, Tensor]:
    """Sample center points and group their local neighborhoods.

    Centers are chosen with farthest point sampling, neighbors with a ball
    query around each center. Neighbor coordinates are expressed relative to
    their center.

    Args:
        npoint: Number of centers to sample
        radius: Grouping radius
        nsample: Max number of points to sample for each ball
        xyz: input points position data, [B, N, C]
        points: input points feature data, [B, N, D]. May be None, in which
            case only the relative coordinates are returned as features.

    Returns:
        new_xyz: sampled center positions, [B, npoint, C]
        new_points: grouped data, [B, npoint, nsample, C+D]
            ([B, npoint, nsample, C] if ``points`` is None)
    """
    batch_size, _, n_dims = xyz.shape

    # Pick the centers and look up their coordinates, [B, npoint, C].
    centers = index_points(xyz, farthest_point_sample(xyz, npoint))

    # Neighbor indices of every center, [B, npoint, nsample].
    neighbor_idx = query_ball_point(radius, nsample, xyz, centers)

    # Neighbor coordinates relative to their center, [B, npoint, nsample, C].
    local_xyz = index_points(xyz, neighbor_idx) - centers.view(
        batch_size, npoint, 1, n_dims
    )

    if points is None:
        return centers, local_xyz

    neighbor_feats = index_points(points, neighbor_idx)
    # [B, npoint, nsample, C+D]
    return centers, torch.cat([local_xyz, neighbor_feats], dim=-1)
def sample_and_group_all(
    xyz: Tensor, points: Tensor | None
) -> tuple[Tensor, Tensor]:
    """Group all points into one single group centered at the origin.

    Args:
        xyz: input points position data, [B, N, C]
        points: input points feature data, [B, N, D], or None if there are
            no additional features.

    Returns:
        new_xyz: position of the single group center (all zeros), [B, 1, C]
        new_points: grouped data, [B, 1, N, C+D]
            ([B, 1, N, C] if ``points`` is None)
    """
    bs, n_pts, channels = xyz.shape
    # new_zeros keeps dtype and device of the input, so that the center can
    # be mixed with the other coordinates in later layers (e.g. for float64
    # or float16 inputs).
    new_xyz = xyz.new_zeros(bs, 1, channels)
    grouped_xyz = xyz.view(bs, 1, n_pts, channels)
    if points is not None:
        new_points = torch.cat(
            [grouped_xyz, points.view(bs, 1, n_pts, -1)], dim=-1
        )
    else:
        new_points = grouped_xyz
    return new_xyz, new_points
class PointNetSetAbstraction(nn.Module):
    """PointNet set abstraction layer."""

    def __init__(
        self,
        npoint: int,
        radius: float,
        nsample: int,
        in_channel: int,
        mlp: list[int],
        group_all: bool,
        norm_cls: str | None = "BatchNorm2d",
    ):
        """Set Abstraction Layer from the Pointnet Architecture.

        Args:
            npoint: How many center points to sample
            radius: Radius of the ball query
            nsample: Max number of points to group inside each ball
            in_channel: Number of input channels of the first mlp layer
            mlp: Output channels of the consecutive mlp layers. E.g.
                [32, 32, 64] creates a MLP with three layers
            group_all: If true, all points are put into one single group
                instead of sampling 'npoint' local groups.
            norm_cls (Optional(str)): Name of the norm class inside torch.nn
                (nn.'norm_cls') or None to disable normalization
        """
        super().__init__()
        self.npoint = npoint
        self.radius = radius
        self.nsample = nsample
        self.mlp_convs = nn.ModuleList()
        self.mlp_bns = nn.ModuleList()

        # Resolve the norm constructor once; stays None without norm.
        make_norm: Callable[[int], nn.Module] | None = None
        if norm_cls is not None:
            make_norm = getattr(nn, norm_cls)

        # Chain the layers: every layer consumes the previous layer's output.
        widths = [in_channel, *mlp]
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            self.mlp_convs.append(nn.Conv2d(n_in, n_out, 1))
            if make_norm is not None:
                self.mlp_bns.append(make_norm(n_out))

        self.group_all = group_all

    def __call__(
        self, coordinates: Tensor, features: Tensor
    ) -> PointNetSetAbstractionOut:
        """Typed entry point that dispatches to forward().

        Args:
            coordinates: input points position data, [B, C, N]
            features: input points data, [B, D, N]

        Returns:
            PointNetSetAbstractionOut with:
                coordinates: sampled points position data, [B, C, S]
                features: sample points feature data, [B, D', S]
        """
        return self._call_impl(coordinates, features)

    def forward(
        self, xyz: Tensor, points: Tensor
    ) -> PointNetSetAbstractionOut:
        """Pointnet++ set abstraction layer forward.

        Args:
            xyz: input points position data, [B, C, N]
            points: input points data, [B, D, N]. May be None if the points
                carry no additional features.

        Returns:
            PointNetSetAbstractionOut with:
                coordinates: sampled points position data, [B, C, S]
                features: sample points feature data, [B, D', S]
        """
        # The grouping helpers work on channels-last tensors.
        coords = xyz.permute(0, 2, 1)
        feats = None if points is None else points.permute(0, 2, 1)

        if self.group_all:
            centers, grouped = sample_and_group_all(coords, feats)
        else:
            centers, grouped = sample_and_group(
                self.npoint, self.radius, self.nsample, coords, feats
            )
        # centers: sampled points position data, [B, npoint, C]
        # grouped: sampled points data, [B, npoint, nsample, C+D]

        # Channels-first layout for the 1x1 convolutions,
        # [B, C+D, nsample, npoint].
        grouped = grouped.permute(0, 3, 2, 1)

        has_norm = len(self.mlp_bns) != 0
        for layer_idx, conv in enumerate(self.mlp_convs):
            grouped = conv(grouped)
            if has_norm:
                grouped = self.mlp_bns[layer_idx](grouped)
            grouped = F.relu(grouped)

        # Max-pool over the samples of each group, [B, D', npoint].
        pooled = grouped.max(dim=2).values
        return PointNetSetAbstractionOut(centers.permute(0, 2, 1), pooled)
class PointNetFeaturePropagation(nn.Module):
    """Pointnet++ Feature Propagation Layer."""

    def __init__(
        self,
        in_channel: int,
        mlp: list[int],
        norm_cls: str | None = "BatchNorm1d",
    ):
        """Creates a pointnet++ feature propagation layer.

        Args:
            in_channel: Number of input channels
            mlp: list with the output channels of the MLP layers.
            norm_cls (Optional(str)): Name of the norm class inside torch.nn
                (nn.'norm_cls') or None to disable normalization
        """
        super().__init__()
        self.mlp_convs = nn.ModuleList()
        self.mlp_bns = nn.ModuleList()

        # Create norms
        norm_fn: Callable[[int], nn.Module] | None = (
            getattr(nn, norm_cls) if norm_cls is not None else None
        )

        last_channel = in_channel
        for out_channel in mlp:
            self.mlp_convs.append(nn.Conv1d(last_channel, out_channel, 1))
            if norm_fn is not None:
                self.mlp_bns.append(norm_fn(out_channel))
            last_channel = out_channel

    def __call__(
        self,
        xyz1: Tensor,
        xyz2: Tensor,
        points1: Tensor | None,
        points2: Tensor,
    ) -> Tensor:
        """Call function.

        Args:
            xyz1: input points position data, [B, C, N]
            xyz2: sampled input points position data, [B, C, S]
            points1: input points features, [B, D, N], or None
            points2: sampled points features, [B, D, S]

        Returns:
            new_points: upsampled points data, [B, D', N]
        """
        return self._call_impl(xyz1, xyz2, points1, points2)

    def forward(
        self,
        xyz1: Tensor,
        xyz2: Tensor,
        points1: Tensor | None,
        points2: Tensor,
    ) -> Tensor:
        """Forward Implementation.

        Interpolates the features of the sampled points to all input points
        using inverse (squared) distance weighting over the (up to) three
        nearest sampled points, optionally concatenates the skip features
        ``points1`` and applies the MLP.

        Args:
            xyz1: input points position data, [B, C, N]
            xyz2: sampled input points position data, [B, C, S]
            points1: input points features, [B, D, N], or None
            points2: sampled points features, [B, D, S]

        Returns:
            new_points: upsampled points data, [B, D', N]
        """
        xyz1 = xyz1.permute(0, 2, 1)
        xyz2 = xyz2.permute(0, 2, 1)
        points2 = points2.permute(0, 2, 1)

        _, n_pts, _ = xyz1.shape
        _, n_out_pts, _ = xyz2.shape

        if n_out_pts == 1:
            # Only one sampled point: broadcast its features to all points.
            interpolated_points = points2.repeat(1, n_pts, 1)
        else:
            # Use at most three neighbors, but never more than available
            # (a hard-coded 3 fails for S == 2).
            n_neighbors = min(3, n_out_pts)
            dists = square_distance(xyz1, xyz2)
            dists, idx = dists.sort(dim=-1)
            # Round-off can make squared distances slightly negative, which
            # would produce negative or infinite weights.
            dists = dists[:, :, :n_neighbors].clamp(min=0)
            idx = idx[:, :, :n_neighbors]  # [B, N, k]

            dist_recip: Tensor = 1.0 / (dists + 1e-8)
            norm = torch.sum(dist_recip, dim=2, keepdim=True)
            weight = dist_recip / norm
            interpolated_points = torch.sum(
                index_points(points2, idx) * weight.unsqueeze(-1),
                dim=2,
            )

        if points1 is not None:
            points1 = points1.permute(0, 2, 1)
            new_points = torch.cat([points1, interpolated_points], dim=-1)
        else:
            new_points = interpolated_points

        # Back to channels-first for the 1x1 convolutions.
        new_points = new_points.permute(0, 2, 1)
        for i, conv in enumerate(self.mlp_convs):
            bn = self.mlp_bns[i] if len(self.mlp_bns) != 0 else lambda x: x
            new_points = F.relu(bn(conv(new_points)))
        return new_points
class PointNet2SegmentationOut(NamedTuple):
    """Prediction for the pointnet++ semantic segmentation network."""

    # Unnormalized per-point class scores produced by the final 1x1
    # convolution, channels-first: [B, num_classes, n_pts].
    class_logits: Tensor
class PointNet2Segmentation(nn.Module):  # TODO, probably move to module?
    """Pointnet++ Segmentation Network."""

    def __init__(self, num_classes: int, in_channels: int = 3):
        """Creates a new Pointnet++ for segmentation.

        Args:
            num_classes: Number of semantic classes
            in_channels: Number of input channels. The first three channels
                are interpreted as the point coordinates.
        """
        super().__init__()

        # The layers must live in a ModuleList (not a plain list), otherwise
        # they are not registered as submodules: their parameters would be
        # missing from parameters() / state_dict() and .to(), .train() and
        # .eval() would not reach them.
        self.set_abstractions = nn.ModuleList(
            [
                PointNetSetAbstraction(
                    1024, 0.1, 32, in_channels + 3, [32, 32, 64], False
                ),
                PointNetSetAbstraction(
                    256, 0.2, 32, 64 + 3, [64, 64, 128], False
                ),
                PointNetSetAbstraction(
                    64, 0.4, 32, 128 + 3, [128, 128, 256], False
                ),
                PointNetSetAbstraction(
                    16, 0.8, 32, 256 + 3, [256, 256, 512], False
                ),
            ]
        )

        self.feature_propagations = nn.ModuleList(
            [
                PointNetFeaturePropagation(768, [256, 256]),
                PointNetFeaturePropagation(384, [256, 256]),
                PointNetFeaturePropagation(320, [256, 128]),
                # The last layer concatenates the raw input features
                # (in_channels) with the 128 interpolated channels.
                PointNetFeaturePropagation(
                    128 + in_channels, [128, 128, 128]
                ),
            ]
        )

        # Final convolutions
        self.conv1 = nn.Conv1d(128, 128, 1)
        self.bn1 = nn.BatchNorm1d(128)
        self.drop1 = nn.Dropout(0.5)
        self.conv2 = nn.Conv1d(128, num_classes, 1)

        self.in_channels = in_channels

    def __call__(self, xyz: Tensor) -> PointNet2SegmentationOut:
        """Call implementation.

        Args:
            xyz: Pointcloud data shaped [N, n_feats, n_pts]

        Returns:
            PointNet2SegmentationOut, class logits for each point
        """
        return self._call_impl(xyz)

    def forward(self, xyz: Tensor) -> PointNet2SegmentationOut:
        """Predicts the semantic class logits for each point.

        Args:
            xyz: Pointcloud data shaped [N, n_feats, n_pts]. The first three
                feature channels are used as point coordinates, all channels
                are used as input features.

        Returns:
            PointNet2SegmentationOut, class logits for each point
        """
        assert xyz.size(1) == self.in_channels, (
            f"Expected {self.in_channels} input channels, "
            f"got {xyz.size(1)}."
        )

        l0_points = xyz
        l0_xyz = xyz[:, :3, :]

        # Encoder: progressively downsample the pointcloud and keep the
        # output of every level for the skip connections.
        set_abstraction_out = PointNetSetAbstractionOut(
            coordinates=l0_xyz, features=l0_points
        )
        outputs: list[PointNetSetAbstractionOut] = [set_abstraction_out]
        for set_abs_layer in self.set_abstractions:
            set_abstraction_out = set_abs_layer(
                set_abstraction_out.coordinates, set_abstraction_out.features
            )
            outputs.append(set_abstraction_out)

        # Decoder: propagate the features from the coarsest level back to
        # the input points, level by level.
        pointwise_features = outputs[-1].features
        for idx, feature_prop_layer in enumerate(self.feature_propagations):
            coarse_level = outputs[-idx - 1]
            fine_level = outputs[-idx - 2]
            pointwise_features = feature_prop_layer(
                fine_level.coordinates,
                coarse_level.coordinates,
                fine_level.features,
                pointwise_features,
            )

        x = self.drop1(F.relu(self.bn1(self.conv1(pointwise_features))))
        x = self.conv2(x)
        return PointNet2SegmentationOut(class_logits=x)