"""Semantic FPN Head for segmentation."""from__future__importannotationsfromtypingimportNamedTupleimporttorch.nn.functionalasFfromtorchimportTensor,nnfromvis4d.op.layer.conv2dimportConv2d
class SemanticFPNOut(NamedTuple):
    """Container for the prediction returned by the Semantic FPN head.

    Being a ``NamedTuple``, the result can be accessed by field name
    (``out.outputs``) or by position (``out[0]``).
    """

    # Logits for the final prediction, laid out as (N, C, H, W).
    outputs: Tensor
class SemanticFPNHead(nn.Module):
    """SemanticFPNHead used in Panoptic FPN.

    One "scale head" (a stack of 3x3 conv blocks, each followed by a 2x
    bilinear upsampling for every level above ``start_level``) is built per
    used feature level. In the forward pass the outputs of all scale heads
    are resized to the spatial size of the first one, summed, optionally
    passed through 2D dropout and projected to ``num_classes`` channels by a
    1x1 convolution.
    """

    def __init__(
        self,
        num_classes: int = 53,
        in_channels: int = 256,
        inner_channels: int = 128,
        start_level: int = 2,
        end_level: int = 6,
        dropout_ratio: float = 0.1,
    ) -> None:
        """Creates an instance of the class.

        Args:
            num_classes (int): Number of classes. Default: 53.
            in_channels (int): Number of channels in the input feature map.
            inner_channels (int): Number of channels in inner features.
            start_level (int): The start level of the input features used in
                SemanticFPN.
            end_level (int): The end level of the used features, the
                ``end_level``-th layer will not be used.
            dropout_ratio (float): The drop ratio of dropout layer. Dropout
                is only created and applied when this is greater than 0.
                Default: 0.1.
        """
        super().__init__()
        self.num_classes = num_classes
        # Used feature layers are [start_level, end_level)
        self.start_level = start_level
        self.end_level = end_level
        self.num_stages = end_level - start_level
        self.inner_channels = inner_channels

        self.scale_heads = nn.ModuleList()
        for i in range(start_level, end_level):
            # The first level gets a single conv and no upsampling; level
            # ``start_level + d`` gets ``d`` conv + 2x-upsample pairs.
            head_length = max(1, i - start_level)
            scale_head: list[nn.Module] = []
            for k in range(head_length):
                scale_head.append(
                    Conv2d(
                        # Only the first conv of a head sees the raw input.
                        in_channels if k == 0 else inner_channels,
                        inner_channels,
                        3,
                        padding=1,
                        stride=1,
                        bias=False,
                        norm=nn.BatchNorm2d(inner_channels),
                        activation=nn.ReLU(inplace=True),
                    )
                )
                if i > start_level:
                    scale_head.append(
                        nn.Upsample(
                            scale_factor=2,
                            mode="bilinear",
                            align_corners=False,
                        )
                    )
            self.scale_heads.append(nn.Sequential(*scale_head))

        # 1x1 classifier producing the per-class logits.
        self.conv_seg = nn.Conv2d(inner_channels, num_classes, 1)
        self.dropout_ratio = dropout_ratio
        if dropout_ratio > 0:
            self.dropout = nn.Dropout2d(dropout_ratio)
        # NOTE(review): ``init_weights`` is not defined in the part of the
        # class visible here -- confirm it exists, otherwise construction
        # fails with AttributeError.
        self.init_weights()

    def forward(self, features: list[Tensor]) -> SemanticFPNOut:
        """Transforms feature maps and returns segmentation prediction.

        Args:
            features (list[Tensor]): List of multi-level image features. It
                is indexed by absolute level, i.e. entries ``start_level`` to
                ``end_level - 1`` are read, so it must hold at least
                ``end_level`` entries.

        Returns:
            SemanticFPNOut: Segmentation outputs.

        Raises:
            AssertionError: If ``features`` has fewer than ``end_level``
                entries.
        """
        # The highest index read below is ``end_level - 1``. Checking only
        # ``num_stages`` would let too-short lists through whenever
        # ``start_level > 0`` and fail later with an opaque IndexError.
        assert self.end_level <= len(features), (
            f"Expected at least {self.end_level} feature levels "
            f"(end_level), but got {len(features)}."
        )

        output = self.scale_heads[0](features[self.start_level])
        for i in range(1, self.num_stages):
            # Resize every further level to the first level's spatial size
            # before fusing by summation.
            output = output + F.interpolate(
                self.scale_heads[i](features[self.start_level + i]),
                size=output.shape[2:],
                mode="bilinear",
                align_corners=False,
            )

        if self.dropout_ratio > 0:
            output = self.dropout(output)
        seg_preds = self.conv_seg(output)
        return SemanticFPNOut(outputs=seg_preds)

    def __call__(self, feats: list[Tensor]) -> SemanticFPNOut:
        """Type definition for function call."""
        return super()._call_impl(feats)