Source code for sc2bench.models.detection.transform

from typing import List, Tuple, Dict, Optional

from torch import Tensor
from torchvision.models.detection.image_list import ImageList
from torchvision.models.detection.transform import GeneralizedRCNNTransform
from torchvision.transforms.functional import to_pil_image, to_tensor, crop

from ...analysis import AnalyzableModule
from ...transforms.misc import AdaptivePad


class RCNNTransformWithCompression(GeneralizedRCNNTransform, AnalyzableModule):
    """
    An R-CNN Transform with codec-based or model-based compression.

    :param transform: performs the data transformation from the inputs to feed into the model
    :type transform: nn.Module
    :param device: torch device
    :type device: torch.device or str
    :param codec_encoder_decoder: transform sequence configuration for codec
    :type codec_encoder_decoder: nn.Module
    :param analyzer_configs: list of analysis configurations
    :type analyzer_configs: list[dict]
    :param analyzes_after_compress: run analysis with `analyzer_configs` if True
    :type analyzes_after_compress: bool
    :param compression_model: compression model
    :type compression_model: nn.Module or None
    :param uses_cpu4compression_model: whether to use CPU instead of GPU for `compression_model`
    :type uses_cpu4compression_model: bool
    :param pre_transform: pre-transform
    :type pre_transform: nn.Module or None
    :param post_transform: post-transform
    :type post_transform: nn.Module or None
    :param adaptive_pad_kwargs: keyword arguments for AdaptivePad
    :type adaptive_pad_kwargs: dict or None
    """
    # Referred to https://github.com/pytorch/vision/blob/main/torchvision/models/detection/transform.py
    def __init__(self, transform, device, codec_encoder_decoder, analyzer_configs,
                 analyzes_after_compress=False, compression_model=None, uses_cpu4compression_model=False,
                 pre_transform=None, post_transform=None, adaptive_pad_kwargs=None):
        GeneralizedRCNNTransform.__init__(self, transform.min_size, transform.max_size,
                                          transform.image_mean, transform.image_std)
        AnalyzableModule.__init__(self, analyzer_configs)
        self.device = device
        self.codec_encoder_decoder = codec_encoder_decoder
        self.analyzes_after_compress = analyzes_after_compress
        self.pre_transform = pre_transform
        self.post_transform = post_transform
        if uses_cpu4compression_model:
            compression_model = compression_model.cpu()

        self.compression_model = compression_model
        self.uses_cpu4compression_model = uses_cpu4compression_model
        self.adaptive_pad = AdaptivePad(**adaptive_pad_kwargs) if isinstance(adaptive_pad_kwargs, dict) else None
    def compress_by_codec(self, org_img):
        """
        Convert a tensor to an image and compress-decompress it by codec.

        :param org_img: image tensor
        :type org_img: torch.Tensor
        :return: compressed-and-decompressed image tensor
        :rtype: torch.Tensor
        """
        pil_img = to_pil_image(org_img, mode='RGB')
        pil_img, file_size = self.codec_encoder_decoder(pil_img)
        if not self.training:
            self.analyze(file_size)
        return to_tensor(pil_img).to(org_img.device)
    def compress_by_model(self, org_img):
        """
        Convert a tensor to an image and compress-decompress it by model.

        :param org_img: image tensor
        :type org_img: torch.Tensor
        :return: compressed-and-decompressed image tensor
        :rtype: torch.Tensor
        """
        org_img = org_img.unsqueeze(0)
        org_height, org_width = None, None
        # Pad to a resolution the compression model can handle, remembering
        # the original resolution so it can be restored after decompression
        if self.adaptive_pad is not None:
            org_height, org_width = org_img.shape[-2:]
            org_img = self.adaptive_pad(org_img)

        compressed_obj = self.compression_model.compress(org_img)
        if not self.training and self.analyzes_after_compress:
            compressed_data = compressed_obj if org_height is None or org_width is None \
                else (compressed_obj, org_height, org_width)
            self.analyze(compressed_data)

        decompressed_obj = self.compression_model.decompress(**compressed_obj)
        decompressed_obj = decompressed_obj['x_hat']
        # Crop the padded reconstruction back to the original resolution
        if org_height is not None and org_width is not None:
            decompressed_obj = crop(decompressed_obj, 0, 0, org_height, org_width)
        return decompressed_obj.squeeze(0)
    def compress(self, org_img):
        """
        Apply `pre_transform` to an image tensor, compress and decompress it, and
        apply `post_transform` to the compressed-decompressed image tensor.

        :param org_img: image tensor
        :type org_img: torch.Tensor
        :return: compressed-and-decompressed image tensor
        :rtype: torch.Tensor
        """
        if self.pre_transform is not None:
            org_img = self.pre_transform(org_img)

        org_device = org_img.device
        if self.uses_cpu4compression_model:
            org_img = org_img.cpu()

        org_img = self.compress_by_codec(org_img) if self.compression_model is None \
            else self.compress_by_model(org_img)

        if self.uses_cpu4compression_model:
            org_img = org_img.to(org_device)

        if self.post_transform is not None:
            org_img = self.post_transform(org_img)
        return org_img
    def forward(
            self,
            images: List[Tensor],
            targets: Optional[List[Dict[str, Tensor]]] = None
    ) -> Tuple[ImageList, Optional[List[Dict[str, Tensor]]]]:
        images = [img for img in images]
        if targets is not None:
            # make a copy of targets to avoid modifying it in-place
            # once torchscript supports dict comprehension
            # this can be simplified as follows
            # targets = [{k: v for k, v in t.items()} for t in targets]
            targets_copy: List[Dict[str, Tensor]] = []
            for t in targets:
                data: Dict[str, Tensor] = {}
                for k, v in t.items():
                    data[k] = v
                targets_copy.append(data)
            targets = targets_copy

        for i in range(len(images)):
            image = images[i]
            target_index = targets[i] if targets is not None else None
            if image.dim() != 3:
                raise ValueError("images is expected to be a list of 3d tensors "
                                 "of shape [C, H, W], got {}".format(image.shape))

            image, target_index = self.resize(image, target_index)
            shape_before_compression = image.shape
            image = self.compress(image)
            shape_after_compression = image.shape
            assert shape_after_compression == shape_before_compression, \
                'Compression should not change tensor shape {} -> {}'.format(shape_before_compression,
                                                                             shape_after_compression)
            image = self.normalize(image)
            images[i] = image
            if targets is not None and target_index is not None:
                targets[i] = target_index

        image_sizes = [img.shape[-2:] for img in images]
        images = self.batch_images(images)
        image_sizes_list: List[Tuple[int, int]] = []
        for image_size in image_sizes:
            assert len(image_size) == 2
            image_sizes_list.append((image_size[0], image_size[1]))

        image_list = ImageList(images, image_sizes_list)
        return image_list, targets
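
`compress_by_model` assumes the compression model exposes a CompressAI-style interface: `compress(x)` returns a dict whose keys match the parameter names of `decompress`, and `decompress` returns a dict holding the reconstruction under `'x_hat'`. The identity stub below is a hypothetical sketch of that contract (not part of sc2bench); the key names `strings` and `shape` follow the CompressAI convention and are assumptions here.

import torch
from torch import nn


class DummyCompressionModel(nn.Module):
    """Identity 'codec' illustrating the assumed compress/decompress contract."""

    def compress(self, x):
        # compress_by_model later calls decompress(**compressed_obj),
        # so these keys must match decompress()'s parameter names
        return {'strings': [x.clone()], 'shape': x.shape[-2:]}

    def decompress(self, strings, shape):
        # the reconstruction must be returned under the 'x_hat' key
        return {'x_hat': strings[0]}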
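
A hedged usage sketch (illustrative, not from the library): replace the transform of a torchvision detector so that every input image is compressed and decompressed between resizing and normalization. The empty `analyzer_configs` and the stub model above are placeholders; real configurations come from sc2bench.analysis.

import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn

detector = fasterrcnn_resnet50_fpn(weights=None)  # pretrained=True on older torchvision
detector.transform = RCNNTransformWithCompression(
    transform=detector.transform,
    device='cpu',
    codec_encoder_decoder=None,   # only used when compression_model is None
    analyzer_configs=[],
    compression_model=DummyCompressionModel()
)
detector.eval()
with torch.no_grad():
    predictions = detector([torch.rand(3, 480, 640)])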