Add at new repo again
@@ -0,0 +1,9 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .data.datasets import builtin  # just to register data
from .config import add_densepose_config, add_dataset_category_config
from .densepose_head import ROI_DENSEPOSE_HEAD_REGISTRY
from .evaluator import DensePoseCOCOEvaluator
from .roi_head import DensePoseROIHeads
from .data.structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
from .modeling.test_time_augmentation import DensePoseGeneralizedRCNNWithTTA
from .utils.transform import load_from_cfg
@@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

from detectron2.config import CfgNode as CN


def add_dataset_category_config(cfg: CN):
    """
    Add config for additional category-related dataset options
     - category whitelisting
     - category mapping
    """
    _C = cfg
    _C.DATASETS.CATEGORY_MAPS = CN(new_allowed=True)
    _C.DATASETS.WHITELISTED_CATEGORIES = CN(new_allowed=True)


def add_densepose_config(cfg: CN):
    """
    Add config for densepose head.
    """
    _C = cfg

    _C.MODEL.DENSEPOSE_ON = True

    _C.MODEL.ROI_DENSEPOSE_HEAD = CN()
    _C.MODEL.ROI_DENSEPOSE_HEAD.NAME = ""
    _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8
    # Number of parts used for point labels
    _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24
    _C.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4
    _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512
    _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3
    _C.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2
    _C.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE = 112
    _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE = "ROIAlignV2"
    _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 28
    _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO = 2
    _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 2  # 15 or 2
    # Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
    _C.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD = 0.7
    # Loss weights for annotation masks (14 parts)
    _C.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS = 5.0
    # Loss weights for surface parts (24 parts)
    _C.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS = 1.0
    # Loss weights for UV regression.
    _C.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS = 0.01
    # For Decoder
    _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True
    _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES = 256
    _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS = 256
    _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM = ""
    _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE = 4
    # For DeepLab head
    _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB = CN()
    _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM = "GN"
    _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON = 0
    # Confidences
    # Enable learning confidences (variances) along with the actual values
    _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE = CN({"ENABLED": False})
    # UV confidence lower bound
    _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON = 0.01
    # Statistical model type for confidence learning, possible values:
    # - "iid_iso": statistically independent identically distributed residuals
    #   with isotropic covariance
    # - "indep_aniso": statistically independent residuals with anisotropic
    #   covariances
    _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE = "iid_iso"
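A minimal sketch of how these two helpers are meant to be applied on top of a standard detectron2 config, assuming the package is importable as `densepose` as in the DensePose project layout; the override shown at the end is purely illustrative:

    from detectron2.config import get_cfg

    from densepose import add_densepose_config, add_dataset_category_config

    cfg = get_cfg()                   # base detectron2 config
    add_densepose_config(cfg)         # adds the MODEL.ROI_DENSEPOSE_HEAD.* keys defined above
    add_dataset_category_config(cfg)  # adds DATASETS.CATEGORY_MAPS / DATASETS.WHITELISTED_CATEGORIES
    # example override: switch coarse segmentation to 15 channels instead of 2
    cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 15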
@@ -0,0 +1,9 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

from .build import build_detection_test_loader, build_detection_train_loader
from .dataset_mapper import DatasetMapper

# ensure the builtin datasets are registered
from . import datasets

__all__ = [k for k in globals().keys() if not k.startswith("_")]
@@ -0,0 +1,405 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

import itertools
import logging
import numpy as np
import operator
from typing import Any, Callable, Collection, Dict, Iterable, List, Optional
import torch

from detectron2.config import CfgNode
from detectron2.data import samplers
from detectron2.data.build import (
    load_proposals_into_dataset,
    print_instances_class_histogram,
    trivial_batch_collator,
    worker_init_reset_seed,
)
from detectron2.data.catalog import DatasetCatalog, MetadataCatalog
from detectron2.data.common import AspectRatioGroupedDataset, DatasetFromList, MapDataset
from detectron2.utils.comm import get_world_size

from .dataset_mapper import DatasetMapper
from .datasets.coco import DENSEPOSE_KEYS_WITHOUT_MASK as DENSEPOSE_COCO_KEYS_WITHOUT_MASK
from .datasets.coco import DENSEPOSE_MASK_KEY as DENSEPOSE_COCO_MASK_KEY

__all__ = ["build_detection_train_loader", "build_detection_test_loader"]


Instance = Dict[str, Any]
InstancePredicate = Callable[[Instance], bool]


def _compute_num_images_per_worker(cfg: CfgNode):
    num_workers = get_world_size()
    images_per_batch = cfg.SOLVER.IMS_PER_BATCH
    assert (
        images_per_batch % num_workers == 0
    ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
        images_per_batch, num_workers
    )
    assert (
        images_per_batch >= num_workers
    ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
        images_per_batch, num_workers
    )
    images_per_worker = images_per_batch // num_workers
    return images_per_worker


def _map_category_id_to_contiguous_id(dataset_name: str, dataset_dicts: Iterable[Instance]):
    meta = MetadataCatalog.get(dataset_name)
    for dataset_dict in dataset_dicts:
        for ann in dataset_dict["annotations"]:
            ann["category_id"] = meta.thing_dataset_id_to_contiguous_id[ann["category_id"]]


def _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names: Iterable[str]):
    # merge categories for all datasets
    merged_categories = {}
    for dataset_name in dataset_names:
        meta = MetadataCatalog.get(dataset_name)
        for cat_id, cat_name in meta.categories.items():
            if cat_id not in merged_categories:
                merged_categories[cat_id] = (cat_name, dataset_name)
                continue
            cat_name_other, dataset_name_other = merged_categories[cat_id]
            if cat_name_other != cat_name:
                raise ValueError(
                    f"Incompatible categories for category ID {cat_id}: "
                    f'dataset {dataset_name} value "{cat_name}", '
                    f'dataset {dataset_name_other} value "{cat_name_other}"'
                )

    merged_cat_id_to_cont_id = {}
    for i, cat_id in enumerate(sorted(merged_categories.keys())):
        merged_cat_id_to_cont_id[cat_id] = i

    # add category maps to metadata
    for dataset_name in dataset_names:
        meta = MetadataCatalog.get(dataset_name)
        categories = meta.get("categories")
        meta.thing_classes = [categories[cat_id] for cat_id in sorted(categories.keys())]
        meta.thing_dataset_id_to_contiguous_id = {
            cat_id: merged_cat_id_to_cont_id[cat_id] for cat_id in sorted(categories.keys())
        }
        meta.thing_contiguous_id_to_dataset_id = {
            merged_cat_id_to_cont_id[cat_id]: cat_id for cat_id in sorted(categories.keys())
        }


def _maybe_create_general_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
    def has_annotations(instance: Instance) -> bool:
        return "annotations" in instance

    def has_only_crowd_annotations(instance: Instance) -> bool:
        for ann in instance["annotations"]:
            if ann.get("iscrowd", 0) == 0:
                return False
        return True

    def general_keep_instance_predicate(instance: Instance) -> bool:
        return has_annotations(instance) and not has_only_crowd_annotations(instance)

    if not cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS:
        return None
    return general_keep_instance_predicate


def _maybe_create_keypoints_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:

    min_num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE

    def has_sufficient_num_keypoints(instance: Instance) -> bool:
        num_kpts = sum(
            (np.array(ann["keypoints"][2::3]) > 0).sum()
            for ann in instance["annotations"]
            if "keypoints" in ann
        )
        return num_kpts >= min_num_keypoints

    if cfg.MODEL.KEYPOINT_ON and (min_num_keypoints > 0):
        return has_sufficient_num_keypoints
    return None


def _maybe_create_mask_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
    if not cfg.MODEL.MASK_ON:
        return None

    def has_mask_annotations(instance: Instance) -> bool:
        return any("segmentation" in ann for ann in instance["annotations"])

    return has_mask_annotations


def _maybe_create_densepose_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
    if not cfg.MODEL.DENSEPOSE_ON:
        return None

    def has_densepose_annotations(instance: Instance) -> bool:
        for ann in instance["annotations"]:
            if all(key in ann for key in DENSEPOSE_COCO_KEYS_WITHOUT_MASK) and (
                (DENSEPOSE_COCO_MASK_KEY in ann) or ("segmentation" in ann)
            ):
                return True
        return False

    return has_densepose_annotations


def _maybe_create_specific_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
    specific_predicate_creators = [
        _maybe_create_keypoints_keep_instance_predicate,
        _maybe_create_mask_keep_instance_predicate,
        _maybe_create_densepose_keep_instance_predicate,
    ]
    predicates = [creator(cfg) for creator in specific_predicate_creators]
    predicates = [p for p in predicates if p is not None]
    if not predicates:
        return None

    def combined_predicate(instance: Instance) -> bool:
        return any(p(instance) for p in predicates)

    return combined_predicate


def _get_train_keep_instance_predicate(cfg: CfgNode):
    general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg)
    combined_specific_keep_predicate = _maybe_create_specific_keep_instance_predicate(cfg)

    def combined_general_specific_keep_predicate(instance: Instance) -> bool:
        return general_keep_predicate(instance) and combined_specific_keep_predicate(instance)

    if (general_keep_predicate is None) and (combined_specific_keep_predicate is None):
        return None
    if general_keep_predicate is None:
        return combined_specific_keep_predicate
    if combined_specific_keep_predicate is None:
        return general_keep_predicate
    return combined_general_specific_keep_predicate


def _get_test_keep_instance_predicate(cfg: CfgNode):
    general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg)
    return general_keep_predicate


def _maybe_filter_and_map_categories(
    dataset_name: str, dataset_dicts: List[Instance]
) -> List[Instance]:
    meta = MetadataCatalog.get(dataset_name)
    whitelisted_categories = meta.get("whitelisted_categories")
    category_map = meta.get("category_map", {})
    if whitelisted_categories is None and not category_map:
        return dataset_dicts
    filtered_dataset_dicts = []
    for dataset_dict in dataset_dicts:
        anns = []
        for ann in dataset_dict["annotations"]:
            cat_id = ann["category_id"]
            if whitelisted_categories is not None and cat_id not in whitelisted_categories:
                continue
            ann["category_id"] = category_map.get(cat_id, cat_id)
            anns.append(ann)
        dataset_dict["annotations"] = anns
        filtered_dataset_dicts.append(dataset_dict)
    return filtered_dataset_dicts


def _add_category_whitelists_to_metadata(cfg: CfgNode):
    for dataset_name, whitelisted_cat_ids in cfg.DATASETS.WHITELISTED_CATEGORIES.items():
        meta = MetadataCatalog.get(dataset_name)
        meta.whitelisted_categories = whitelisted_cat_ids
        logger = logging.getLogger(__name__)
        logger.info(
            "Whitelisted categories for dataset {}: {}".format(
                dataset_name, meta.whitelisted_categories
            )
        )


def _add_category_maps_to_metadata(cfg: CfgNode):
    for dataset_name, category_map in cfg.DATASETS.CATEGORY_MAPS.items():
        category_map = {
            int(cat_id_src): int(cat_id_dst) for cat_id_src, cat_id_dst in category_map.items()
        }
        meta = MetadataCatalog.get(dataset_name)
        meta.category_map = category_map
        logger = logging.getLogger(__name__)
        logger.info("Category maps for dataset {}: {}".format(dataset_name, meta.category_map))


def combine_detection_dataset_dicts(
    dataset_names: Collection[str],
    keep_instance_predicate: Optional[InstancePredicate] = None,
    proposal_files: Optional[Collection[str]] = None,
) -> List[Instance]:
    """
    Load and prepare dataset dicts for training / testing

    Args:
        dataset_names (Collection[str]): a list of dataset names
        keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate
            applied to instance dicts which defines whether to keep the instance
        proposal_files (Collection[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.
    """
    assert len(dataset_names)
    if proposal_files is None:
        proposal_files = [None] * len(dataset_names)
    assert len(dataset_names) == len(proposal_files)
    # load annotations and dataset metadata
    dataset_map = {}
    for dataset_name in dataset_names:
        dataset_dicts = DatasetCatalog.get(dataset_name)
        dataset_map[dataset_name] = dataset_dicts
    # initialize category maps
    _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names)
    # apply category maps
    all_datasets_dicts = []
    for dataset_name, proposal_file in zip(dataset_names, proposal_files):
        dataset_dicts = dataset_map[dataset_name]
        assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!"
        if proposal_file is not None:
            dataset_dicts = load_proposals_into_dataset(dataset_dicts, proposal_file)
        dataset_dicts = _maybe_filter_and_map_categories(dataset_name, dataset_dicts)
        _map_category_id_to_contiguous_id(dataset_name, dataset_dicts)
        print_instances_class_histogram(
            dataset_dicts, MetadataCatalog.get(dataset_name).thing_classes
        )
        all_datasets_dicts.append(dataset_dicts)

    if keep_instance_predicate is not None:
        all_datasets_dicts_plain = [
            d
            for d in itertools.chain.from_iterable(all_datasets_dicts)
            if keep_instance_predicate(d)
        ]
    else:
        all_datasets_dicts_plain = list(itertools.chain.from_iterable(all_datasets_dicts))
    return all_datasets_dicts_plain


def build_detection_train_loader(cfg: CfgNode, mapper=None):
    """
    A data loader is created in a way similar to that of Detectron2.
    The main differences are:
     - it allows combining datasets with different but compatible object category sets

    The data loader is created by the following steps:
    1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts.
    2. Start workers to work on the dicts. Each worker will:
       * Map each metadata dict into another format to be consumed by the model.
       * Batch them by simply putting dicts into a list.
    The batched ``list[mapped_dict]`` is what this dataloader will return.

    Args:
        cfg (CfgNode): the config
        mapper (callable): a callable which takes a sample (dict) from dataset and
            returns the format to be consumed by the model.
            By default it will be `DatasetMapper(cfg, True)`.

    Returns:
        an infinite iterator of training data
    """
    images_per_worker = _compute_num_images_per_worker(cfg)

    _add_category_whitelists_to_metadata(cfg)
    _add_category_maps_to_metadata(cfg)
    dataset_dicts = combine_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        keep_instance_predicate=_get_train_keep_instance_predicate(cfg),
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
    )
    dataset = DatasetFromList(dataset_dicts, copy=False)

    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        sampler = samplers.TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        sampler = samplers.RepeatFactorTrainingSampler(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
        )
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    if cfg.DATALOADER.ASPECT_RATIO_GROUPING:
        data_loader = torch.utils.data.DataLoader(
            dataset,
            sampler=sampler,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=None,
            collate_fn=operator.itemgetter(0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker)
    else:
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler, images_per_worker, drop_last=True
        )
        # drop_last so the batch always have the same size
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
            worker_init_fn=worker_init_reset_seed,
        )

    return data_loader


def build_detection_test_loader(cfg, dataset_name, mapper=None):
    """
    Similar to `build_detection_train_loader`.
    But this function uses the given `dataset_name` argument (instead of the names in cfg),
    and uses batch size 1.

    Args:
        cfg: a detectron2 CfgNode
        dataset_name (str): a name of the dataset that's available in the DatasetCatalog
        mapper (callable): a callable which takes a sample (dict) from dataset
            and returns the format to be consumed by the model.
            By default it will be `DatasetMapper(cfg, False)`.

    Returns:
        DataLoader: a torch DataLoader, that loads the given detection
        dataset, with test-time transformation and batching.
    """
    _add_category_whitelists_to_metadata(cfg)
    _add_category_maps_to_metadata(cfg)
    dataset_dicts = combine_detection_dataset_dicts(
        [dataset_name],
        keep_instance_predicate=_get_test_keep_instance_predicate(cfg),
        proposal_files=[
            cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)]
        ]
        if cfg.MODEL.LOAD_PROPOSALS
        else None,
    )

    dataset = DatasetFromList(dataset_dicts)
    if mapper is None:
        mapper = DatasetMapper(cfg, False)
    dataset = MapDataset(dataset, mapper)

    sampler = samplers.InferenceSampler(len(dataset))
    # Always use 1 image per worker during inference since this is the
    # standard when reporting inference time in papers.
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)

    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
    return data_loader
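A minimal usage sketch for the two loaders above, assuming a config prepared with the helpers from config.py, the densepose COCO datasets registered (see builtin.py below) and their files present on disk; the package import paths assume the standard `densepose` package name:

    from detectron2.config import get_cfg

    from densepose import add_dataset_category_config, add_densepose_config
    from densepose.data import build_detection_test_loader, build_detection_train_loader

    cfg = get_cfg()
    add_densepose_config(cfg)
    add_dataset_category_config(cfg)
    cfg.DATASETS.TRAIN = ("densepose_coco_2014_train",)
    cfg.DATASETS.TEST = ("densepose_coco_2014_minival",)

    train_loader = build_detection_train_loader(cfg)  # infinite iterator over mapped batches
    test_loader = build_detection_test_loader(cfg, "densepose_coco_2014_minival")
    for batch in test_loader:
        # each batch is a list[dict] with "image", "height", "width", ... keys
        break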
@@ -0,0 +1,118 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

import copy
import torch
from fvcore.common.file_io import PathManager

from detectron2.data import MetadataCatalog
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T

from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData


class DatasetMapper:
    """
    A customized version of `detectron2.data.DatasetMapper`
    """

    def __init__(self, cfg, is_train=True):
        self.tfm_gens = utils.build_transform_gen(cfg, is_train)

        # fmt: off
        self.img_format = cfg.INPUT.FORMAT
        self.mask_on = cfg.MODEL.MASK_ON
        self.keypoint_on = cfg.MODEL.KEYPOINT_ON
        self.densepose_on = cfg.MODEL.DENSEPOSE_ON
        assert not cfg.MODEL.LOAD_PROPOSALS, "not supported yet"
        # fmt: on
        if self.keypoint_on and is_train:
            # Flip only makes sense in training
            self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
        else:
            self.keypoint_hflip_indices = None

        if self.densepose_on:
            densepose_transform_srcs = [
                MetadataCatalog.get(ds).densepose_transform_src
                for ds in cfg.DATASETS.TRAIN + cfg.DATASETS.TEST
            ]
            assert len(densepose_transform_srcs) > 0
            # TODO: check that DensePose transformation data is the same for
            # all the datasets. Otherwise one would have to pass DB ID with
            # each entry to select proper transformation data. For now, since
            # all DensePose annotated data uses the same data semantics, we
            # omit this check.
            densepose_transform_data_fpath = PathManager.get_local_path(densepose_transform_srcs[0])
            self.densepose_transform_data = DensePoseTransformData.load(
                densepose_transform_data_fpath
            )

        self.is_train = is_train

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        image_shape = image.shape[:2]  # h, w
        dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            return dataset_dict

        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        # USER: Don't call transpose_densepose if you don't need it
        annos = [
            self._transform_densepose(
                utils.transform_instance_annotations(
                    obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
                ),
                transforms,
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image_shape)

        if len(annos) and "densepose" in annos[0]:
            gt_densepose = [obj["densepose"] for obj in annos]
            instances.gt_densepose = DensePoseList(gt_densepose, instances.gt_boxes, image_shape)

        dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]
        return dataset_dict

    def _transform_densepose(self, annotation, transforms):
        if not self.densepose_on:
            return annotation

        # Handle densepose annotations
        is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
        if is_valid:
            densepose_data = DensePoseDataRelative(annotation, cleanup=True)
            densepose_data.apply_transform(transforms, self.densepose_transform_data)
            annotation["densepose"] = densepose_data
        else:
            # logger = logging.getLogger(__name__)
            # logger.debug("Could not load DensePose annotation: {}".format(reason_not_valid))
            DensePoseDataRelative.cleanup_annotation(annotation)
            # NOTE: annotations for certain instances may be unavailable.
            # 'None' is accepted by the DensePoseList data structure.
            annotation["densepose"] = None
        return annotation
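A rough sketch of exercising the mapper on a single dataset dict; the file path and sizes below are placeholders, and constructing the mapper with MODEL.DENSEPOSE_ON enabled loads the UV symmetry transform file referenced in the dataset metadata, so in practice the mapper is built from a fully prepared config:

    mapper = DatasetMapper(cfg, is_train=False)  # cfg prepared as in the loader example above
    sample = {
        "file_name": "/path/to/image.jpg",  # placeholder path to a local image
        "image_id": 0,
        "height": 427,
        "width": 640,
    }
    mapped = mapper(sample)
    # mapped["image"] is a float32 CHW tensor; at train time an "instances" field with
    # optional gt_densepose (a DensePoseList) is attached as well.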
@@ -0,0 +1,5 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

from . import builtin  # ensure the builtin datasets are registered

__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")]
@@ -0,0 +1,10 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .coco import BASE_DATASETS as BASE_COCO_DATASETS
from .coco import DATASETS as COCO_DATASETS
from .coco import register_datasets as register_coco_datasets

DEFAULT_DATASETS_ROOT = "data"


register_coco_datasets(COCO_DATASETS, DEFAULT_DATASETS_ROOT)
register_coco_datasets(BASE_COCO_DATASETS, DEFAULT_DATASETS_ROOT)
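Importing this module is what makes the builtin names resolvable from the catalogs; a small sketch, assuming the package is importable as `densepose` and the annotation files are present under the "data" root (DatasetCatalog.get triggers the actual JSON loading):

    from detectron2.data import DatasetCatalog, MetadataCatalog

    import densepose.data.datasets.builtin  # noqa: F401  (import for registration side effect)

    dicts = DatasetCatalog.get("densepose_coco_2014_minival")   # loads the COCO-format annotations
    meta = MetadataCatalog.get("densepose_coco_2014_minival")
    print(meta.densepose_transform_src)  # URL of UV_symmetry_transforms.mat from get_metadata()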
@@ -0,0 +1,314 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import contextlib
import io
import logging
import os
from dataclasses import dataclass
from typing import Any, Dict, Iterable, List, Optional
from fvcore.common.file_io import PathManager
from fvcore.common.timer import Timer

from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode

DENSEPOSE_MASK_KEY = "dp_masks"
DENSEPOSE_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V"]
DENSEPOSE_KEYS = DENSEPOSE_KEYS_WITHOUT_MASK + [DENSEPOSE_MASK_KEY]
DENSEPOSE_METADATA_URL_PREFIX = "https://dl.fbaipublicfiles.com/densepose/data/"


@dataclass
class CocoDatasetInfo:
    name: str
    images_root: str
    annotations_fpath: str


DATASETS = [
    CocoDatasetInfo(
        name="densepose_coco_2014_train",
        images_root="coco/train2014",
        annotations_fpath="coco/annotations/densepose_train2014.json",
    ),
    CocoDatasetInfo(
        name="densepose_coco_2014_minival",
        images_root="coco/val2014",
        annotations_fpath="coco/annotations/densepose_minival2014.json",
    ),
    CocoDatasetInfo(
        name="densepose_coco_2014_minival_100",
        images_root="coco/val2014",
        annotations_fpath="coco/annotations/densepose_minival2014_100.json",
    ),
    CocoDatasetInfo(
        name="densepose_coco_2014_valminusminival",
        images_root="coco/val2014",
        annotations_fpath="coco/annotations/densepose_valminusminival2014.json",
    ),
    CocoDatasetInfo(
        name="densepose_chimps",
        images_root="densepose_evolution/densepose_chimps",
        annotations_fpath="densepose_evolution/annotations/densepose_chimps_densepose.json",
    ),
]


BASE_DATASETS = [
    CocoDatasetInfo(
        name="base_coco_2017_train",
        images_root="coco/train2017",
        annotations_fpath="coco/annotations/instances_train2017.json",
    ),
    CocoDatasetInfo(
        name="base_coco_2017_val",
        images_root="coco/val2017",
        annotations_fpath="coco/annotations/instances_val2017.json",
    ),
    CocoDatasetInfo(
        name="base_coco_2017_val_100",
        images_root="coco/val2017",
        annotations_fpath="coco/annotations/instances_val2017_100.json",
    ),
]


def _is_relative_local_path(path: os.PathLike):
    path_str = os.fsdecode(path)
    return ("://" not in path_str) and not os.path.isabs(path)


def _maybe_prepend_base_path(base_path: Optional[os.PathLike], path: os.PathLike):
    """
    Prepends the provided path with a base path prefix if:
    1) base path is not None;
    2) path is a local path
    """
    if base_path is None:
        return path
    if _is_relative_local_path(path):
        return os.path.join(base_path, path)
    return path


def get_metadata(base_path: Optional[os.PathLike]) -> Dict[str, Any]:
    """
    Returns metadata associated with COCO DensePose datasets

    Args:
        base_path: Optional[os.PathLike]
            Base path used to load metadata from

    Returns:
        Dict[str, Any]
            Metadata in the form of a dictionary
    """
    meta = {
        "densepose_transform_src": _maybe_prepend_base_path(
            base_path, "UV_symmetry_transforms.mat"
        ),
        "densepose_smpl_subdiv": _maybe_prepend_base_path(base_path, "SMPL_subdiv.mat"),
        "densepose_smpl_subdiv_transform": _maybe_prepend_base_path(
            base_path, "SMPL_SUBDIV_TRANSFORM.mat"
        ),
    }
    return meta


def _load_coco_annotations(json_file: str):
    """
    Load COCO annotations from a JSON file

    Args:
        json_file: str
            Path to the file to load annotations from
    Returns:
        Instance of `pycocotools.coco.COCO` that provides access to annotations
        data
    """
    from pycocotools.coco import COCO

    logger = logging.getLogger(__name__)
    timer = Timer()
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
    return coco_api


def _add_categories_metadata(dataset_name: str, categories: Dict[str, Any]):
    meta = MetadataCatalog.get(dataset_name)
    meta.categories = {c["id"]: c["name"] for c in categories}
    logger = logging.getLogger(__name__)
    logger.info("Dataset {} categories: {}".format(dataset_name, categories))


def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]):
    if "minival" in json_file:
        # Skip validation on COCO2014 valminusminival and minival annotations
        # The ratio of buggy annotations there is tiny and does not affect accuracy
        # Therefore we explicitly white-list them
        return
    ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
    assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
        json_file
    )


def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
    if "bbox" not in ann_dict:
        return
    obj["bbox"] = ann_dict["bbox"]
    obj["bbox_mode"] = BoxMode.XYWH_ABS


def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
    if "segmentation" not in ann_dict:
        return
    segm = ann_dict["segmentation"]
    if not isinstance(segm, dict):
        # filter out invalid polygons (< 3 points)
        segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
        if len(segm) == 0:
            return
    obj["segmentation"] = segm


def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
    if "keypoints" not in ann_dict:
        return
    keypts = ann_dict["keypoints"]  # list[int]
    for idx, v in enumerate(keypts):
        if idx % 3 != 2:
            # COCO's segmentation coordinates are floating points in [0, H or W],
            # but keypoint coordinates are integers in [0, H-1 or W-1]
            # Therefore we assume the coordinates are "pixel indices" and
            # add 0.5 to convert to floating point coordinates.
            keypts[idx] = v + 0.5
    obj["keypoints"] = keypts


def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
    for key in DENSEPOSE_KEYS:
        if key in ann_dict:
            obj[key] = ann_dict[key]


def _combine_images_with_annotations(
    dataset_name: str,
    image_root: str,
    img_datas: Iterable[Dict[str, Any]],
    ann_datas: Iterable[Iterable[Dict[str, Any]]],
):

    ann_keys = ["iscrowd", "category_id"]
    dataset_dicts = []

    for img_dict, ann_dicts in zip(img_datas, ann_datas):
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        record["image_id"] = img_dict["id"]
        record["dataset"] = dataset_name
        objs = []
        for ann_dict in ann_dicts:
            assert ann_dict["image_id"] == record["image_id"]
            assert ann_dict.get("ignore", 0) == 0
            obj = {key: ann_dict[key] for key in ann_keys if key in ann_dict}
            _maybe_add_bbox(obj, ann_dict)
            _maybe_add_segm(obj, ann_dict)
            _maybe_add_keypoints(obj, ann_dict)
            _maybe_add_densepose(obj, ann_dict)
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)
    return dataset_dicts


def load_coco_json(annotations_json_file: str, image_root: str, dataset_name: str):
    """
    Loads a JSON file with annotations in COCO instances format.
    Replaces `detectron2.data.datasets.coco.load_coco_json` to handle metadata
    in a more flexible way. Postpones category mapping to a later stage to be
    able to combine several datasets with different (but coherent) sets of
    categories.

    Args:

        annotations_json_file: str
            Path to the JSON file with annotations in COCO instances format.
        image_root: str
            directory that contains all the images
        dataset_name: str
            the name that identifies a dataset, e.g. "densepose_coco_2014_train"
    """
    coco_api = _load_coco_annotations(PathManager.get_local_path(annotations_json_file))
    _add_categories_metadata(dataset_name, coco_api.loadCats(coco_api.getCatIds()))
    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = coco_api.loadImgs(img_ids)
    logger = logging.getLogger(__name__)
    logger.info("Loaded {} images in COCO format from {}".format(len(imgs), annotations_json_file))
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images.
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
    _verify_annotations_have_unique_ids(annotations_json_file, anns)
    dataset_records = _combine_images_with_annotations(dataset_name, image_root, imgs, anns)
    return dataset_records


def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[os.PathLike] = None):
    """
    Registers provided COCO DensePose dataset

    Args:
        dataset_data: CocoDatasetInfo
            Dataset data
        datasets_root: Optional[os.PathLike]
            Datasets root folder (default: None)
    """
    annotations_fpath = _maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath)
    images_root = _maybe_prepend_base_path(datasets_root, dataset_data.images_root)

    def load_annotations():
        return load_coco_json(
            annotations_json_file=annotations_fpath,
            image_root=images_root,
            dataset_name=dataset_data.name,
        )

    DatasetCatalog.register(dataset_data.name, load_annotations)
    MetadataCatalog.get(dataset_data.name).set(
        json_file=annotations_fpath,
        image_root=images_root,
        **get_metadata(DENSEPOSE_METADATA_URL_PREFIX)
    )


def register_datasets(
    datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[os.PathLike] = None
):
    """
    Registers provided COCO DensePose datasets

    Args:
        datasets_data: Iterable[CocoDatasetInfo]
            An iterable of dataset infos
        datasets_root: Optional[os.PathLike]
            Datasets root folder (default: None)
    """
    for dataset_data in datasets_data:
        register_dataset(dataset_data, datasets_root)
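For a dataset that is not in the lists above, a hypothetical registration using the helpers from this file could look like the following (all names and paths are made up for illustration; the annotation JSON must follow the COCO instances format with the dp_* keys for DensePose-annotated instances):

    my_dataset = CocoDatasetInfo(
        name="densepose_my_dataset_train",                      # hypothetical dataset name
        images_root="my_dataset/images",                        # relative to the datasets root
        annotations_fpath="my_dataset/annotations/train.json",  # COCO-format JSON
    )
    register_dataset(my_dataset, datasets_root="data")
    # After this, the name can be used in cfg.DATASETS.TRAIN and resolved via DatasetCatalog.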
@@ -0,0 +1,579 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import base64
|
||||
import numpy as np
|
||||
from io import BytesIO
|
||||
import torch
|
||||
from PIL import Image
|
||||
from torch.nn import functional as F
|
||||
|
||||
|
||||
class DensePoseTransformData(object):
|
||||
|
||||
# Horizontal symmetry label transforms used for horizontal flip
|
||||
MASK_LABEL_SYMMETRIES = [0, 1, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14]
|
||||
# fmt: off
|
||||
POINT_LABEL_SYMMETRIES = [ 0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24, 23] # noqa
|
||||
# fmt: on
|
||||
|
||||
def __init__(self, uv_symmetries):
|
||||
self.mask_label_symmetries = DensePoseTransformData.MASK_LABEL_SYMMETRIES
|
||||
self.point_label_symmetries = DensePoseTransformData.POINT_LABEL_SYMMETRIES
|
||||
self.uv_symmetries = uv_symmetries
|
||||
|
||||
@staticmethod
|
||||
def load(fpath):
|
||||
import scipy.io
|
||||
|
||||
uv_symmetry_map = scipy.io.loadmat(fpath)
|
||||
uv_symmetry_map_torch = {}
|
||||
for key in ["U_transforms", "V_transforms"]:
|
||||
uv_symmetry_map_torch[key] = []
|
||||
map_src = uv_symmetry_map[key]
|
||||
map_dst = uv_symmetry_map_torch[key]
|
||||
for i in range(map_src.shape[1]):
|
||||
map_dst.append(torch.from_numpy(map_src[0, i]).to(dtype=torch.float))
|
||||
uv_symmetry_map_torch[key] = torch.stack(map_dst, dim=0).to(
|
||||
device=torch.cuda.current_device()
|
||||
)
|
||||
transform_data = DensePoseTransformData(uv_symmetry_map_torch)
|
||||
return transform_data
|
||||
|
||||
|
||||
class DensePoseDataRelative(object):
|
||||
"""
|
||||
Dense pose relative annotations that can be applied to any bounding box:
|
||||
x - normalized X coordinates [0, 255] of annotated points
|
||||
y - normalized Y coordinates [0, 255] of annotated points
|
||||
i - body part labels 0,...,24 for annotated points
|
||||
u - body part U coordinates [0, 1] for annotated points
|
||||
v - body part V coordinates [0, 1] for annotated points
|
||||
segm - 256x256 segmentation mask with values 0,...,14
|
||||
To obtain absolute x and y data wrt some bounding box one needs to first
|
||||
divide the data by 256, multiply by the respective bounding box size
|
||||
and add bounding box offset:
|
||||
x_img = x0 + x_norm * w / 256.0
|
||||
y_img = y0 + y_norm * h / 256.0
|
||||
Segmentation masks are typically sampled to get image-based masks.
|
||||
"""
|
||||
|
||||
# Key for normalized X coordinates in annotation dict
|
||||
X_KEY = "dp_x"
|
||||
# Key for normalized Y coordinates in annotation dict
|
||||
Y_KEY = "dp_y"
|
||||
# Key for U part coordinates in annotation dict
|
||||
U_KEY = "dp_U"
|
||||
# Key for V part coordinates in annotation dict
|
||||
V_KEY = "dp_V"
|
||||
# Key for I point labels in annotation dict
|
||||
I_KEY = "dp_I"
|
||||
# Key for segmentation mask in annotation dict
|
||||
S_KEY = "dp_masks"
|
||||
# Number of body parts in segmentation masks
|
||||
N_BODY_PARTS = 14
|
||||
# Number of parts in point labels
|
||||
N_PART_LABELS = 24
|
||||
MASK_SIZE = 256
|
||||
|
||||
def __init__(self, annotation, cleanup=False):
|
||||
is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
|
||||
assert is_valid, "Invalid DensePose annotations: {}".format(reason_not_valid)
|
||||
self.x = torch.as_tensor(annotation[DensePoseDataRelative.X_KEY])
|
||||
self.y = torch.as_tensor(annotation[DensePoseDataRelative.Y_KEY])
|
||||
self.i = torch.as_tensor(annotation[DensePoseDataRelative.I_KEY])
|
||||
self.u = torch.as_tensor(annotation[DensePoseDataRelative.U_KEY])
|
||||
self.v = torch.as_tensor(annotation[DensePoseDataRelative.V_KEY])
|
||||
self.segm = DensePoseDataRelative.extract_segmentation_mask(annotation)
|
||||
self.device = torch.device("cpu")
|
||||
if cleanup:
|
||||
DensePoseDataRelative.cleanup_annotation(annotation)
|
||||
|
||||
def to(self, device):
|
||||
if self.device == device:
|
||||
return self
|
||||
new_data = DensePoseDataRelative.__new__(DensePoseDataRelative)
|
||||
new_data.x = self.x
|
||||
new_data.x = self.x.to(device)
|
||||
new_data.y = self.y.to(device)
|
||||
new_data.i = self.i.to(device)
|
||||
new_data.u = self.u.to(device)
|
||||
new_data.v = self.v.to(device)
|
||||
new_data.segm = self.segm.to(device)
|
||||
new_data.device = device
|
||||
return new_data
|
||||
|
||||
@staticmethod
|
||||
def extract_segmentation_mask(annotation):
|
||||
import pycocotools.mask as mask_utils
|
||||
|
||||
poly_specs = annotation[DensePoseDataRelative.S_KEY]
|
||||
segm = torch.zeros((DensePoseDataRelative.MASK_SIZE,) * 2, dtype=torch.float32)
|
||||
for i in range(DensePoseDataRelative.N_BODY_PARTS):
|
||||
poly_i = poly_specs[i]
|
||||
if poly_i:
|
||||
mask_i = mask_utils.decode(poly_i)
|
||||
segm[mask_i > 0] = i + 1
|
||||
return segm
|
||||
|
||||
@staticmethod
|
||||
def validate_annotation(annotation):
|
||||
for key in [
|
||||
DensePoseDataRelative.X_KEY,
|
||||
DensePoseDataRelative.Y_KEY,
|
||||
DensePoseDataRelative.I_KEY,
|
||||
DensePoseDataRelative.U_KEY,
|
||||
DensePoseDataRelative.V_KEY,
|
||||
DensePoseDataRelative.S_KEY,
|
||||
]:
|
||||
if key not in annotation:
|
||||
return False, "no {key} data in the annotation".format(key=key)
|
||||
return True, None
|
||||
|
||||
@staticmethod
|
||||
def cleanup_annotation(annotation):
|
||||
for key in [
|
||||
DensePoseDataRelative.X_KEY,
|
||||
DensePoseDataRelative.Y_KEY,
|
||||
DensePoseDataRelative.I_KEY,
|
||||
DensePoseDataRelative.U_KEY,
|
||||
DensePoseDataRelative.V_KEY,
|
||||
DensePoseDataRelative.S_KEY,
|
||||
]:
|
||||
if key in annotation:
|
||||
del annotation[key]
|
||||
|
||||
def apply_transform(self, transforms, densepose_transform_data):
|
||||
self._transform_pts(transforms, densepose_transform_data)
|
||||
self._transform_segm(transforms, densepose_transform_data)
|
||||
|
||||
def _transform_pts(self, transforms, dp_transform_data):
|
||||
import detectron2.data.transforms as T
|
||||
|
||||
# NOTE: This assumes that HorizFlipTransform is the only one that does flip
|
||||
do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
|
||||
if do_hflip:
|
||||
self.x = self.segm.size(1) - self.x
|
||||
self._flip_iuv_semantics(dp_transform_data)
|
||||
|
||||
def _flip_iuv_semantics(self, dp_transform_data: DensePoseTransformData) -> None:
|
||||
i_old = self.i.clone()
|
||||
uv_symmetries = dp_transform_data.uv_symmetries
|
||||
pt_label_symmetries = dp_transform_data.point_label_symmetries
|
||||
for i in range(self.N_PART_LABELS):
|
||||
if i + 1 in i_old:
|
||||
annot_indices_i = i_old == i + 1
|
||||
if pt_label_symmetries[i + 1] != i + 1:
|
||||
self.i[annot_indices_i] = pt_label_symmetries[i + 1]
|
||||
u_loc = (self.u[annot_indices_i] * 255).long()
|
||||
v_loc = (self.v[annot_indices_i] * 255).long()
|
||||
self.u[annot_indices_i] = uv_symmetries["U_transforms"][i][v_loc, u_loc].to(
|
||||
device=self.u.device
|
||||
)
|
||||
self.v[annot_indices_i] = uv_symmetries["V_transforms"][i][v_loc, u_loc].to(
|
||||
device=self.v.device
|
||||
)
|
||||
|
||||
def _transform_segm(self, transforms, dp_transform_data):
|
||||
import detectron2.data.transforms as T
|
||||
|
||||
# NOTE: This assumes that HorizFlipTransform is the only one that does flip
|
||||
do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
|
||||
if do_hflip:
|
||||
self.segm = torch.flip(self.segm, [1])
|
||||
self._flip_segm_semantics(dp_transform_data)
|
||||
|
||||
def _flip_segm_semantics(self, dp_transform_data):
|
||||
old_segm = self.segm.clone()
|
||||
mask_label_symmetries = dp_transform_data.mask_label_symmetries
|
||||
for i in range(self.N_BODY_PARTS):
|
||||
if mask_label_symmetries[i + 1] != i + 1:
|
||||
self.segm[old_segm == i + 1] = mask_label_symmetries[i + 1]
|
||||
|
||||
|
||||
def normalized_coords_transform(x0, y0, w, h):
|
||||
"""
|
||||
Coordinates transform that maps top left corner to (-1, -1) and bottom
|
||||
right corner to (1, 1). Used for torch.grid_sample to initialize the
|
||||
grid
|
||||
"""
|
||||
|
||||
def f(p):
|
||||
return (2 * (p[0] - x0) / w - 1, 2 * (p[1] - y0) / h - 1)
|
||||
|
||||
return f
|
||||
|
||||
|
||||
class DensePoseOutput(object):
|
||||
def __init__(self, S, I, U, V, confidences):
|
||||
"""
|
||||
Args:
|
||||
S (`torch.Tensor`): coarse segmentation tensor of size (N, A, H, W)
|
||||
I (`torch.Tensor`): fine segmentation tensor of size (N, C, H, W)
|
||||
U (`torch.Tensor`): U coordinates for each fine segmentation label of size (N, C, H, W)
|
||||
V (`torch.Tensor`): V coordinates for each fine segmentation label of size (N, C, H, W)
|
||||
confidences (dict of str -> `torch.Tensor`) estimated confidence model parameters
|
||||
"""
|
||||
self.S = S
|
||||
self.I = I # noqa: E741
|
||||
self.U = U
|
||||
self.V = V
|
||||
self.confidences = confidences
|
||||
self._check_output_dims(S, I, U, V)
|
||||
|
||||
def _check_output_dims(self, S, I, U, V):
|
||||
assert (
|
||||
len(S.size()) == 4
|
||||
), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format(
|
||||
S.size()
|
||||
)
|
||||
assert (
|
||||
len(I.size()) == 4
|
||||
), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format(
|
||||
S.size()
|
||||
)
|
||||
assert (
|
||||
len(U.size()) == 4
|
||||
), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format(
|
||||
S.size()
|
||||
)
|
||||
assert (
|
||||
len(V.size()) == 4
|
||||
), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format(
|
||||
S.size()
|
||||
)
|
||||
assert len(S) == len(I), (
|
||||
"Number of output segmentation planes {} "
|
||||
"should be equal to the number of output part index "
|
||||
"planes {}".format(len(S), len(I))
|
||||
)
|
||||
assert S.size()[2:] == I.size()[2:], (
|
||||
"Output segmentation plane size {} "
|
||||
"should be equal to the output part index "
|
||||
"plane size {}".format(S.size()[2:], I.size()[2:])
|
||||
)
|
||||
assert I.size() == U.size(), (
|
||||
"Part index output shape {} "
|
||||
"should be the same as U coordinates output shape {}".format(I.size(), U.size())
|
||||
)
|
||||
assert I.size() == V.size(), (
|
||||
"Part index output shape {} "
|
||||
"should be the same as V coordinates output shape {}".format(I.size(), V.size())
|
||||
)
|
||||
|
||||
def resize(self, image_size_hw):
|
||||
# do nothing - outputs are invariant to resize
|
||||
pass
|
||||
|
||||
def _crop(self, S, I, U, V, bbox_old_xywh, bbox_new_xywh):
|
||||
"""
|
||||
Resample S, I, U, V from bbox_old to the cropped bbox_new
|
||||
"""
|
||||
x0old, y0old, wold, hold = bbox_old_xywh
|
||||
x0new, y0new, wnew, hnew = bbox_new_xywh
|
||||
tr_coords = normalized_coords_transform(x0old, y0old, wold, hold)
|
||||
topleft = (x0new, y0new)
|
||||
bottomright = (x0new + wnew, y0new + hnew)
|
||||
topleft_norm = tr_coords(topleft)
|
||||
bottomright_norm = tr_coords(bottomright)
|
||||
hsize = S.size(1)
|
||||
wsize = S.size(2)
|
||||
grid = torch.meshgrid(
|
||||
torch.arange(
|
||||
topleft_norm[1],
|
||||
bottomright_norm[1],
|
||||
(bottomright_norm[1] - topleft_norm[1]) / hsize,
|
||||
)[:hsize],
|
||||
torch.arange(
|
||||
topleft_norm[0],
|
||||
bottomright_norm[0],
|
||||
(bottomright_norm[0] - topleft_norm[0]) / wsize,
|
||||
)[:wsize],
|
||||
)
|
||||
grid = torch.stack(grid, dim=2).to(S.device)
|
||||
assert (
|
||||
grid.size(0) == hsize
|
||||
), "Resampled grid expected " "height={}, actual height={}".format(hsize, grid.size(0))
|
||||
assert grid.size(1) == wsize, "Resampled grid expected " "width={}, actual width={}".format(
|
||||
wsize, grid.size(1)
|
||||
)
|
||||
S_new = F.grid_sample(
|
||||
S.unsqueeze(0),
|
||||
torch.unsqueeze(grid, 0),
|
||||
mode="bilinear",
|
||||
padding_mode="border",
|
||||
align_corners=True,
|
||||
).squeeze(0)
|
||||
I_new = F.grid_sample(
|
||||
I.unsqueeze(0),
|
||||
torch.unsqueeze(grid, 0),
|
||||
mode="bilinear",
|
||||
padding_mode="border",
|
||||
align_corners=True,
|
||||
).squeeze(0)
|
||||
U_new = F.grid_sample(
|
||||
U.unsqueeze(0),
|
||||
torch.unsqueeze(grid, 0),
|
||||
mode="bilinear",
|
||||
padding_mode="border",
|
||||
align_corners=True,
|
||||
).squeeze(0)
|
||||
V_new = F.grid_sample(
|
||||
V.unsqueeze(0),
|
||||
torch.unsqueeze(grid, 0),
|
||||
mode="bilinear",
|
||||
padding_mode="border",
|
||||
align_corners=True,
|
||||
).squeeze(0)
|
||||
return S_new, I_new, U_new, V_new
|
||||
|
||||
def crop(self, indices_cropped, bboxes_old, bboxes_new):
|
||||
"""
|
||||
Crop outputs for selected bounding boxes to the new bounding boxes.
|
||||
"""
|
||||
# VK: cropping is ignored for now
|
||||
# for i, ic in enumerate(indices_cropped):
|
||||
# self.S[ic], self.I[ic], self.U[ic], self.V[ic] = \
|
||||
# self._crop(self.S[ic], self.I[ic], self.U[ic], self.V[ic],
|
||||
# bboxes_old[i], bboxes_new[i])
|
||||
pass
|
||||
|
||||
def hflip(self, transform_data: DensePoseTransformData) -> None:
|
||||
"""
|
||||
Change S, I, U and V to take into account a Horizontal flip.
|
||||
"""
|
||||
if self.I.shape[0] > 0:
|
||||
for el in "SIUV":
|
||||
self.__dict__[el] = torch.flip(self.__dict__[el], [3])
|
||||
self._flip_iuv_semantics_tensor(transform_data)
|
||||
self._flip_segm_semantics_tensor(transform_data)
|
||||
|
||||
def _flip_iuv_semantics_tensor(self, dp_transform_data: DensePoseTransformData) -> None:
|
||||
point_label_symmetries = dp_transform_data.point_label_symmetries
|
||||
uv_symmetries = dp_transform_data.uv_symmetries
|
||||
|
||||
N, C, H, W = self.U.shape
|
||||
u_loc = (self.U[:, 1:, :, :].clamp(0, 1) * 255).long()
|
||||
v_loc = (self.V[:, 1:, :, :].clamp(0, 1) * 255).long()
|
||||
Iindex = torch.arange(C - 1, device=self.U.device)[None, :, None, None].expand(
|
||||
N, C - 1, H, W
|
||||
)
|
||||
self.U[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc].to(
|
||||
device=self.U.device
|
||||
)
|
||||
self.V[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc].to(
|
||||
device=self.V.device
|
||||
)
|
||||
|
||||
for el in "IUV":
|
||||
self.__dict__[el] = self.__dict__[el][:, point_label_symmetries, :, :]
|
||||
|
||||
def _flip_segm_semantics_tensor(self, dp_transform_data):
|
||||
if self.S.shape[1] == DensePoseDataRelative.N_BODY_PARTS + 1:
|
||||
self.S = self.S[:, dp_transform_data.mask_label_symmetries, :, :]
|
||||
|
||||
def to_result(self, boxes_xywh):
|
||||
"""
|
||||
Convert DensePose outputs to results format. Results are more compact,
|
||||
but cannot be resampled any more
|
||||
"""
|
||||
result = DensePoseResult(boxes_xywh, self.S, self.I, self.U, self.V)
|
||||
return result
|
||||
|
||||
def __getitem__(self, item):
|
||||
if isinstance(item, int):
|
||||
S_selected = self.S[item].unsqueeze(0)
|
||||
I_selected = self.I[item].unsqueeze(0)
|
||||
U_selected = self.U[item].unsqueeze(0)
|
||||
V_selected = self.V[item].unsqueeze(0)
|
||||
conf_selected = {}
|
||||
for key in self.confidences:
|
||||
conf_selected[key] = self.confidences[key][item].unsqueeze(0)
|
||||
else:
|
||||
S_selected = self.S[item]
|
||||
I_selected = self.I[item]
|
||||
U_selected = self.U[item]
|
||||
V_selected = self.V[item]
|
||||
conf_selected = {}
|
||||
for key in self.confidences:
|
||||
conf_selected[key] = self.confidences[key][item]
|
||||
return DensePoseOutput(S_selected, I_selected, U_selected, V_selected, conf_selected)
|
||||
|
||||
def __str__(self):
|
||||
s = "DensePoseOutput S {}, I {}, U {}, V {}".format(
|
||||
list(self.S.size()), list(self.I.size()), list(self.U.size()), list(self.V.size())
|
||||
)
|
||||
s_conf = "confidences: [{}]".format(
|
||||
", ".join([f"{key} {list(self.confidences[key].size())}" for key in self.confidences])
|
||||
)
|
||||
return ", ".join([s, s_conf])
|
||||
|
||||
def __len__(self):
|
||||
return self.S.size(0)
|
||||
|
||||
|
||||
class DensePoseResult(object):
|
||||
def __init__(self, boxes_xywh, S, I, U, V):
|
||||
self.results = []
|
||||
self.boxes_xywh = boxes_xywh.cpu().tolist()
|
||||
assert len(boxes_xywh.size()) == 2
|
||||
assert boxes_xywh.size(1) == 4
|
||||
for i, box_xywh in enumerate(boxes_xywh):
|
||||
result_i = self._output_to_result(box_xywh, S[[i]], I[[i]], U[[i]], V[[i]])
|
||||
result_numpy_i = result_i.cpu().numpy()
|
||||
result_encoded_i = DensePoseResult.encode_png_data(result_numpy_i)
|
||||
result_encoded_with_shape_i = (result_numpy_i.shape, result_encoded_i)
|
||||
self.results.append(result_encoded_with_shape_i)
|
||||
|
||||
def __str__(self):
|
||||
s = "DensePoseResult: N={} [{}]".format(
|
||||
len(self.results), ", ".join([str(list(r[0])) for r in self.results])
|
||||
)
|
||||
return s
|
||||
|
||||
def _output_to_result(self, box_xywh, S, I, U, V):
|
||||
x, y, w, h = box_xywh
|
||||
w = max(int(w), 1)
|
||||
h = max(int(h), 1)
|
||||
result = torch.zeros([3, h, w], dtype=torch.uint8, device=U.device)
|
||||
assert (
|
||||
len(S.size()) == 4
|
||||
), "AnnIndex tensor size should have {} " "dimensions but has {}".format(4, len(S.size()))
|
||||
s_bbox = F.interpolate(S, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
|
||||
assert (
|
||||
len(I.size()) == 4
|
||||
), "IndexUV tensor size should have {} " "dimensions but has {}".format(4, len(S.size()))
|
||||
i_bbox = (
|
||||
F.interpolate(I, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
|
||||
* (s_bbox > 0).long()
|
||||
).squeeze(0)
|
||||
assert len(U.size()) == 4, "U tensor size should have {} " "dimensions but has {}".format(
|
||||
4, len(U.size())
|
||||
)
|
||||
u_bbox = F.interpolate(U, (h, w), mode="bilinear", align_corners=False)
|
||||
assert len(V.size()) == 4, "V tensor size should have {} " "dimensions but has {}".format(
|
||||
4, len(V.size())
|
||||
)
|
||||
v_bbox = F.interpolate(V, (h, w), mode="bilinear", align_corners=False)
|
||||
result[0] = i_bbox
|
||||
for part_id in range(1, u_bbox.size(1)):
|
||||
result[1][i_bbox == part_id] = (
|
||||
(u_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8)
|
||||
)
|
||||
result[2][i_bbox == part_id] = (
|
||||
(v_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8)
|
||||
)
|
||||
assert (
|
||||
result.size(1) == h
|
||||
), "Results height {} should be equal" "to bounding box height {}".format(result.size(1), h)
|
||||
assert (
|
||||
result.size(2) == w
|
||||
), "Results width {} should be equal" "to bounding box width {}".format(result.size(2), w)
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def encode_png_data(arr):
|
||||
"""
|
||||
Encode array data as a PNG image using the highest compression rate
|
||||
@param arr [in] Data stored in an array of size (3, M, N) of type uint8
|
||||
@return Base64-encoded string containing PNG-compressed data
|
||||
"""
|
||||
assert len(arr.shape) == 3, "Expected a 3D array as an input," " got a {0}D array".format(
|
||||
len(arr.shape)
|
||||
)
|
||||
assert arr.shape[0] == 3, "Expected first array dimension of size 3," " got {0}".format(
|
||||
arr.shape[0]
|
||||
)
|
||||
assert arr.dtype == np.uint8, "Expected an array of type np.uint8, " "got {0}".format(
|
||||
arr.dtype
|
||||
)
|
||||
data = np.moveaxis(arr, 0, -1)
|
||||
im = Image.fromarray(data)
|
||||
fstream = BytesIO()
|
||||
im.save(fstream, format="png", optimize=True)
|
||||
s = base64.encodebytes(fstream.getvalue()).decode()
|
||||
return s
|
||||
|
||||
@staticmethod
|
||||
def decode_png_data(shape, s):
|
||||
"""
|
||||
Decode array data from a string that contains PNG-compressed data
|
||||
@param shape [in] Shape of the array to decode into, e.g. (3, M, N)
@param s [in] Base64-encoded string containing PNG-compressed data
|
||||
@return Data stored in an array of size (3, M, N) of type uint8
|
||||
"""
|
||||
fstream = BytesIO(base64.decodebytes(s.encode()))
|
||||
im = Image.open(fstream)
|
||||
data = np.moveaxis(np.array(im.getdata(), dtype=np.uint8), -1, 0)
|
||||
return data.reshape(shape)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.results)
|
||||
|
||||
def __getitem__(self, item):
|
||||
result_encoded = self.results[item]
|
||||
bbox_xywh = self.boxes_xywh[item]
|
||||
return result_encoded, bbox_xywh
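# --- Illustrative sketch, not part of the original file ---
# Round trip through the PNG codec defined above: encode a dummy (3, M, N)
# uint8 array and decode it back. The array contents and sizes are arbitrary
# assumptions; numpy is already imported as np in this module.
def _demo_png_roundtrip():
    arr = np.zeros((3, 32, 24), dtype=np.uint8)
    arr[0] = 7  # pretend part indices in the first channel
    s = DensePoseResult.encode_png_data(arr)
    arr2 = DensePoseResult.decode_png_data(arr.shape, s)
    # PNG compression is lossless, so the round trip is exact
    assert np.array_equal(arr, arr2)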
|
||||
|
||||
|
||||
class DensePoseList(object):
|
||||
|
||||
_TORCH_DEVICE_CPU = torch.device("cpu")
|
||||
|
||||
def __init__(self, densepose_datas, boxes_xyxy_abs, image_size_hw, device=_TORCH_DEVICE_CPU):
|
||||
assert len(densepose_datas) == len(
|
||||
boxes_xyxy_abs
|
||||
), "Attempt to initialize DensePoseList with {} DensePose datas " "and {} boxes".format(
|
||||
len(densepose_datas), len(boxes_xyxy_abs)
|
||||
)
|
||||
self.densepose_datas = []
|
||||
for densepose_data in densepose_datas:
|
||||
assert isinstance(densepose_data, DensePoseDataRelative) or densepose_data is None, (
|
||||
"Attempt to initialize DensePoseList with DensePose datas "
|
||||
"of type {}, expected DensePoseDataRelative".format(type(densepose_data))
|
||||
)
|
||||
densepose_data_ondevice = (
|
||||
densepose_data.to(device) if densepose_data is not None else None
|
||||
)
|
||||
self.densepose_datas.append(densepose_data_ondevice)
|
||||
self.boxes_xyxy_abs = boxes_xyxy_abs.to(device)
|
||||
self.image_size_hw = image_size_hw
|
||||
self.device = device
|
||||
|
||||
def to(self, device):
|
||||
if self.device == device:
|
||||
return self
|
||||
return DensePoseList(self.densepose_datas, self.boxes_xyxy_abs, self.image_size_hw, device)
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self.densepose_datas)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.densepose_datas)
|
||||
|
||||
def __repr__(self):
|
||||
s = self.__class__.__name__ + "("
|
||||
s += "num_instances={}, ".format(len(self.densepose_datas))
|
||||
s += "image_width={}, ".format(self.image_size_hw[1])
|
||||
s += "image_height={})".format(self.image_size_hw[0])
|
||||
return s
|
||||
|
||||
def __getitem__(self, item):
|
||||
if isinstance(item, int):
|
||||
densepose_data_rel = self.densepose_datas[item]
|
||||
return densepose_data_rel
|
||||
elif isinstance(item, slice):
|
||||
densepose_datas_rel = self.densepose_datas[item]
|
||||
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
|
||||
return DensePoseList(
|
||||
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
|
||||
)
|
||||
elif isinstance(item, torch.Tensor) and (item.dtype == torch.bool):
|
||||
densepose_datas_rel = [self.densepose_datas[i] for i, x in enumerate(item) if x > 0]
|
||||
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
|
||||
return DensePoseList(
|
||||
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
|
||||
)
|
||||
else:
|
||||
densepose_datas_rel = [self.densepose_datas[i] for i in item]
|
||||
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
|
||||
return DensePoseList(
|
||||
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
|
||||
)
|
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,158 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
import contextlib
|
||||
import copy
|
||||
import io
|
||||
import itertools
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from collections import OrderedDict
|
||||
import torch
|
||||
from fvcore.common.file_io import PathManager
|
||||
from pycocotools.coco import COCO
|
||||
|
||||
from detectron2.data import MetadataCatalog
|
||||
from detectron2.evaluation import DatasetEvaluator
|
||||
from detectron2.structures import BoxMode
|
||||
from detectron2.utils.comm import all_gather, is_main_process, synchronize
|
||||
from detectron2.utils.logger import create_small_table
|
||||
|
||||
from .densepose_coco_evaluation import DensePoseCocoEval, DensePoseEvalMode
|
||||
|
||||
|
||||
class DensePoseCOCOEvaluator(DatasetEvaluator):
|
||||
def __init__(self, dataset_name, distributed, output_dir=None):
|
||||
self._distributed = distributed
|
||||
self._output_dir = output_dir
|
||||
|
||||
self._cpu_device = torch.device("cpu")
|
||||
self._logger = logging.getLogger(__name__)
|
||||
|
||||
self._metadata = MetadataCatalog.get(dataset_name)
|
||||
json_file = PathManager.get_local_path(self._metadata.json_file)
|
||||
with contextlib.redirect_stdout(io.StringIO()):
|
||||
self._coco_api = COCO(json_file)
|
||||
|
||||
def reset(self):
|
||||
self._predictions = []
|
||||
|
||||
def process(self, inputs, outputs):
|
||||
"""
|
||||
Args:
|
||||
inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
|
||||
It is a list of dict. Each dict corresponds to an image and
|
||||
contains keys like "height", "width", "file_name", "image_id".
|
||||
outputs: the outputs of a COCO model. It is a list of dicts with key
|
||||
"instances" that contains :class:`Instances`.
|
||||
The :class:`Instances` object needs to have a `densepose` field.
|
||||
"""
|
||||
for input, output in zip(inputs, outputs):
|
||||
instances = output["instances"].to(self._cpu_device)
|
||||
|
||||
boxes = instances.pred_boxes.tensor.clone()
|
||||
boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
|
||||
instances.pred_densepose = instances.pred_densepose.to_result(boxes)
|
||||
|
||||
json_results = prediction_to_json(instances, input["image_id"])
|
||||
self._predictions.extend(json_results)
|
||||
|
||||
def evaluate(self):
|
||||
if self._distributed:
|
||||
synchronize()
|
||||
predictions = all_gather(self._predictions)
|
||||
predictions = list(itertools.chain(*predictions))
|
||||
if not is_main_process():
|
||||
return
|
||||
else:
|
||||
predictions = self._predictions
|
||||
|
||||
return copy.deepcopy(self._eval_predictions(predictions))
|
||||
|
||||
def _eval_predictions(self, predictions):
|
||||
"""
|
||||
Evaluate predictions on densepose.
|
||||
Return results with the metrics of the tasks.
|
||||
"""
|
||||
self._logger.info("Preparing results for COCO format ...")
|
||||
|
||||
if self._output_dir:
|
||||
file_path = os.path.join(self._output_dir, "coco_densepose_results.json")
|
||||
with open(file_path, "w") as f:
|
||||
json.dump(predictions, f)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
|
||||
self._logger.info("Evaluating predictions ...")
|
||||
res = OrderedDict()
|
||||
results_gps, results_gpsm = _evaluate_predictions_on_coco(self._coco_api, predictions)
|
||||
res["densepose_gps"] = results_gps
|
||||
res["densepose_gpsm"] = results_gpsm
|
||||
return res
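# --- Illustrative usage sketch, not part of the original file ---
# Plugging the evaluator into detectron2's standard evaluation loop.
# "densepose_coco_2014_minival" is only an assumed registered dataset name,
# and model / val_loader are assumed to exist:
#
#   from detectron2.evaluation import inference_on_dataset
#   evaluator = DensePoseCOCOEvaluator(
#       "densepose_coco_2014_minival", distributed=True, output_dir="./output"
#   )
#   results = inference_on_dataset(model, val_loader, evaluator)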
|
||||
|
||||
|
||||
def prediction_to_json(instances, img_id):
|
||||
"""
|
||||
Args:
|
||||
instances (Instances): the output of the model
|
||||
img_id (str): the image id in COCO
|
||||
|
||||
Returns:
|
||||
list[dict]: the results in densepose evaluation format
|
||||
"""
|
||||
scores = instances.scores.tolist()
|
||||
|
||||
results = []
|
||||
for k in range(len(instances)):
|
||||
densepose = instances.pred_densepose[k]
|
||||
result = {
|
||||
"image_id": img_id,
|
||||
"category_id": 1, # densepose only has one class
|
||||
"bbox": densepose[1],
|
||||
"score": scores[k],
|
||||
"densepose": densepose,
|
||||
}
|
||||
results.append(result)
|
||||
return results
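# --- Illustrative sketch, not part of the original file ---
# Shape of one entry produced above; the numbers are made up:
#   {
#       "image_id": 42,
#       "category_id": 1,
#       "bbox": [x, y, w, h],                       # from DensePoseResult.__getitem__
#       "score": 0.97,
#       "densepose": (encoded_result, [x, y, w, h]),
#   }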
|
||||
|
||||
|
||||
def _evaluate_predictions_on_coco(coco_gt, coco_results):
|
||||
metrics = ["AP", "AP50", "AP75", "APm", "APl"]
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
if len(coco_results) == 0: # cocoapi does not handle empty results very well
|
||||
logger.warning("No predictions from the model! Set scores to -1")
|
||||
results_gps = {metric: -1 for metric in metrics}
|
||||
results_gpsm = {metric: -1 for metric in metrics}
|
||||
return results_gps, results_gpsm
|
||||
|
||||
coco_dt = coco_gt.loadRes(coco_results)
|
||||
results_gps = _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics)
|
||||
logger.info(
|
||||
"Evaluation results for densepose, GPS metric: \n" + create_small_table(results_gps)
|
||||
)
|
||||
results_gpsm = _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics)
|
||||
logger.info(
|
||||
"Evaluation results for densepose, GPSm metric: \n" + create_small_table(results_gpsm)
|
||||
)
|
||||
return results_gps, results_gpsm
|
||||
|
||||
|
||||
def _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics):
|
||||
coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPS)
|
||||
coco_eval.evaluate()
|
||||
coco_eval.accumulate()
|
||||
coco_eval.summarize()
|
||||
results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
|
||||
return results
|
||||
|
||||
|
||||
def _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics):
|
||||
coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPSM)
|
||||
coco_eval.evaluate()
|
||||
coco_eval.accumulate()
|
||||
coco_eval.summarize()
|
||||
results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
|
||||
return results
|
@@ -0,0 +1,75 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
from detectron2.modeling.test_time_augmentation import GeneralizedRCNNWithTTA
|
||||
|
||||
|
||||
class DensePoseGeneralizedRCNNWithTTA(GeneralizedRCNNWithTTA):
|
||||
def __init__(self, cfg, model, transform_data, tta_mapper=None, batch_size=1):
|
||||
"""
|
||||
Args:
|
||||
cfg (CfgNode):
|
||||
model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on.
|
||||
transform_data (DensePoseTransformData): contains symmetry label
|
||||
transforms used for horizontal flip
|
||||
tta_mapper (callable): takes a dataset dict and returns a list of
|
||||
augmented versions of the dataset dict. Defaults to
|
||||
`DatasetMapperTTA(cfg)`.
|
||||
batch_size (int): batch the augmented images into this batch size for inference.
|
||||
"""
|
||||
self._transform_data = transform_data
|
||||
super().__init__(cfg=cfg, model=model, tta_mapper=tta_mapper, batch_size=batch_size)
|
||||
|
||||
# the implementation follows closely the one from detectron2/modeling
|
||||
def _inference_one_image(self, input):
|
||||
"""
|
||||
Args:
|
||||
input (dict): one dataset dict
|
||||
|
||||
Returns:
|
||||
dict: one output dict
|
||||
"""
|
||||
|
||||
augmented_inputs, aug_vars = self._get_augmented_inputs(input)
|
||||
# Detect boxes from all augmented versions
|
||||
with self._turn_off_roi_heads(["mask_on", "keypoint_on", "densepose_on"]):
|
||||
# temporarily disable roi heads
|
||||
all_boxes, all_scores, all_classes = self._get_augmented_boxes(
|
||||
augmented_inputs, aug_vars
|
||||
)
|
||||
merged_instances = self._merge_detections(
|
||||
all_boxes, all_scores, all_classes, (aug_vars["height"], aug_vars["width"])
|
||||
)
|
||||
|
||||
if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON:
|
||||
# Use the detected boxes to obtain new fields
|
||||
augmented_instances = self._rescale_detected_boxes(
|
||||
augmented_inputs, merged_instances, aug_vars
|
||||
)
|
||||
# run forward on the detected boxes
|
||||
outputs = self._batch_inference(
|
||||
augmented_inputs, augmented_instances, do_postprocess=False
|
||||
)
|
||||
# Delete variables that are no longer needed, to avoid running out of memory
|
||||
del augmented_inputs, augmented_instances, merged_instances
|
||||
# average the predictions
|
||||
if self.cfg.MODEL.MASK_ON:
|
||||
outputs[0].pred_masks = self._reduce_pred_masks(outputs, aug_vars)
|
||||
if self.cfg.MODEL.DENSEPOSE_ON:
|
||||
outputs[0].pred_densepose = self._reduce_pred_densepose(outputs, aug_vars)
|
||||
# postprocess
|
||||
output = self._detector_postprocess(outputs[0], aug_vars)
|
||||
return {"instances": output}
|
||||
else:
|
||||
return {"instances": merged_instances}
|
||||
|
||||
def _reduce_pred_densepose(self, outputs, aug_vars):
|
||||
for idx, output in enumerate(outputs):
|
||||
if aug_vars["do_hflip"][idx]:
|
||||
output.pred_densepose.hflip(self._transform_data)
|
||||
# Less memory-intensive averaging
|
||||
for attr in "SIUV":
|
||||
setattr(
|
||||
outputs[0].pred_densepose,
|
||||
attr,
|
||||
sum(getattr(o.pred_densepose, attr) for o in outputs) / len(outputs),
|
||||
)
|
||||
return outputs[0].pred_densepose
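# --- Illustrative usage sketch, not part of the original file ---
# Wrapping a trained model for test-time augmentation. load_from_cfg is the
# helper defined in densepose/utils/transform.py later in this commit; the
# input dict layout follows detectron2's usual inference format:
#
#   transform_data = load_from_cfg(cfg)
#   model_tta = DensePoseGeneralizedRCNNWithTTA(cfg, model, transform_data)
#   outputs = model_tta([{"image": image_tensor, "height": h, "width": w}])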
|
@@ -0,0 +1,213 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
import numpy as np
|
||||
from typing import Dict
|
||||
import fvcore.nn.weight_init as weight_init
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.nn import functional as F
|
||||
|
||||
from detectron2.layers import Conv2d, ShapeSpec, get_norm
|
||||
from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads
|
||||
from detectron2.modeling.poolers import ROIPooler
|
||||
from detectron2.modeling.roi_heads import select_foreground_proposals
|
||||
|
||||
from .densepose_head import (
|
||||
build_densepose_data_filter,
|
||||
build_densepose_head,
|
||||
build_densepose_losses,
|
||||
build_densepose_predictor,
|
||||
densepose_inference,
|
||||
)
|
||||
|
||||
|
||||
class Decoder(nn.Module):
|
||||
"""
|
||||
A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper
|
||||
(https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from
|
||||
all levels of the FPN into a single output.
|
||||
"""
|
||||
|
||||
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features):
|
||||
super(Decoder, self).__init__()
|
||||
|
||||
# fmt: off
|
||||
self.in_features = in_features
|
||||
feature_strides = {k: v.stride for k, v in input_shape.items()}
|
||||
feature_channels = {k: v.channels for k, v in input_shape.items()}
|
||||
num_classes = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES
|
||||
conv_dims = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS
|
||||
self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE
|
||||
norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM
|
||||
# fmt: on
|
||||
|
||||
self.scale_heads = []
|
||||
for in_feature in self.in_features:
|
||||
head_ops = []
|
||||
head_length = max(
|
||||
1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride))
|
||||
)
|
||||
for k in range(head_length):
|
||||
conv = Conv2d(
|
||||
feature_channels[in_feature] if k == 0 else conv_dims,
|
||||
conv_dims,
|
||||
kernel_size=3,
|
||||
stride=1,
|
||||
padding=1,
|
||||
bias=not norm,
|
||||
norm=get_norm(norm, conv_dims),
|
||||
activation=F.relu,
|
||||
)
|
||||
weight_init.c2_msra_fill(conv)
|
||||
head_ops.append(conv)
|
||||
if feature_strides[in_feature] != self.common_stride:
|
||||
head_ops.append(
|
||||
nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
|
||||
)
|
||||
self.scale_heads.append(nn.Sequential(*head_ops))
|
||||
self.add_module(in_feature, self.scale_heads[-1])
|
||||
self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
|
||||
weight_init.c2_msra_fill(self.predictor)
|
||||
|
||||
def forward(self, features):
|
||||
for i, _ in enumerate(self.in_features):
|
||||
if i == 0:
|
||||
x = self.scale_heads[i](features[i])
|
||||
else:
|
||||
x = x + self.scale_heads[i](features[i])
|
||||
x = self.predictor(x)
|
||||
return x
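# --- Illustrative sketch, not part of the original file ---
# Building the Decoder with assumed FPN-like feature shapes (strides 4/8/16/32,
# 256 channels each, a 512x512 input image) and a config extended via
# add_densepose_config from this commit. All names and sizes are assumptions.
def _demo_decoder_shapes():
    from detectron2.config import get_cfg
    from densepose import add_densepose_config
    cfg = get_cfg()
    add_densepose_config(cfg)
    shapes = {
        "p2": ShapeSpec(channels=256, stride=4),
        "p3": ShapeSpec(channels=256, stride=8),
        "p4": ShapeSpec(channels=256, stride=16),
        "p5": ShapeSpec(channels=256, stride=32),
    }
    decoder = Decoder(cfg, shapes, ["p2", "p3", "p4", "p5"])
    # features ordered as in in_features: 128, 64, 32, 16 spatial resolution
    feats = [torch.zeros(1, 256, 128 // (2 ** i), 128 // (2 ** i)) for i in range(4)]
    out = decoder(feats)
    # every scale head is upsampled to the common stride (4), so the output
    # is [1, DECODER_NUM_CLASSES, 128, 128]
    return out.shape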
|
||||
|
||||
|
||||
@ROI_HEADS_REGISTRY.register()
|
||||
class DensePoseROIHeads(StandardROIHeads):
|
||||
"""
|
||||
A StandardROIHeads extended with an additional DensePose head.
|
||||
"""
|
||||
|
||||
def __init__(self, cfg, input_shape):
|
||||
super().__init__(cfg, input_shape)
|
||||
self._init_densepose_head(cfg, input_shape)
|
||||
|
||||
def _init_densepose_head(self, cfg, input_shape):
|
||||
# fmt: off
|
||||
self.densepose_on = cfg.MODEL.DENSEPOSE_ON
|
||||
if not self.densepose_on:
|
||||
return
|
||||
self.densepose_data_filter = build_densepose_data_filter(cfg)
|
||||
dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION
|
||||
dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO
|
||||
dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE
|
||||
self.use_decoder = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON
|
||||
# fmt: on
|
||||
if self.use_decoder:
|
||||
dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,)
|
||||
else:
|
||||
dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features)
|
||||
in_channels = [input_shape[f].channels for f in self.in_features][0]
|
||||
|
||||
if self.use_decoder:
|
||||
self.decoder = Decoder(cfg, input_shape, self.in_features)
|
||||
|
||||
self.densepose_pooler = ROIPooler(
|
||||
output_size=dp_pooler_resolution,
|
||||
scales=dp_pooler_scales,
|
||||
sampling_ratio=dp_pooler_sampling_ratio,
|
||||
pooler_type=dp_pooler_type,
|
||||
)
|
||||
self.densepose_head = build_densepose_head(cfg, in_channels)
|
||||
self.densepose_predictor = build_densepose_predictor(
|
||||
cfg, self.densepose_head.n_out_channels
|
||||
)
|
||||
self.densepose_losses = build_densepose_losses(cfg)
|
||||
|
||||
def _forward_densepose(self, features, instances):
|
||||
"""
|
||||
Forward logic of the densepose prediction branch.
|
||||
|
||||
Args:
|
||||
features (list[Tensor]): #level input features for densepose prediction
|
||||
instances (list[Instances]): the per-image instances to train/predict densepose.
|
||||
In training, they can be the proposals.
|
||||
In inference, they can be the predicted boxes.
|
||||
|
||||
Returns:
|
||||
In training, a dict of losses.
|
||||
In inference, update `instances` with new fields "densepose" and return it.
|
||||
"""
|
||||
if not self.densepose_on:
|
||||
return {} if self.training else instances
|
||||
|
||||
features = [features[f] for f in self.in_features]
|
||||
if self.training:
|
||||
proposals, _ = select_foreground_proposals(instances, self.num_classes)
|
||||
proposals_dp = self.densepose_data_filter(proposals)
|
||||
if len(proposals_dp) > 0:
|
||||
# NOTE may deadlock in DDP if certain workers have empty proposals_dp
|
||||
proposal_boxes = [x.proposal_boxes for x in proposals_dp]
|
||||
|
||||
if self.use_decoder:
|
||||
features = [self.decoder(features)]
|
||||
|
||||
features_dp = self.densepose_pooler(features, proposal_boxes)
|
||||
densepose_head_outputs = self.densepose_head(features_dp)
|
||||
densepose_outputs, _, confidences, _ = self.densepose_predictor(
|
||||
densepose_head_outputs
|
||||
)
|
||||
densepose_loss_dict = self.densepose_losses(
|
||||
proposals_dp, densepose_outputs, confidences
|
||||
)
|
||||
return densepose_loss_dict
|
||||
else:
|
||||
pred_boxes = [x.pred_boxes for x in instances]
|
||||
|
||||
if self.use_decoder:
|
||||
features = [self.decoder(features)]
|
||||
|
||||
features_dp = self.densepose_pooler(features, pred_boxes)
|
||||
if len(features_dp) > 0:
|
||||
densepose_head_outputs = self.densepose_head(features_dp)
|
||||
densepose_outputs, _, confidences, _ = self.densepose_predictor(
|
||||
densepose_head_outputs
|
||||
)
|
||||
else:
|
||||
# if no detections occurred in the instances,
# set densepose_outputs to empty tensors
|
||||
empty_tensor = torch.zeros(size=(0, 0, 0, 0), device=features_dp.device)
|
||||
densepose_outputs = tuple([empty_tensor] * 4)
|
||||
confidences = tuple([empty_tensor] * 4)
|
||||
|
||||
densepose_inference(densepose_outputs, confidences, instances)
|
||||
return instances
|
||||
|
||||
def forward(self, images, features, proposals, targets=None):
|
||||
instances, losses = super().forward(images, features, proposals, targets)
|
||||
del targets, images
|
||||
|
||||
if self.training:
|
||||
losses.update(self._forward_densepose(features, instances))
|
||||
return instances, losses
|
||||
|
||||
def forward_with_given_boxes(self, features, instances):
|
||||
"""
|
||||
Use the given boxes in `instances` to produce other (non-box) per-ROI outputs.
|
||||
|
||||
This is useful for downstream tasks where a box is known, but other
attributes (outputs of other heads) need to be obtained.
|
||||
Test-time augmentation also uses this.
|
||||
|
||||
Args:
|
||||
features: same as in `forward()`
|
||||
instances (list[Instances]): instances to predict other outputs. Expect the keys
|
||||
"pred_boxes" and "pred_classes" to exist.
|
||||
|
||||
Returns:
|
||||
instances (list[Instances]):
|
||||
the same `Instances` objects, with extra
|
||||
fields such as `pred_masks` or `pred_keypoints`.
|
||||
"""
|
||||
|
||||
instances = super().forward_with_given_boxes(features, instances)
|
||||
instances = self._forward_densepose(features, instances)
|
||||
return instances
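# --- Illustrative usage sketch, not part of the original file ---
# DensePoseROIHeads is registered in ROI_HEADS_REGISTRY above, so it can be
# selected purely through the config. The densepose head name below is an
# assumed registered head, not something guaranteed by this file:
#
#   from detectron2.config import get_cfg
#   from detectron2.modeling import build_model
#   cfg = get_cfg()
#   add_densepose_config(cfg)
#   cfg.MODEL.ROI_HEADS.NAME = "DensePoseROIHeads"
#   cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME = "DensePoseV1ConvXHead"  # assumption
#   model = build_model(cfg)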
|
@@ -0,0 +1,145 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
|
||||
class EntrySelector(object):
|
||||
"""
|
||||
Base class for entry selectors
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def from_string(spec: str) -> "EntrySelector":
|
||||
if spec == "*":
|
||||
return AllEntrySelector()
|
||||
return FieldEntrySelector(spec)
|
||||
|
||||
|
||||
class AllEntrySelector(EntrySelector):
|
||||
"""
|
||||
Selector that accepts all entries
|
||||
"""
|
||||
|
||||
SPECIFIER = "*"
|
||||
|
||||
def __call__(self, entry):
|
||||
return True
|
||||
|
||||
|
||||
class FieldEntrySelector(EntrySelector):
|
||||
"""
|
||||
Selector that accepts only entries that match provided field
|
||||
specifier(s). Only a limited set of specifiers is supported for now:
|
||||
<specifiers>::=<specifier>[<comma><specifiers>]
|
||||
<specifier>::=<field_name>[<type_delim><type>]<equal><value_or_range>
|
||||
<field_name> is a valid identifier
|
||||
<type> ::= "int" | "str"
|
||||
<equal> ::= "="
|
||||
<comma> ::= ","
|
||||
<type_delim> ::= ":"
|
||||
<value_or_range> ::= <value> | <range>
|
||||
<range> ::= <value><range_delim><value>
|
||||
<range_delim> ::= "-"
|
||||
<value> is a string without spaces and special symbols
|
||||
(e.g. <comma>, <equal>, <type_delim>, <range_delim>)
|
||||
"""
|
||||
|
||||
_SPEC_DELIM = ","
|
||||
_TYPE_DELIM = ":"
|
||||
_RANGE_DELIM = "-"
|
||||
_EQUAL = "="
|
||||
_ERROR_PREFIX = "Invalid field selector specifier"
|
||||
|
||||
class _FieldEntryValuePredicate(object):
|
||||
"""
|
||||
Predicate that checks strict equality for the specified entry field
|
||||
"""
|
||||
|
||||
def __init__(self, name: str, typespec: str, value: str):
|
||||
import builtins
|
||||
|
||||
self.name = name
|
||||
self.type = getattr(builtins, typespec) if typespec is not None else str
|
||||
self.value = value
|
||||
|
||||
def __call__(self, entry):
|
||||
return entry[self.name] == self.type(self.value)
|
||||
|
||||
class _FieldEntryRangePredicate(object):
|
||||
"""
|
||||
Predicate that checks whether an entry field falls into the specified range
|
||||
"""
|
||||
|
||||
def __init__(self, name: str, typespec: str, vmin: str, vmax: str):
|
||||
import builtins
|
||||
|
||||
self.name = name
|
||||
self.type = getattr(builtins, typespec) if typespec is not None else str
|
||||
self.vmin = vmin
|
||||
self.vmax = vmax
|
||||
|
||||
def __call__(self, entry):
|
||||
return (entry[self.name] >= self.type(self.vmin)) and (
|
||||
entry[self.name] <= self.type(self.vmax)
|
||||
)
|
||||
|
||||
def __init__(self, spec: str):
|
||||
self._predicates = self._parse_specifier_into_predicates(spec)
|
||||
|
||||
def __call__(self, entry: Dict[str, Any]):
|
||||
for predicate in self._predicates:
|
||||
if not predicate(entry):
|
||||
return False
|
||||
return True
|
||||
|
||||
def _parse_specifier_into_predicates(self, spec: str):
|
||||
predicates = []
|
||||
specs = spec.split(self._SPEC_DELIM)
|
||||
for subspec in specs:
|
||||
eq_idx = subspec.find(self._EQUAL)
|
||||
if eq_idx > 0:
|
||||
field_name_with_type = subspec[:eq_idx]
|
||||
field_name, field_type = self._parse_field_name_type(field_name_with_type)
|
||||
field_value_or_range = subspec[eq_idx + 1 :]
|
||||
if self._is_range_spec(field_value_or_range):
|
||||
vmin, vmax = self._get_range_spec(field_value_or_range)
|
||||
predicate = FieldEntrySelector._FieldEntryRangePredicate(
|
||||
field_name, field_type, vmin, vmax
|
||||
)
|
||||
else:
|
||||
predicate = FieldEntrySelector._FieldEntryValuePredicate(
|
||||
field_name, field_type, field_value_or_range
|
||||
)
|
||||
predicates.append(predicate)
|
||||
elif eq_idx == 0:
|
||||
self._parse_error(f'"{subspec}", field name is empty!')
|
||||
else:
|
||||
self._parse_error(f'"{subspec}", should have format ' "<field>=<value_or_range>!")
|
||||
return predicates
|
||||
|
||||
def _parse_field_name_type(self, field_name_with_type: str) -> Tuple[str, Optional[str]]:
|
||||
type_delim_idx = field_name_with_type.find(self._TYPE_DELIM)
|
||||
if type_delim_idx > 0:
|
||||
field_name = field_name_with_type[:type_delim_idx]
|
||||
field_type = field_name_with_type[type_delim_idx + 1 :]
|
||||
elif type_delim_idx == 0:
|
||||
self._parse_error(f'"{field_name_with_type}", field name is empty!')
|
||||
else:
|
||||
field_name = field_name_with_type
|
||||
field_type = None
|
||||
return field_name, field_type
|
||||
|
||||
def _is_range_spec(self, field_value_or_range):
|
||||
delim_idx = field_value_or_range.find(self._RANGE_DELIM)
|
||||
return delim_idx > 0
|
||||
|
||||
def _get_range_spec(self, field_value_or_range):
|
||||
if self._is_range_spec(field_value_or_range):
|
||||
delim_idx = field_value_or_range.find(self._RANGE_DELIM)
|
||||
vmin = field_value_or_range[:delim_idx]
|
||||
vmax = field_value_or_range[delim_idx + 1 :]
|
||||
return vmin, vmax
|
||||
else:
|
||||
self._parse_error('"field_value_or_range", range of values expected!')
|
||||
|
||||
def _parse_error(self, msg):
|
||||
raise ValueError(f"{self._ERROR_PREFIX}: {msg}")
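# --- Illustrative sketch, not part of the original file ---
# Exercising the specifier grammar documented above; the field names and
# values are arbitrary assumptions.
def _demo_entry_selector():
    selector = EntrySelector.from_string("image_id:int=100-200,dataset=coco")
    assert selector({"image_id": 150, "dataset": "coco"})
    assert not selector({"image_id": 300, "dataset": "coco"})
    # "*" selects everything via AllEntrySelector
    assert EntrySelector.from_string("*")({"anything": 1})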
|
@@ -0,0 +1,13 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import logging
|
||||
|
||||
|
||||
def verbosity_to_level(verbosity):
|
||||
if verbosity is not None:
|
||||
if verbosity == 0:
|
||||
return logging.WARNING
|
||||
elif verbosity == 1:
|
||||
return logging.INFO
|
||||
elif verbosity >= 2:
|
||||
return logging.DEBUG
|
||||
return logging.WARNING
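# --- Illustrative usage sketch, not part of the original file ---
# Typical wiring with an argparse-style "-v/-vv" counter; the attribute name
# args.verbosity is an assumption:
#
#   logging.basicConfig(level=verbosity_to_level(args.verbosity))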
|
@@ -0,0 +1,16 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
from fvcore.common.file_io import PathManager
|
||||
|
||||
from detectron2.data import MetadataCatalog
|
||||
|
||||
from densepose import DensePoseTransformData
|
||||
|
||||
|
||||
def load_for_dataset(dataset_name):
|
||||
path = MetadataCatalog.get(dataset_name).densepose_transform_src
|
||||
densepose_transform_data_fpath = PathManager.get_local_path(path)
|
||||
return DensePoseTransformData.load(densepose_transform_data_fpath)
|
||||
|
||||
|
||||
def load_from_cfg(cfg):
|
||||
return load_for_dataset(cfg.DATASETS.TEST[0])
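# --- Illustrative usage sketch, not part of the original file ---
# The dataset name below is only an assumed example; any dataset registered
# with a densepose_transform_src metadata entry works:
#
#   transform_data = load_for_dataset("densepose_coco_2014_minival")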
|
@@ -0,0 +1,191 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import logging
|
||||
import numpy as np
|
||||
import cv2
|
||||
import torch
|
||||
|
||||
Image = np.ndarray
|
||||
Boxes = torch.Tensor
|
||||
|
||||
|
||||
class MatrixVisualizer(object):
|
||||
"""
|
||||
Base visualizer for matrix data
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
inplace=True,
|
||||
cmap=cv2.COLORMAP_PARULA,
|
||||
val_scale=1.0,
|
||||
alpha=0.7,
|
||||
interp_method_matrix=cv2.INTER_LINEAR,
|
||||
interp_method_mask=cv2.INTER_NEAREST,
|
||||
):
|
||||
self.inplace = inplace
|
||||
self.cmap = cmap
|
||||
self.val_scale = val_scale
|
||||
self.alpha = alpha
|
||||
self.interp_method_matrix = interp_method_matrix
|
||||
self.interp_method_mask = interp_method_mask
|
||||
|
||||
def visualize(self, image_bgr, mask, matrix, bbox_xywh):
|
||||
self._check_image(image_bgr)
|
||||
self._check_mask_matrix(mask, matrix)
|
||||
if self.inplace:
|
||||
image_target_bgr = image_bgr
|
||||
else:
|
||||
image_target_bgr = image_bgr * 0
|
||||
x, y, w, h = [int(v) for v in bbox_xywh]
|
||||
if w <= 0 or h <= 0:
|
||||
return image_bgr
|
||||
mask, matrix = self._resize(mask, matrix, w, h)
|
||||
mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3])
|
||||
matrix_scaled = matrix.astype(np.float32) * self.val_scale
|
||||
_EPSILON = 1e-6
|
||||
if np.any(matrix_scaled > 255 + _EPSILON):
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.warning(
|
||||
f"Matrix has values > {255 + _EPSILON} after " f"scaling, clipping to [0..255]"
|
||||
)
|
||||
matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8)
|
||||
matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap)
|
||||
matrix_vis[mask_bg] = image_target_bgr[y : y + h, x : x + w, :][mask_bg]
|
||||
image_target_bgr[y : y + h, x : x + w, :] = (
|
||||
image_target_bgr[y : y + h, x : x + w, :] * (1.0 - self.alpha) + matrix_vis * self.alpha
|
||||
)
|
||||
return image_target_bgr.astype(np.uint8)
|
||||
|
||||
def _resize(self, mask, matrix, w, h):
|
||||
if (w != mask.shape[1]) or (h != mask.shape[0]):
|
||||
mask = cv2.resize(mask, (w, h), interpolation=self.interp_method_mask)
|
||||
if (w != matrix.shape[1]) or (h != matrix.shape[0]):
|
||||
matrix = cv2.resize(matrix, (w, h), interpolation=self.interp_method_matrix)
|
||||
return mask, matrix
|
||||
|
||||
def _check_image(self, image_rgb):
|
||||
assert len(image_rgb.shape) == 3
|
||||
assert image_rgb.shape[2] == 3
|
||||
assert image_rgb.dtype == np.uint8
|
||||
|
||||
def _check_mask_matrix(self, mask, matrix):
|
||||
assert len(matrix.shape) == 2
|
||||
assert len(mask.shape) == 2
|
||||
assert mask.dtype == np.uint8
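# --- Illustrative sketch, not part of the original file ---
# Overlaying a small dummy matrix onto a blank image with MatrixVisualizer;
# all sizes and values are arbitrary assumptions.
def _demo_matrix_visualizer():
    image = np.zeros((100, 100, 3), dtype=np.uint8)
    mask = np.ones((20, 20), dtype=np.uint8)
    matrix = np.full((20, 20), 128, dtype=np.uint8)
    visualizer = MatrixVisualizer(inplace=False)
    # draw the colormapped matrix inside the box x=10, y=10, w=20, h=20
    return visualizer.visualize(image, mask, matrix, [10, 10, 20, 20])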
|
||||
|
||||
|
||||
class RectangleVisualizer(object):
|
||||
|
||||
_COLOR_GREEN = (18, 127, 15)
|
||||
|
||||
def __init__(self, color=_COLOR_GREEN, thickness=1):
|
||||
self.color = color
|
||||
self.thickness = thickness
|
||||
|
||||
def visualize(self, image_bgr, bbox_xywh, color=None, thickness=None):
|
||||
x, y, w, h = bbox_xywh
|
||||
color = color or self.color
|
||||
thickness = thickness or self.thickness
|
||||
cv2.rectangle(image_bgr, (int(x), int(y)), (int(x + w), int(y + h)), color, thickness)
|
||||
return image_bgr
|
||||
|
||||
|
||||
class PointsVisualizer(object):
|
||||
|
||||
_COLOR_GREEN = (18, 127, 15)
|
||||
|
||||
def __init__(self, color_bgr=_COLOR_GREEN, r=5):
|
||||
self.color_bgr = color_bgr
|
||||
self.r = r
|
||||
|
||||
def visualize(self, image_bgr, pts_xy, colors_bgr=None, rs=None):
|
||||
for j, pt_xy in enumerate(pts_xy):
|
||||
x, y = pt_xy
|
||||
color_bgr = colors_bgr[j] if colors_bgr is not None else self.color_bgr
|
||||
r = rs[j] if rs is not None else self.r
|
||||
cv2.circle(image_bgr, (int(x), int(y)), r, color_bgr, -1)
|
||||
return image_bgr
|
||||
|
||||
|
||||
class TextVisualizer(object):
|
||||
|
||||
_COLOR_GRAY = (218, 227, 218)
|
||||
_COLOR_WHITE = (255, 255, 255)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
font_face=cv2.FONT_HERSHEY_SIMPLEX,
|
||||
font_color_bgr=_COLOR_GRAY,
|
||||
font_scale=0.35,
|
||||
font_line_type=cv2.LINE_AA,
|
||||
font_line_thickness=1,
|
||||
fill_color_bgr=_COLOR_WHITE,
|
||||
fill_color_transparency=1.0,
|
||||
frame_color_bgr=_COLOR_WHITE,
|
||||
frame_color_transparency=1.0,
|
||||
frame_thickness=1,
|
||||
):
|
||||
self.font_face = font_face
|
||||
self.font_color_bgr = font_color_bgr
|
||||
self.font_scale = font_scale
|
||||
self.font_line_type = font_line_type
|
||||
self.font_line_thickness = font_line_thickness
|
||||
self.fill_color_bgr = fill_color_bgr
|
||||
self.fill_color_transparency = fill_color_transparency
|
||||
self.frame_color_bgr = frame_color_bgr
|
||||
self.frame_color_transparency = frame_color_transparency
|
||||
self.frame_thickness = frame_thickness
|
||||
|
||||
def visualize(self, image_bgr, txt, topleft_xy):
|
||||
txt_w, txt_h = self.get_text_size_wh(txt)
|
||||
topleft_xy = tuple(map(int, topleft_xy))
|
||||
x, y = topleft_xy
|
||||
if self.frame_color_transparency < 1.0:
|
||||
t = self.frame_thickness
|
||||
image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :] = (
|
||||
image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :]
|
||||
* self.frame_color_transparency
|
||||
+ np.array(self.frame_color_bgr) * (1.0 - self.frame_color_transparency)
|
||||
).astype(np.float64)
|
||||
if self.fill_color_transparency < 1.0:
|
||||
image_bgr[y : y + txt_h, x : x + txt_w, :] = (
|
||||
image_bgr[y : y + txt_h, x : x + txt_w, :] * self.fill_color_transparency
|
||||
+ np.array(self.fill_color_bgr) * (1.0 - self.fill_color_transparency)
|
||||
).astype(np.float64)
|
||||
cv2.putText(
|
||||
image_bgr,
|
||||
txt,
|
||||
topleft_xy,
|
||||
self.font_face,
|
||||
self.font_scale,
|
||||
self.font_color_bgr,
|
||||
self.font_line_thickness,
|
||||
self.font_line_type,
|
||||
)
|
||||
return image_bgr
|
||||
|
||||
def get_text_size_wh(self, txt):
|
||||
((txt_w, txt_h), _) = cv2.getTextSize(
|
||||
txt, self.font_face, self.font_scale, self.font_line_thickness
|
||||
)
|
||||
return txt_w, txt_h
|
||||
|
||||
|
||||
class CompoundVisualizer(object):
|
||||
def __init__(self, visualizers):
|
||||
self.visualizers = visualizers
|
||||
|
||||
def visualize(self, image_bgr, data):
|
||||
assert len(data) == len(
|
||||
self.visualizers
|
||||
), "The number of datas {} should match the number of visualizers" " {}".format(
|
||||
len(data), len(self.visualizers)
|
||||
)
|
||||
image = image_bgr
|
||||
for i, visualizer in enumerate(self.visualizers):
|
||||
image = visualizer.visualize(image, data[i])
|
||||
return image
|
||||
|
||||
def __str__(self):
|
||||
visualizer_str = ", ".join([str(v) for v in self.visualizers])
|
||||
return "Compound Visualizer [{}]".format(visualizer_str)
|
@@ -0,0 +1,37 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
from .base import RectangleVisualizer, TextVisualizer
|
||||
|
||||
|
||||
class BoundingBoxVisualizer(object):
|
||||
def __init__(self):
|
||||
self.rectangle_visualizer = RectangleVisualizer()
|
||||
|
||||
def visualize(self, image_bgr, boxes_xywh):
|
||||
for bbox_xywh in boxes_xywh:
|
||||
image_bgr = self.rectangle_visualizer.visualize(image_bgr, bbox_xywh)
|
||||
return image_bgr
|
||||
|
||||
|
||||
class ScoredBoundingBoxVisualizer(object):
|
||||
def __init__(self, bbox_visualizer_params=None, score_visualizer_params=None):
|
||||
if bbox_visualizer_params is None:
|
||||
bbox_visualizer_params = {}
|
||||
if score_visualizer_params is None:
|
||||
score_visualizer_params = {}
|
||||
self.visualizer_bbox = RectangleVisualizer(**bbox_visualizer_params)
|
||||
self.visualizer_score = TextVisualizer(**score_visualizer_params)
|
||||
|
||||
def visualize(self, image_bgr, scored_bboxes):
|
||||
boxes_xywh, box_scores = scored_bboxes
|
||||
assert len(boxes_xywh) == len(
|
||||
box_scores
|
||||
), "Number of bounding boxes {} should be equal to the number of scores {}".format(
|
||||
len(boxes_xywh), len(box_scores)
|
||||
)
|
||||
for i, box_xywh in enumerate(boxes_xywh):
|
||||
score_i = box_scores[i]
|
||||
image_bgr = self.visualizer_bbox.visualize(image_bgr, box_xywh)
|
||||
score_txt = "{0:6.4f}".format(score_i)
|
||||
topleft_xy = box_xywh[0], box_xywh[1]
|
||||
image_bgr = self.visualizer_score.visualize(image_bgr, score_txt, topleft_xy)
|
||||
return image_bgr
|
@@ -0,0 +1,593 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import logging
|
||||
import numpy as np
|
||||
from typing import Iterable, Optional, Tuple
|
||||
import cv2
|
||||
|
||||
from ..data.structures import DensePoseDataRelative, DensePoseOutput, DensePoseResult
|
||||
from .base import Boxes, Image, MatrixVisualizer, PointsVisualizer
|
||||
|
||||
|
||||
class DensePoseResultsVisualizer(object):
|
||||
def visualize(self, image_bgr: Image, densepose_result: Optional[DensePoseResult]) -> Image:
|
||||
if densepose_result is None:
|
||||
return image_bgr
|
||||
context = self.create_visualization_context(image_bgr)
|
||||
for i, result_encoded_w_shape in enumerate(densepose_result.results):
|
||||
iuv_arr = DensePoseResult.decode_png_data(*result_encoded_w_shape)
|
||||
bbox_xywh = densepose_result.boxes_xywh[i]
|
||||
self.visualize_iuv_arr(context, iuv_arr, bbox_xywh)
|
||||
image_bgr = self.context_to_image_bgr(context)
|
||||
return image_bgr
|
||||
|
||||
|
||||
class DensePoseMaskedColormapResultsVisualizer(DensePoseResultsVisualizer):
|
||||
def __init__(
|
||||
self,
|
||||
data_extractor,
|
||||
segm_extractor,
|
||||
inplace=True,
|
||||
cmap=cv2.COLORMAP_PARULA,
|
||||
alpha=0.7,
|
||||
val_scale=1.0,
|
||||
):
|
||||
self.mask_visualizer = MatrixVisualizer(
|
||||
inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha
|
||||
)
|
||||
self.data_extractor = data_extractor
|
||||
self.segm_extractor = segm_extractor
|
||||
|
||||
def create_visualization_context(self, image_bgr: Image):
|
||||
return image_bgr
|
||||
|
||||
def context_to_image_bgr(self, context):
|
||||
return context
|
||||
|
||||
def get_image_bgr_from_context(self, context):
|
||||
return context
|
||||
|
||||
def visualize_iuv_arr(self, context, iuv_arr, bbox_xywh):
|
||||
image_bgr = self.get_image_bgr_from_context(context)
|
||||
matrix = self.data_extractor(iuv_arr)
|
||||
segm = self.segm_extractor(iuv_arr)
|
||||
mask = np.zeros(matrix.shape, dtype=np.uint8)
|
||||
mask[segm > 0] = 1
|
||||
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh)
|
||||
return image_bgr
|
||||
|
||||
|
||||
def _extract_i_from_iuvarr(iuv_arr):
|
||||
return iuv_arr[0, :, :]
|
||||
|
||||
|
||||
def _extract_u_from_iuvarr(iuv_arr):
|
||||
return iuv_arr[1, :, :]
|
||||
|
||||
|
||||
def _extract_v_from_iuvarr(iuv_arr):
|
||||
return iuv_arr[2, :, :]
|
||||
|
||||
|
||||
class DensePoseResultsMplContourVisualizer(DensePoseResultsVisualizer):
|
||||
def __init__(self, levels=10, **kwargs):
|
||||
self.levels = levels
|
||||
self.plot_args = kwargs
|
||||
|
||||
def create_visualization_context(self, image_bgr: Image):
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
|
||||
|
||||
context = {}
|
||||
context["image_bgr"] = image_bgr
|
||||
dpi = 100
|
||||
height_inches = float(image_bgr.shape[0]) / dpi
|
||||
width_inches = float(image_bgr.shape[1]) / dpi
|
||||
fig = plt.figure(figsize=(width_inches, height_inches), dpi=dpi)
|
||||
plt.axes([0, 0, 1, 1])
|
||||
plt.axis("off")
|
||||
context["fig"] = fig
|
||||
canvas = FigureCanvas(fig)
|
||||
context["canvas"] = canvas
|
||||
extent = (0, image_bgr.shape[1], image_bgr.shape[0], 0)
|
||||
plt.imshow(image_bgr[:, :, ::-1], extent=extent)
|
||||
return context
|
||||
|
||||
def context_to_image_bgr(self, context):
|
||||
fig = context["fig"]
|
||||
w, h = map(int, fig.get_size_inches() * fig.get_dpi())
|
||||
canvas = context["canvas"]
|
||||
canvas.draw()
|
||||
image_1d = np.frombuffer(canvas.tostring_rgb(), dtype="uint8")
|
||||
image_rgb = image_1d.reshape(h, w, 3)
|
||||
image_bgr = image_rgb[:, :, ::-1].copy()
|
||||
return image_bgr
|
||||
|
||||
def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> Image:
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
|
||||
v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
|
||||
extent = (
|
||||
bbox_xywh[0],
|
||||
bbox_xywh[0] + bbox_xywh[2],
|
||||
bbox_xywh[1],
|
||||
bbox_xywh[1] + bbox_xywh[3],
|
||||
)
|
||||
plt.contour(u, self.levels, extent=extent, **self.plot_args)
|
||||
plt.contour(v, self.levels, extent=extent, **self.plot_args)
|
||||
|
||||
|
||||
class DensePoseResultsCustomContourVisualizer(DensePoseResultsVisualizer):
|
||||
"""
|
||||
Contour visualization using marching squares
|
||||
"""
|
||||
|
||||
def __init__(self, levels=10, **kwargs):
|
||||
# TODO: colormap is hardcoded
|
||||
cmap = cv2.COLORMAP_PARULA
|
||||
if isinstance(levels, int):
|
||||
self.levels = np.linspace(0, 1, levels)
|
||||
else:
|
||||
self.levels = levels
|
||||
if "linewidths" in kwargs:
|
||||
self.linewidths = kwargs["linewidths"]
|
||||
else:
|
||||
self.linewidths = [1] * len(self.levels)
|
||||
self.plot_args = kwargs
|
||||
img_colors_bgr = cv2.applyColorMap((self.levels * 255).astype(np.uint8), cmap)
|
||||
self.level_colors_bgr = [
|
||||
[int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr
|
||||
]
|
||||
|
||||
def create_visualization_context(self, image_bgr: Image):
|
||||
return image_bgr
|
||||
|
||||
def context_to_image_bgr(self, context):
|
||||
return context
|
||||
|
||||
def get_image_bgr_from_context(self, context):
|
||||
return context
|
||||
|
||||
def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> Image:
|
||||
image_bgr = self.get_image_bgr_from_context(context)
|
||||
segm = _extract_i_from_iuvarr(iuv_arr)
|
||||
u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
|
||||
v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
|
||||
self._contours(image_bgr, u, segm, bbox_xywh)
|
||||
self._contours(image_bgr, v, segm, bbox_xywh)
|
||||
|
||||
def _contours(self, image_bgr, arr, segm, bbox_xywh):
|
||||
for part_idx in range(1, DensePoseDataRelative.N_PART_LABELS + 1):
|
||||
mask = segm == part_idx
|
||||
if not np.any(mask):
|
||||
continue
|
||||
arr_min = np.amin(arr[mask])
|
||||
arr_max = np.amax(arr[mask])
|
||||
I, J = np.nonzero(mask)
|
||||
i0 = np.amin(I)
|
||||
i1 = np.amax(I) + 1
|
||||
j0 = np.amin(J)
|
||||
j1 = np.amax(J) + 1
|
||||
if (j1 == j0 + 1) or (i1 == i0 + 1):
|
||||
continue
|
||||
Nw = arr.shape[1] - 1
|
||||
Nh = arr.shape[0] - 1
|
||||
for level_idx, level in enumerate(self.levels):
|
||||
if (level < arr_min) or (level > arr_max):
|
||||
continue
|
||||
vp = arr[i0:i1, j0:j1] >= level
|
||||
bin_codes = vp[:-1, :-1] + vp[1:, :-1] * 2 + vp[1:, 1:] * 4 + vp[:-1, 1:] * 8
|
||||
mp = mask[i0:i1, j0:j1]
|
||||
bin_mask_codes = mp[:-1, :-1] + mp[1:, :-1] * 2 + mp[1:, 1:] * 4 + mp[:-1, 1:] * 8
|
||||
it = np.nditer(bin_codes, flags=["multi_index"])
|
||||
color_bgr = self.level_colors_bgr[level_idx]
|
||||
linewidth = self.linewidths[level_idx]
|
||||
while not it.finished:
|
||||
if (it[0] != 0) and (it[0] != 15):
|
||||
i, j = it.multi_index
|
||||
if bin_mask_codes[i, j] != 0:
|
||||
self._draw_line(
|
||||
image_bgr,
|
||||
arr,
|
||||
mask,
|
||||
level,
|
||||
color_bgr,
|
||||
linewidth,
|
||||
it[0],
|
||||
it.multi_index,
|
||||
bbox_xywh,
|
||||
Nw,
|
||||
Nh,
|
||||
(i0, j0),
|
||||
)
|
||||
it.iternext()
|
||||
|
||||
def _draw_line(
|
||||
self,
|
||||
image_bgr,
|
||||
arr,
|
||||
mask,
|
||||
v,
|
||||
color_bgr,
|
||||
linewidth,
|
||||
bin_code,
|
||||
multi_idx,
|
||||
bbox_xywh,
|
||||
Nw,
|
||||
Nh,
|
||||
offset,
|
||||
):
|
||||
lines = self._bin_code_2_lines(arr, v, bin_code, multi_idx, Nw, Nh, offset)
|
||||
x0, y0, w, h = bbox_xywh
|
||||
x1 = x0 + w
|
||||
y1 = y0 + h
|
||||
for line in lines:
|
||||
x0r, y0r = line[0]
|
||||
x1r, y1r = line[1]
|
||||
pt0 = (int(x0 + x0r * (x1 - x0)), int(y0 + y0r * (y1 - y0)))
|
||||
pt1 = (int(x0 + x1r * (x1 - x0)), int(y0 + y1r * (y1 - y0)))
|
||||
cv2.line(image_bgr, pt0, pt1, color_bgr, linewidth)
|
||||
|
||||
def _bin_code_2_lines(self, arr, v, bin_code, multi_idx, Nw, Nh, offset):
|
||||
i0, j0 = offset
|
||||
i, j = multi_idx
|
||||
i += i0
|
||||
j += j0
|
||||
v0, v1, v2, v3 = arr[i, j], arr[i + 1, j], arr[i + 1, j + 1], arr[i, j + 1]
|
||||
x0i = float(j) / Nw
|
||||
y0j = float(i) / Nh
|
||||
He = 1.0 / Nh
|
||||
We = 1.0 / Nw
|
||||
if (bin_code == 1) or (bin_code == 14):
|
||||
a = (v - v0) / (v1 - v0)
|
||||
b = (v - v0) / (v3 - v0)
|
||||
pt1 = (x0i, y0j + a * He)
|
||||
pt2 = (x0i + b * We, y0j)
|
||||
return [(pt1, pt2)]
|
||||
elif (bin_code == 2) or (bin_code == 13):
|
||||
a = (v - v0) / (v1 - v0)
|
||||
b = (v - v1) / (v2 - v1)
|
||||
pt1 = (x0i, y0j + a * He)
|
||||
pt2 = (x0i + b * We, y0j + He)
|
||||
return [(pt1, pt2)]
|
||||
elif (bin_code == 3) or (bin_code == 12):
|
||||
a = (v - v0) / (v3 - v0)
|
||||
b = (v - v1) / (v2 - v1)
|
||||
pt1 = (x0i + a * We, y0j)
|
||||
pt2 = (x0i + b * We, y0j + He)
|
||||
return [(pt1, pt2)]
|
||||
elif (bin_code == 4) or (bin_code == 11):
|
||||
a = (v - v1) / (v2 - v1)
|
||||
b = (v - v3) / (v2 - v3)
|
||||
pt1 = (x0i + a * We, y0j + He)
|
||||
pt2 = (x0i + We, y0j + b * He)
|
||||
return [(pt1, pt2)]
|
||||
elif (bin_code == 6) or (bin_code == 9):
|
||||
a = (v - v0) / (v1 - v0)
|
||||
b = (v - v3) / (v2 - v3)
|
||||
pt1 = (x0i, y0j + a * He)
|
||||
pt2 = (x0i + We, y0j + b * He)
|
||||
return [(pt1, pt2)]
|
||||
elif (bin_code == 7) or (bin_code == 8):
|
||||
a = (v - v0) / (v3 - v0)
|
||||
b = (v - v3) / (v2 - v3)
|
||||
pt1 = (x0i + a * We, y0j)
|
||||
pt2 = (x0i + We, y0j + b * He)
|
||||
return [(pt1, pt2)]
|
||||
elif bin_code == 5:
|
||||
a1 = (v - v0) / (v1 - v0)
|
||||
b1 = (v - v1) / (v2 - v1)
|
||||
pt11 = (x0i, y0j + a1 * He)
|
||||
pt12 = (x0i + b1 * We, y0j + He)
|
||||
a2 = (v - v0) / (v3 - v0)
|
||||
b2 = (v - v3) / (v2 - v3)
|
||||
pt21 = (x0i + a2 * We, y0j)
|
||||
pt22 = (x0i + We, y0j + b2 * He)
|
||||
return [(pt11, pt12), (pt21, pt22)]
|
||||
elif bin_code == 10:
|
||||
a1 = (v - v0) / (v3 - v0)
|
||||
b1 = (v - v0) / (v1 - v0)
|
||||
pt11 = (x0i + a1 * We, y0j)
|
||||
pt12 = (x0i, y0j + b1 * He)
|
||||
a2 = (v - v1) / (v2 - v1)
|
||||
b2 = (v - v3) / (v2 - v3)
|
||||
pt21 = (x0i + a2 * We, y0j + He)
|
||||
pt22 = (x0i + We, y0j + b2 * He)
|
||||
return [(pt11, pt12), (pt21, pt22)]
|
||||
return []
|
||||
|
||||
|
||||
try:
|
||||
import matplotlib
|
||||
|
||||
matplotlib.use("Agg")
|
||||
DensePoseResultsContourVisualizer = DensePoseResultsMplContourVisualizer
|
||||
except ModuleNotFoundError:
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.warning("Could not import matplotlib, using custom contour visualizer")
|
||||
DensePoseResultsContourVisualizer = DensePoseResultsCustomContourVisualizer
|
||||
|
||||
|
||||
class DensePoseResultsFineSegmentationVisualizer(DensePoseMaskedColormapResultsVisualizer):
|
||||
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
|
||||
super(DensePoseResultsFineSegmentationVisualizer, self).__init__(
|
||||
_extract_i_from_iuvarr,
|
||||
_extract_i_from_iuvarr,
|
||||
inplace,
|
||||
cmap,
|
||||
alpha,
|
||||
val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS,
|
||||
)
|
||||
|
||||
|
||||
class DensePoseResultsUVisualizer(DensePoseMaskedColormapResultsVisualizer):
|
||||
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
|
||||
super(DensePoseResultsUVisualizer, self).__init__(
|
||||
_extract_u_from_iuvarr, _extract_i_from_iuvarr, inplace, cmap, alpha, val_scale=1.0
|
||||
)
|
||||
|
||||
|
||||
class DensePoseResultsVVisualizer(DensePoseMaskedColormapResultsVisualizer):
|
||||
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
|
||||
super(DensePoseResultsVVisualizer, self).__init__(
|
||||
_extract_v_from_iuvarr, _extract_i_from_iuvarr, inplace, cmap, alpha, val_scale=1.0
|
||||
)
|
||||
|
||||
|
||||
class DensePoseOutputsFineSegmentationVisualizer(object):
|
||||
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
|
||||
self.mask_visualizer = MatrixVisualizer(
|
||||
inplace=inplace,
|
||||
cmap=cmap,
|
||||
val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS,
|
||||
alpha=alpha,
|
||||
)
|
||||
|
||||
def visualize(
|
||||
self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
|
||||
) -> Image:
|
||||
if dp_output_with_bboxes is None:
|
||||
return image_bgr
|
||||
densepose_output, bboxes_xywh = dp_output_with_bboxes
|
||||
S = densepose_output.S
|
||||
I = densepose_output.I # noqa
|
||||
U = densepose_output.U
|
||||
V = densepose_output.V
|
||||
N = S.size(0)
|
||||
assert N == I.size(
|
||||
0
|
||||
), "densepose outputs S {} and I {}" " should have equal first dim size".format(
|
||||
S.size(), I.size()
|
||||
)
|
||||
assert N == U.size(
|
||||
0
|
||||
), "densepose outputs S {} and U {}" " should have equal first dim size".format(
|
||||
S.size(), U.size()
|
||||
)
|
||||
assert N == V.size(
|
||||
0
|
||||
), "densepose outputs S {} and V {}" " should have equal first dim size".format(
|
||||
S.size(), V.size()
|
||||
)
|
||||
assert N == len(
|
||||
bboxes_xywh
|
||||
), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
|
||||
len(bboxes_xywh), N
|
||||
)
|
||||
for n in range(N):
|
||||
Sn = S[n].argmax(dim=0)
|
||||
In = I[n].argmax(dim=0) * (Sn > 0).long()
|
||||
matrix = In.cpu().numpy().astype(np.uint8)
|
||||
mask = np.zeros(matrix.shape, dtype=np.uint8)
|
||||
mask[matrix > 0] = 1
|
||||
bbox_xywh = bboxes_xywh[n]
|
||||
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh)
|
||||
return image_bgr
|
||||
|
||||
|
||||
class DensePoseOutputsUVisualizer(object):
|
||||
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
|
||||
self.mask_visualizer = MatrixVisualizer(
|
||||
inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha
|
||||
)
|
||||
|
||||
def visualize(
|
||||
self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
|
||||
) -> Image:
|
||||
if dp_output_with_bboxes is None:
|
||||
return image_bgr
|
||||
densepose_output, bboxes_xywh = dp_output_with_bboxes
|
||||
assert isinstance(
|
||||
densepose_output, DensePoseOutput
|
||||
), "DensePoseOutput expected, {} encountered".format(type(densepose_output))
|
||||
S = densepose_output.S
|
||||
I = densepose_output.I # noqa
|
||||
U = densepose_output.U
|
||||
V = densepose_output.V
|
||||
N = S.size(0)
|
||||
assert N == I.size(
|
||||
0
|
||||
), "densepose outputs S {} and I {}" " should have equal first dim size".format(
|
||||
S.size(), I.size()
|
||||
)
|
||||
assert N == U.size(
|
||||
0
|
||||
), "densepose outputs S {} and U {}" " should have equal first dim size".format(
|
||||
S.size(), U.size()
|
||||
)
|
||||
assert N == V.size(
|
||||
0
|
||||
), "densepose outputs S {} and V {}" " should have equal first dim size".format(
|
||||
S.size(), V.size()
|
||||
)
|
||||
assert N == len(
|
||||
bboxes_xywh
|
||||
), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
|
||||
len(bboxes_xywh), N
|
||||
)
|
||||
for n in range(N):
|
||||
Sn = S[n].argmax(dim=0)
|
||||
In = I[n].argmax(dim=0) * (Sn > 0).long()
|
||||
segmentation = In.cpu().numpy().astype(np.uint8)
|
||||
mask = np.zeros(segmentation.shape, dtype=np.uint8)
|
||||
mask[segmentation > 0] = 1
|
||||
Un = U[n].cpu().numpy().astype(np.float32)
|
||||
Uvis = np.zeros(segmentation.shape, dtype=np.float32)
|
||||
for partId in range(Un.shape[0]):
|
||||
Uvis[segmentation == partId] = Un[partId][segmentation == partId].clip(0, 1) * 255
|
||||
bbox_xywh = bboxes_xywh[n]
|
||||
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, Uvis, bbox_xywh)
|
||||
return image_bgr
|
||||
|
||||
|
||||
class DensePoseOutputsVVisualizer(object):
|
||||
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
|
||||
self.mask_visualizer = MatrixVisualizer(
|
||||
inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha
|
||||
)
|
||||
|
||||
def visualize(
|
||||
self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
|
||||
) -> Image:
|
||||
if dp_output_with_bboxes is None:
|
||||
return image_bgr
|
||||
densepose_output, bboxes_xywh = dp_output_with_bboxes
|
||||
assert isinstance(
|
||||
densepose_output, DensePoseOutput
|
||||
), "DensePoseOutput expected, {} encountered".format(type(densepose_output))
|
||||
S = densepose_output.S
|
||||
I = densepose_output.I # noqa
|
||||
U = densepose_output.U
|
||||
V = densepose_output.V
|
||||
N = S.size(0)
|
||||
assert N == I.size(
|
||||
0
|
||||
), "densepose outputs S {} and I {}" " should have equal first dim size".format(
|
||||
S.size(), I.size()
|
||||
)
|
||||
assert N == U.size(
|
||||
0
|
||||
), "densepose outputs S {} and U {}" " should have equal first dim size".format(
|
||||
S.size(), U.size()
|
||||
)
|
||||
assert N == V.size(
|
||||
0
|
||||
), "densepose outputs S {} and V {}" " should have equal first dim size".format(
|
||||
S.size(), V.size()
|
||||
)
|
||||
assert N == len(
|
||||
bboxes_xywh
|
||||
), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
|
||||
len(bboxes_xywh), N
|
||||
)
|
||||
for n in range(N):
|
||||
Sn = S[n].argmax(dim=0)
|
||||
In = I[n].argmax(dim=0) * (Sn > 0).long()
|
||||
segmentation = In.cpu().numpy().astype(np.uint8)
|
||||
mask = np.zeros(segmentation.shape, dtype=np.uint8)
|
||||
mask[segmentation > 0] = 1
|
||||
Vn = V[n].cpu().numpy().astype(np.float32)
|
||||
Vvis = np.zeros(segmentation.shape, dtype=np.float32)
|
||||
for partId in range(Vn.shape[0]):
|
||||
Vvis[segmentation == partId] = Vn[partId][segmentation == partId].clip(0, 1) * 255
|
||||
bbox_xywh = bboxes_xywh[n]
|
||||
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, Vvis, bbox_xywh)
|
||||
return image_bgr
|
||||
|
||||
|
||||
class DensePoseDataCoarseSegmentationVisualizer(object):
|
||||
"""
|
||||
Visualizer for ground truth segmentation
|
||||
"""
|
||||
|
||||
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
|
||||
self.mask_visualizer = MatrixVisualizer(
|
||||
inplace=inplace,
|
||||
cmap=cmap,
|
||||
val_scale=255.0 / DensePoseDataRelative.N_BODY_PARTS,
|
||||
alpha=alpha,
|
||||
)
|
||||
|
||||
def visualize(
|
||||
self,
|
||||
image_bgr: Image,
|
||||
bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
|
||||
) -> Image:
|
||||
if bbox_densepose_datas is None:
|
||||
return image_bgr
|
||||
for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
|
||||
matrix = densepose_data.segm.numpy()
|
||||
mask = np.zeros(matrix.shape, dtype=np.uint8)
|
||||
mask[matrix > 0] = 1
|
||||
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh.numpy())
|
||||
return image_bgr
|
||||
|
||||
|
||||
class DensePoseDataPointsVisualizer(object):
|
||||
def __init__(self, densepose_data_to_value_fn=None, cmap=cv2.COLORMAP_PARULA):
|
||||
self.points_visualizer = PointsVisualizer()
|
||||
self.densepose_data_to_value_fn = densepose_data_to_value_fn
|
||||
self.cmap = cmap
|
||||
|
||||
def visualize(
|
||||
self,
|
||||
image_bgr: Image,
|
||||
bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
|
||||
) -> Image:
|
||||
if bbox_densepose_datas is None:
|
||||
return image_bgr
|
||||
for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
|
||||
x0, y0, w, h = bbox_xywh.numpy()
|
||||
x = densepose_data.x.numpy() * w / 255.0 + x0
|
||||
y = densepose_data.y.numpy() * h / 255.0 + y0
|
||||
pts_xy = zip(x, y)
|
||||
if self.densepose_data_to_value_fn is None:
|
||||
image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy)
|
||||
else:
|
||||
v = self.densepose_data_to_value_fn(densepose_data)
|
||||
img_colors_bgr = cv2.applyColorMap(v, self.cmap)
|
||||
colors_bgr = [
|
||||
[int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr
|
||||
]
|
||||
image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy, colors_bgr)
|
||||
return image_bgr
|
||||
|
||||
|
||||
def _densepose_data_u_for_cmap(densepose_data):
|
||||
u = np.clip(densepose_data.u.numpy(), 0, 1) * 255.0
|
||||
return u.astype(np.uint8)
|
||||
|
||||
|
||||
def _densepose_data_v_for_cmap(densepose_data):
|
||||
v = np.clip(densepose_data.v.numpy(), 0, 1) * 255.0
|
||||
return v.astype(np.uint8)
|
||||
|
||||
|
||||
def _densepose_data_i_for_cmap(densepose_data):
|
||||
i = (
|
||||
np.clip(densepose_data.i.numpy(), 0.0, DensePoseDataRelative.N_PART_LABELS)
|
||||
* 255.0
|
||||
/ DensePoseDataRelative.N_PART_LABELS
|
||||
)
|
||||
return i.astype(np.uint8)
|
||||
|
||||
|
||||
class DensePoseDataPointsUVisualizer(DensePoseDataPointsVisualizer):
    def __init__(self):
        super(DensePoseDataPointsUVisualizer, self).__init__(
            densepose_data_to_value_fn=_densepose_data_u_for_cmap
        )


class DensePoseDataPointsVVisualizer(DensePoseDataPointsVisualizer):
    def __init__(self):
        super(DensePoseDataPointsVVisualizer, self).__init__(
            densepose_data_to_value_fn=_densepose_data_v_for_cmap
        )


class DensePoseDataPointsIVisualizer(DensePoseDataPointsVisualizer):
    def __init__(self):
        super(DensePoseDataPointsIVisualizer, self).__init__(
            densepose_data_to_value_fn=_densepose_data_i_for_cmap
        )
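

# Hedged usage sketch (not part of the original file): the U/V/I visualizers differ only in
# their value functions, so they can be stacked on the same image; `image_bgr` and `data`
# (the (boxes, densepose_datas) tuple) are assumed to come from the caller.
def _example_visualize_points_uvi(image_bgr, data):
    for visualizer in (
        DensePoseDataPointsUVisualizer(),
        DensePoseDataPointsVVisualizer(),
        DensePoseDataPointsIVisualizer(),
    ):
        image_bgr = visualizer.visualize(image_bgr, data)
    return image_bgr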
@@ -0,0 +1,152 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
from typing import Sequence

import torch

from detectron2.layers.nms import batched_nms
from detectron2.structures.instances import Instances

from densepose.vis.bounding_box import BoundingBoxVisualizer, ScoredBoundingBoxVisualizer
from densepose.vis.densepose import DensePoseResultsVisualizer

from .base import CompoundVisualizer

Scores = Sequence[float]


def extract_scores_from_instances(instances: Instances, select=None):
    if instances.has("scores"):
        return instances.scores if select is None else instances.scores[select]
    return None


def extract_boxes_xywh_from_instances(instances: Instances, select=None):
    if instances.has("pred_boxes"):
        boxes_xywh = instances.pred_boxes.tensor.clone()
        boxes_xywh[:, 2] -= boxes_xywh[:, 0]
        boxes_xywh[:, 3] -= boxes_xywh[:, 1]
        return boxes_xywh if select is None else boxes_xywh[select]
    return None


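# Hedged usage sketch (not part of the original file): the two helpers above accept an optional
# `select` mask; this illustrative wrapper keeps only detections above a score cut-off
# (the 0.8 default is arbitrary).
def _example_extract_confident_boxes(instances: Instances, min_score: float = 0.8):
    scores = extract_scores_from_instances(instances)
    if scores is None:
        return None
    return extract_boxes_xywh_from_instances(instances, select=scores > min_score)

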
def create_extractor(visualizer: object):
    """
    Create an extractor for the provided visualizer
    """
    if isinstance(visualizer, CompoundVisualizer):
        extractors = [create_extractor(v) for v in visualizer.visualizers]
        return CompoundExtractor(extractors)
    elif isinstance(visualizer, DensePoseResultsVisualizer):
        return DensePoseResultExtractor()
    elif isinstance(visualizer, ScoredBoundingBoxVisualizer):
        return CompoundExtractor([extract_boxes_xywh_from_instances, extract_scores_from_instances])
    elif isinstance(visualizer, BoundingBoxVisualizer):
        return extract_boxes_xywh_from_instances
    else:
        logger = logging.getLogger(__name__)
        logger.error(f"Could not create extractor for {visualizer}")
        return None


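# Hedged usage sketch (not part of the original file): pairs a compound visualizer with the
# extractor returned by create_extractor, assuming the imported visualizers can be constructed
# with their default arguments and `instances` is detectron2 predictor output for one image.
def _example_compound_visualization(image_bgr, instances: Instances):
    visualizer = CompoundVisualizer([BoundingBoxVisualizer(), ScoredBoundingBoxVisualizer()])
    extractor = create_extractor(visualizer)
    data = extractor(instances)
    return visualizer.visualize(image_bgr, data)

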
class BoundingBoxExtractor(object):
    """
    Extracts bounding boxes from instances
    """

    def __call__(self, instances: Instances):
        boxes_xywh = extract_boxes_xywh_from_instances(instances)
        return boxes_xywh


class ScoredBoundingBoxExtractor(object):
    """
    Extracts bounding boxes and their scores from instances
    """

    def __call__(self, instances: Instances, select=None):
        scores = extract_scores_from_instances(instances)
        boxes_xywh = extract_boxes_xywh_from_instances(instances)
        if (scores is None) or (boxes_xywh is None):
            return (boxes_xywh, scores)
        if select is not None:
            scores = scores[select]
            boxes_xywh = boxes_xywh[select]
        return (boxes_xywh, scores)


class DensePoseResultExtractor(object):
    """
    Extracts DensePose result from instances
    """

    def __call__(self, instances: Instances, select=None):
        boxes_xywh = extract_boxes_xywh_from_instances(instances)
        if instances.has("pred_densepose") and (boxes_xywh is not None):
            dpout = instances.pred_densepose
            if select is not None:
                dpout = dpout[select]
                boxes_xywh = boxes_xywh[select]
            return dpout.to_result(boxes_xywh)
        else:
            return None


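# Hedged usage sketch (not part of the original file): restricts DensePose results to the top-k
# highest scoring detections via the `select` argument; the helper name and the k=5 default are
# illustrative, and it assumes the densepose output structure supports index-tensor selection.
def _example_extract_top_densepose_results(instances: Instances, top_k: int = 5):
    scores = extract_scores_from_instances(instances)
    if scores is None:
        return None
    select = torch.argsort(scores, descending=True)[:top_k]
    return DensePoseResultExtractor()(instances, select=select)

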
class CompoundExtractor(object):
    """
    Extracts data for CompoundVisualizer
    """

    def __init__(self, extractors):
        self.extractors = extractors

    def __call__(self, instances: Instances, select=None):
        datas = []
        for extractor in self.extractors:
            data = extractor(instances, select)
            datas.append(data)
        return datas


class NmsFilteredExtractor(object):
    """
    Extracts data in the format accepted by NmsFilteredVisualizer
    """

    def __init__(self, extractor, iou_threshold):
        self.extractor = extractor
        self.iou_threshold = iou_threshold

    def __call__(self, instances: Instances, select=None):
        scores = extract_scores_from_instances(instances)
        boxes_xywh = extract_boxes_xywh_from_instances(instances)
        if boxes_xywh is None:
            return None
        select_local_idx = batched_nms(
            boxes_xywh,
            scores,
            torch.zeros(len(scores), dtype=torch.int32),
            iou_threshold=self.iou_threshold,
        ).squeeze()
        select_local = torch.zeros(len(boxes_xywh), dtype=torch.bool, device=boxes_xywh.device)
        select_local[select_local_idx] = True
        select = select_local if select is None else (select & select_local)
        return self.extractor(instances, select=select)


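# Hedged usage sketch (not part of the original file): wraps the score+box extractor with NMS
# filtering before visualization; the 0.5 IoU threshold is illustrative only.
def _example_nms_filtered_boxes(instances: Instances, iou_threshold: float = 0.5):
    extractor = NmsFilteredExtractor(ScoredBoundingBoxExtractor(), iou_threshold=iou_threshold)
    return extractor(instances)

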
class ScoreThresholdedExtractor(object):
    """
    Extracts data in the format accepted by ScoreThresholdedVisualizer
    """

    def __init__(self, extractor, min_score):
        self.extractor = extractor
        self.min_score = min_score

    def __call__(self, instances: Instances, select=None):
        scores = extract_scores_from_instances(instances)
        if scores is None:
            return None
        select_local = scores > self.min_score
        select = select_local if select is None else (select & select_local)
        data = self.extractor(instances, select=select)
        return data
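

# Hedged usage sketch (not part of the original file): chains the score-threshold and NMS
# wrappers around the score+box extractor; the 0.8 cut-off and 0.5 IoU values are illustrative.
def _example_thresholded_nms_boxes(instances: Instances):
    extractor = ScoreThresholdedExtractor(
        NmsFilteredExtractor(ScoredBoundingBoxExtractor(), iou_threshold=0.5),
        min_score=0.8,
    )
    return extractor(instances)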