Add at new repo again

2025-01-28 21:48:35 +00:00
commit 6e660ddb3c
564 changed files with 75575 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .data.datasets import builtin # just to register data
from .config import add_densepose_config, add_dataset_category_config
from .densepose_head import ROI_DENSEPOSE_HEAD_REGISTRY
from .evaluator import DensePoseCOCOEvaluator
from .roi_head import DensePoseROIHeads
from .data.structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
from .modeling.test_time_augmentation import DensePoseGeneralizedRCNNWithTTA
from .utils.transform import load_from_cfg

View File

@@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from detectron2.config import CfgNode as CN
def add_dataset_category_config(cfg: CN):
"""
Add config for additional category-related dataset options
- category whitelisting
- category mapping
"""
_C = cfg
_C.DATASETS.CATEGORY_MAPS = CN(new_allowed=True)
_C.DATASETS.WHITELISTED_CATEGORIES = CN(new_allowed=True)
def add_densepose_config(cfg: CN):
"""
Add config for densepose head.
"""
_C = cfg
_C.MODEL.DENSEPOSE_ON = True
_C.MODEL.ROI_DENSEPOSE_HEAD = CN()
_C.MODEL.ROI_DENSEPOSE_HEAD.NAME = ""
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8
# Number of parts used for point labels
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24
_C.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4
_C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512
_C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3
_C.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2
_C.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE = 112
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE = "ROIAlignV2"
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 28
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO = 2
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 2 # 15 or 2
# Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
_C.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD = 0.7
# Loss weights for annotation masks (14 parts)
_C.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS = 5.0
# Loss weights for surface parts (24 parts)
_C.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS = 1.0
# Loss weights for UV regression.
_C.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS = 0.01
# For Decoder
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES = 256
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS = 256
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM = ""
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE = 4
# For DeepLab head
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB = CN()
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM = "GN"
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON = 0
# Confidences
# Enable learning confidences (variances) along with the actual values
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE = CN({"ENABLED": False})
# UV confidence lower bound
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON = 0.01
# Statistical model type for confidence learning, possible values:
# - "iid_iso": statistically independent identically distributed residuals
# with isotropic covariance
# - "indep_aniso": statistically independent residuals with anisotropic
# covariances
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE = "iid_iso"
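
A minimal sketch of how these helpers are typically wired into a detectron2 config, assuming the package is importable as `densepose` (as the `__init__.py` above suggests); the YAML path is hypothetical:

from detectron2.config import get_cfg
from densepose import add_densepose_config, add_dataset_category_config

cfg = get_cfg()                    # base detectron2 config
add_densepose_config(cfg)          # adds the MODEL.ROI_DENSEPOSE_HEAD.* options defined above
add_dataset_category_config(cfg)   # adds DATASETS.CATEGORY_MAPS / DATASETS.WHITELISTED_CATEGORIES
cfg.merge_from_file("configs/densepose_rcnn_R_50_FPN_s1x.yaml")  # hypothetical config file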

View File

@@ -0,0 +1,9 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .build import build_detection_test_loader, build_detection_train_loader
from .dataset_mapper import DatasetMapper
# ensure the builtin data are registered
from . import datasets
__all__ = [k for k in globals().keys() if not k.startswith("_")]

View File

@@ -0,0 +1,405 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import itertools
import logging
import numpy as np
import operator
from typing import Any, Callable, Collection, Dict, Iterable, List, Optional
import torch
from detectron2.config import CfgNode
from detectron2.data import samplers
from detectron2.data.build import (
load_proposals_into_dataset,
print_instances_class_histogram,
trivial_batch_collator,
worker_init_reset_seed,
)
from detectron2.data.catalog import DatasetCatalog, MetadataCatalog
from detectron2.data.common import AspectRatioGroupedDataset, DatasetFromList, MapDataset
from detectron2.utils.comm import get_world_size
from .dataset_mapper import DatasetMapper
from .datasets.coco import DENSEPOSE_KEYS_WITHOUT_MASK as DENSEPOSE_COCO_KEYS_WITHOUT_MASK
from .datasets.coco import DENSEPOSE_MASK_KEY as DENSEPOSE_COCO_MASK_KEY
__all__ = ["build_detection_train_loader", "build_detection_test_loader"]
Instance = Dict[str, Any]
InstancePredicate = Callable[[Instance], bool]
def _compute_num_images_per_worker(cfg: CfgNode):
num_workers = get_world_size()
images_per_batch = cfg.SOLVER.IMS_PER_BATCH
assert (
images_per_batch % num_workers == 0
), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
images_per_batch, num_workers
)
assert (
images_per_batch >= num_workers
), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
images_per_batch, num_workers
)
images_per_worker = images_per_batch // num_workers
return images_per_worker
def _map_category_id_to_contiguous_id(dataset_name: str, dataset_dicts: Iterable[Instance]):
meta = MetadataCatalog.get(dataset_name)
for dataset_dict in dataset_dicts:
for ann in dataset_dict["annotations"]:
ann["category_id"] = meta.thing_dataset_id_to_contiguous_id[ann["category_id"]]
def _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names: Iterable[str]):
# merge categories for all data
merged_categories = {}
for dataset_name in dataset_names:
meta = MetadataCatalog.get(dataset_name)
for cat_id, cat_name in meta.categories.items():
if cat_id not in merged_categories:
merged_categories[cat_id] = (cat_name, dataset_name)
continue
cat_name_other, dataset_name_other = merged_categories[cat_id]
if cat_name_other != cat_name:
raise ValueError(
f"Incompatible categories for category ID {cat_id}: "
f'dataset {dataset_name} value "{cat_name}", '
f'dataset {dataset_name_other} value "{cat_name_other}"'
)
merged_cat_id_to_cont_id = {}
for i, cat_id in enumerate(sorted(merged_categories.keys())):
merged_cat_id_to_cont_id[cat_id] = i
# add category maps to metadata
for dataset_name in dataset_names:
meta = MetadataCatalog.get(dataset_name)
categories = meta.get("categories")
meta.thing_classes = [categories[cat_id] for cat_id in sorted(categories.keys())]
meta.thing_dataset_id_to_contiguous_id = {
cat_id: merged_cat_id_to_cont_id[cat_id] for cat_id in sorted(categories.keys())
}
meta.thing_contiguous_id_to_dataset_id = {
merged_cat_id_to_cont_id[cat_id]: cat_id for cat_id in sorted(categories.keys())
}
def _maybe_create_general_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
def has_annotations(instance: Instance) -> bool:
return "annotations" in instance
def has_only_crowd_annotations(instance: Instance) -> bool:
for ann in instance["annotations"]:
if ann.get("is_crowd", 0) == 0:
return False
return True
def general_keep_instance_predicate(instance: Instance) -> bool:
return has_annotations(instance) and not has_only_crowd_annotations(instance)
if not cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS:
return None
return general_keep_instance_predicate
def _maybe_create_keypoints_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
min_num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
def has_sufficient_num_keypoints(instance: Instance) -> bool:
num_kpts = sum(
(np.array(ann["keypoints"][2::3]) > 0).sum()
for ann in instance["annotations"]
if "keypoints" in ann
)
return num_kpts >= min_num_keypoints
if cfg.MODEL.KEYPOINT_ON and (min_num_keypoints > 0):
return has_sufficient_num_keypoints
return None
def _maybe_create_mask_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
if not cfg.MODEL.MASK_ON:
return None
def has_mask_annotations(instance: Instance) -> bool:
return any("segmentation" in ann for ann in instance["annotations"])
return has_mask_annotations
def _maybe_create_densepose_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
if not cfg.MODEL.DENSEPOSE_ON:
return None
def has_densepose_annotations(instance: Instance) -> bool:
for ann in instance["annotations"]:
if all(key in ann for key in DENSEPOSE_COCO_KEYS_WITHOUT_MASK) and (
(DENSEPOSE_COCO_MASK_KEY in ann) or ("segmentation" in ann)
):
return True
return False
return has_densepose_annotations
def _maybe_create_specific_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
specific_predicate_creators = [
_maybe_create_keypoints_keep_instance_predicate,
_maybe_create_mask_keep_instance_predicate,
_maybe_create_densepose_keep_instance_predicate,
]
predicates = [creator(cfg) for creator in specific_predicate_creators]
predicates = [p for p in predicates if p is not None]
if not predicates:
return None
def combined_predicate(instance: Instance) -> bool:
return any(p(instance) for p in predicates)
return combined_predicate
def _get_train_keep_instance_predicate(cfg: CfgNode):
general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg)
combined_specific_keep_predicate = _maybe_create_specific_keep_instance_predicate(cfg)
def combined_general_specific_keep_predicate(instance: Instance) -> bool:
return general_keep_predicate(instance) and combined_specific_keep_predicate(instance)
if (general_keep_predicate is None) and (combined_specific_keep_predicate is None):
return None
if general_keep_predicate is None:
return combined_specific_keep_predicate
if combined_specific_keep_predicate is None:
return general_keep_predicate
return combined_general_specific_keep_predicate
def _get_test_keep_instance_predicate(cfg: CfgNode):
general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg)
return general_keep_predicate
def _maybe_filter_and_map_categories(
dataset_name: str, dataset_dicts: List[Instance]
) -> List[Instance]:
meta = MetadataCatalog.get(dataset_name)
whitelisted_categories = meta.get("whitelisted_categories")
category_map = meta.get("category_map", {})
if whitelisted_categories is None and not category_map:
return dataset_dicts
filtered_dataset_dicts = []
for dataset_dict in dataset_dicts:
anns = []
for ann in dataset_dict["annotations"]:
cat_id = ann["category_id"]
if whitelisted_categories is not None and cat_id not in whitelisted_categories:
continue
ann["category_id"] = category_map.get(cat_id, cat_id)
anns.append(ann)
dataset_dict["annotations"] = anns
filtered_dataset_dicts.append(dataset_dict)
return filtered_dataset_dicts
def _add_category_whitelists_to_metadata(cfg: CfgNode):
for dataset_name, whitelisted_cat_ids in cfg.DATASETS.WHITELISTED_CATEGORIES.items():
meta = MetadataCatalog.get(dataset_name)
meta.whitelisted_categories = whitelisted_cat_ids
logger = logging.getLogger(__name__)
logger.info(
"Whitelisted categories for dataset {}: {}".format(
dataset_name, meta.whitelisted_categories
)
)
def _add_category_maps_to_metadata(cfg: CfgNode):
for dataset_name, category_map in cfg.DATASETS.CATEGORY_MAPS.items():
category_map = {
int(cat_id_src): int(cat_id_dst) for cat_id_src, cat_id_dst in category_map.items()
}
meta = MetadataCatalog.get(dataset_name)
meta.category_map = category_map
logger = logging.getLogger(__name__)
logger.info("Category maps for dataset {}: {}".format(dataset_name, meta.category_map))
def combine_detection_dataset_dicts(
dataset_names: Collection[str],
keep_instance_predicate: Optional[InstancePredicate] = None,
proposal_files: Optional[Collection[str]] = None,
) -> List[Instance]:
"""
Load and prepare dataset dicts for training / testing
Args:
dataset_names (Collection[str]): a list of dataset names
keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate
applied to instance dicts which defines whether to keep the instance
proposal_files (Collection[str]): if given, a list of object proposal files
that match each dataset in `dataset_names`.
"""
assert len(dataset_names)
if proposal_files is None:
proposal_files = [None] * len(dataset_names)
assert len(dataset_names) == len(proposal_files)
# load annotations and dataset metadata
dataset_map = {}
for dataset_name in dataset_names:
dataset_dicts = DatasetCatalog.get(dataset_name)
dataset_map[dataset_name] = dataset_dicts
# initialize category maps
_add_category_id_to_contiguous_id_maps_to_metadata(dataset_names)
# apply category maps
all_datasets_dicts = []
for dataset_name, proposal_file in zip(dataset_names, proposal_files):
dataset_dicts = dataset_map[dataset_name]
assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!"
if proposal_file is not None:
dataset_dicts = load_proposals_into_dataset(dataset_dicts, proposal_file)
dataset_dicts = _maybe_filter_and_map_categories(dataset_name, dataset_dicts)
_map_category_id_to_contiguous_id(dataset_name, dataset_dicts)
print_instances_class_histogram(
dataset_dicts, MetadataCatalog.get(dataset_name).thing_classes
)
all_datasets_dicts.append(dataset_dicts)
if keep_instance_predicate is not None:
all_datasets_dicts_plain = [
d
for d in itertools.chain.from_iterable(all_datasets_dicts)
if keep_instance_predicate(d)
]
else:
all_datasets_dicts_plain = list(itertools.chain.from_iterable(all_datasets_dicts))
return all_datasets_dicts_plain
def build_detection_train_loader(cfg: CfgNode, mapper=None):
"""
A data loader is created in a way similar to that of Detectron2.
The main differences are:
- it allows combining datasets with different but compatible object category sets
The data loader is created by the following steps:
1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts.
2. Start workers to work on the dicts. Each worker will:
* Map each metadata dict into another format to be consumed by the model.
* Batch them by simply putting dicts into a list.
The batched ``list[mapped_dict]`` is what this dataloader will return.
Args:
cfg (CfgNode): the config
mapper (callable): a callable which takes a sample (dict) from dataset and
returns the format to be consumed by the model.
By default it will be `DatasetMapper(cfg, True)`.
Returns:
an infinite iterator of training data
"""
images_per_worker = _compute_num_images_per_worker(cfg)
_add_category_whitelists_to_metadata(cfg)
_add_category_maps_to_metadata(cfg)
dataset_dicts = combine_detection_dataset_dicts(
cfg.DATASETS.TRAIN,
keep_instance_predicate=_get_train_keep_instance_predicate(cfg),
proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
)
dataset = DatasetFromList(dataset_dicts, copy=False)
if mapper is None:
mapper = DatasetMapper(cfg, True)
dataset = MapDataset(dataset, mapper)
sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
logger = logging.getLogger(__name__)
logger.info("Using training sampler {}".format(sampler_name))
if sampler_name == "TrainingSampler":
sampler = samplers.TrainingSampler(len(dataset))
elif sampler_name == "RepeatFactorTrainingSampler":
sampler = samplers.RepeatFactorTrainingSampler(
dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
)
else:
raise ValueError("Unknown training sampler: {}".format(sampler_name))
if cfg.DATALOADER.ASPECT_RATIO_GROUPING:
data_loader = torch.utils.data.DataLoader(
dataset,
sampler=sampler,
num_workers=cfg.DATALOADER.NUM_WORKERS,
batch_sampler=None,
collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements
worker_init_fn=worker_init_reset_seed,
) # yield individual mapped dict
data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker)
else:
batch_sampler = torch.utils.data.sampler.BatchSampler(
sampler, images_per_worker, drop_last=True
)
# drop_last so the batch always have the same size
data_loader = torch.utils.data.DataLoader(
dataset,
num_workers=cfg.DATALOADER.NUM_WORKERS,
batch_sampler=batch_sampler,
collate_fn=trivial_batch_collator,
worker_init_fn=worker_init_reset_seed,
)
return data_loader
def build_detection_test_loader(cfg, dataset_name, mapper=None):
"""
Similar to `build_detection_train_loader`.
But this function uses the given `dataset_name` argument (instead of the names in cfg),
and uses batch size 1.
Args:
cfg: a detectron2 CfgNode
dataset_name (str): a name of the dataset that's available in the DatasetCatalog
mapper (callable): a callable which takes a sample (dict) from dataset
and returns the format to be consumed by the model.
By default it will be `DatasetMapper(cfg, False)`.
Returns:
DataLoader: a torch DataLoader, that loads the given detection
dataset, with test-time transformation and batching.
"""
_add_category_whitelists_to_metadata(cfg)
_add_category_maps_to_metadata(cfg)
dataset_dicts = combine_detection_dataset_dicts(
[dataset_name],
keep_instance_predicate=_get_test_keep_instance_predicate(cfg),
proposal_files=[
cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)]
]
if cfg.MODEL.LOAD_PROPOSALS
else None,
)
dataset = DatasetFromList(dataset_dicts)
if mapper is None:
mapper = DatasetMapper(cfg, False)
dataset = MapDataset(dataset, mapper)
sampler = samplers.InferenceSampler(len(dataset))
# Always use 1 image per worker during inference since this is the
# standard when reporting inference time in papers.
batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)
data_loader = torch.utils.data.DataLoader(
dataset,
num_workers=cfg.DATALOADER.NUM_WORKERS,
batch_sampler=batch_sampler,
collate_fn=trivial_batch_collator,
)
return data_loader
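
A hedged sketch of using the loaders defined above, assuming `cfg` has been prepared as in the earlier config example and that `cfg.DATASETS.TRAIN` lists registered DensePose datasets:

from densepose.data import build_detection_train_loader

train_loader = build_detection_train_loader(cfg)  # infinite iterator over training batches
batch = next(iter(train_loader))                  # list[dict]; each dict holds "image" and "instances"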

View File

@@ -0,0 +1,118 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import copy
import torch
from fvcore.common.file_io import PathManager
from detectron2.data import MetadataCatalog
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
class DatasetMapper:
"""
A customized version of `detectron2.data.DatasetMapper`
"""
def __init__(self, cfg, is_train=True):
self.tfm_gens = utils.build_transform_gen(cfg, is_train)
# fmt: off
self.img_format = cfg.INPUT.FORMAT
self.mask_on = cfg.MODEL.MASK_ON
self.keypoint_on = cfg.MODEL.KEYPOINT_ON
self.densepose_on = cfg.MODEL.DENSEPOSE_ON
assert not cfg.MODEL.LOAD_PROPOSALS, "not supported yet"
# fmt: on
if self.keypoint_on and is_train:
# Flip only makes sense in training
self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
else:
self.keypoint_hflip_indices = None
if self.densepose_on:
densepose_transform_srcs = [
MetadataCatalog.get(ds).densepose_transform_src
for ds in cfg.DATASETS.TRAIN + cfg.DATASETS.TEST
]
assert len(densepose_transform_srcs) > 0
# TODO: check that DensePose transformation data is the same for
# all the data. Otherwise one would have to pass DB ID with
# each entry to select proper transformation data. For now, since
# all DensePose annotated data uses the same data semantics, we
# omit this check.
densepose_transform_data_fpath = PathManager.get_local_path(densepose_transform_srcs[0])
self.densepose_transform_data = DensePoseTransformData.load(
densepose_transform_data_fpath
)
self.is_train = is_train
def __call__(self, dataset_dict):
"""
Args:
dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
Returns:
dict: a format that builtin models in detectron2 accept
"""
dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
utils.check_image_size(dataset_dict, image)
image, transforms = T.apply_transform_gens(self.tfm_gens, image)
image_shape = image.shape[:2] # h, w
dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
if not self.is_train:
dataset_dict.pop("annotations", None)
return dataset_dict
for anno in dataset_dict["annotations"]:
if not self.mask_on:
anno.pop("segmentation", None)
if not self.keypoint_on:
anno.pop("keypoints", None)
# USER: Implement additional transformations if you have other types of data
# USER: Don't call transpose_densepose if you don't need
annos = [
self._transform_densepose(
utils.transform_instance_annotations(
obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
),
transforms,
)
for obj in dataset_dict.pop("annotations")
if obj.get("iscrowd", 0) == 0
]
instances = utils.annotations_to_instances(annos, image_shape)
if len(annos) and "densepose" in annos[0]:
gt_densepose = [obj["densepose"] for obj in annos]
instances.gt_densepose = DensePoseList(gt_densepose, instances.gt_boxes, image_shape)
dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]
return dataset_dict
def _transform_densepose(self, annotation, transforms):
if not self.densepose_on:
return annotation
# Handle densepose annotations
is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
if is_valid:
densepose_data = DensePoseDataRelative(annotation, cleanup=True)
densepose_data.apply_transform(transforms, self.densepose_transform_data)
annotation["densepose"] = densepose_data
else:
# logger = logging.getLogger(__name__)
# logger.debug("Could not load DensePose annotation: {}".format(reason_not_valid))
DensePoseDataRelative.cleanup_annotation(annotation)
# NOTE: annotations for certain instances may be unavailable.
# 'None' is accepted by the DensePoseList data structure.
annotation["densepose"] = None
return annotation
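
A hedged sketch of applying the mapper to one dataset dict, assuming `dataset_dicts` was obtained from `DatasetCatalog.get(...)` for a registered DensePose dataset and `cfg` is set up as above:

from densepose.data import DatasetMapper

mapper = DatasetMapper(cfg, is_train=True)
mapped = mapper(dataset_dicts[0])
# mapped["image"] is a float32 CHW tensor; mapped["instances"] carries gt_boxes and,
# when DensePose annotations are present, a gt_densepose DensePoseList.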

View File

@@ -0,0 +1,5 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from . import builtin # ensure the builtin data are registered
__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")]

View File

@@ -0,0 +1,10 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .coco import BASE_DATASETS as BASE_COCO_DATASETS
from .coco import DATASETS as COCO_DATASETS
from .coco import register_datasets as register_coco_datasets
DEFAULT_DATASETS_ROOT = "data"
register_coco_datasets(COCO_DATASETS, DEFAULT_DATASETS_ROOT)
register_coco_datasets(BASE_COCO_DATASETS, DEFAULT_DATASETS_ROOT)

View File

@@ -0,0 +1,314 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import contextlib
import io
import logging
import os
from dataclasses import dataclass
from typing import Any, Dict, Iterable, List, Optional
from fvcore.common.file_io import PathManager
from fvcore.common.timer import Timer
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
DENSEPOSE_MASK_KEY = "dp_masks"
DENSEPOSE_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V"]
DENSEPOSE_KEYS = DENSEPOSE_KEYS_WITHOUT_MASK + [DENSEPOSE_MASK_KEY]
DENSEPOSE_METADATA_URL_PREFIX = "https://dl.fbaipublicfiles.com/densepose/data/"
@dataclass
class CocoDatasetInfo:
name: str
images_root: str
annotations_fpath: str
DATASETS = [
CocoDatasetInfo(
name="densepose_coco_2014_train",
images_root="coco/train2014",
annotations_fpath="coco/annotations/densepose_train2014.json",
),
CocoDatasetInfo(
name="densepose_coco_2014_minival",
images_root="coco/val2014",
annotations_fpath="coco/annotations/densepose_minival2014.json",
),
CocoDatasetInfo(
name="densepose_coco_2014_minival_100",
images_root="coco/val2014",
annotations_fpath="coco/annotations/densepose_minival2014_100.json",
),
CocoDatasetInfo(
name="densepose_coco_2014_valminusminival",
images_root="coco/val2014",
annotations_fpath="coco/annotations/densepose_valminusminival2014.json",
),
CocoDatasetInfo(
name="densepose_chimps",
images_root="densepose_evolution/densepose_chimps",
annotations_fpath="densepose_evolution/annotations/densepose_chimps_densepose.json",
),
]
BASE_DATASETS = [
CocoDatasetInfo(
name="base_coco_2017_train",
images_root="coco/train2017",
annotations_fpath="coco/annotations/instances_train2017.json",
),
CocoDatasetInfo(
name="base_coco_2017_val",
images_root="coco/val2017",
annotations_fpath="coco/annotations/instances_val2017.json",
),
CocoDatasetInfo(
name="base_coco_2017_val_100",
images_root="coco/val2017",
annotations_fpath="coco/annotations/instances_val2017_100.json",
),
]
def _is_relative_local_path(path: os.PathLike):
path_str = os.fsdecode(path)
return ("://" not in path_str) and not os.path.isabs(path)
def _maybe_prepend_base_path(base_path: Optional[os.PathLike], path: os.PathLike):
"""
Prepends the provided path with a base path prefix if:
1) base path is not None;
2) path is a local path
"""
if base_path is None:
return path
if _is_relative_local_path(path):
return os.path.join(base_path, path)
return path
def get_metadata(base_path: Optional[os.PathLike]) -> Dict[str, Any]:
"""
Returns metadata associated with COCO DensePose data
Args:
base_path: Optional[os.PathLike]
Base path used to load metadata from
Returns:
Dict[str, Any]
Metadata in the form of a dictionary
"""
meta = {
"densepose_transform_src": _maybe_prepend_base_path(
base_path, "UV_symmetry_transforms.mat"
),
"densepose_smpl_subdiv": _maybe_prepend_base_path(base_path, "SMPL_subdiv.mat"),
"densepose_smpl_subdiv_transform": _maybe_prepend_base_path(
base_path, "SMPL_SUBDIV_TRANSFORM.mat"
),
}
return meta
def _load_coco_annotations(json_file: str):
"""
Load COCO annotations from a JSON file
Args:
json_file: str
Path to the file to load annotations from
Returns:
Instance of `pycocotools.coco.COCO` that provides access to annotations
data
"""
from pycocotools.coco import COCO
logger = logging.getLogger(__name__)
timer = Timer()
with contextlib.redirect_stdout(io.StringIO()):
coco_api = COCO(json_file)
if timer.seconds() > 1:
logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
return coco_api
def _add_categories_metadata(dataset_name: str, categories: Dict[str, Any]):
meta = MetadataCatalog.get(dataset_name)
meta.categories = {c["id"]: c["name"] for c in categories}
logger = logging.getLogger(__name__)
logger.info("Dataset {} categories: {}".format(dataset_name, categories))
def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]):
if "minival" in json_file:
# Skip validation on COCO2014 valminusminival and minival annotations
# The ratio of buggy annotations there is tiny and does not affect accuracy
# Therefore we explicitly white-list them
return
ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
json_file
)
def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "bbox" not in ann_dict:
return
obj["bbox"] = ann_dict["bbox"]
obj["bbox_mode"] = BoxMode.XYWH_ABS
def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "segmentation" not in ann_dict:
return
segm = ann_dict["segmentation"]
if not isinstance(segm, dict):
# filter out invalid polygons (< 3 points)
segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
if len(segm) == 0:
return
obj["segmentation"] = segm
def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "keypoints" not in ann_dict:
return
keypts = ann_dict["keypoints"] # list[int]
for idx, v in enumerate(keypts):
if idx % 3 != 2:
# COCO's segmentation coordinates are floating points in [0, H or W],
# but keypoint coordinates are integers in [0, H-1 or W-1]
# Therefore we assume the coordinates are "pixel indices" and
# add 0.5 to convert to floating point coordinates.
keypts[idx] = v + 0.5
obj["keypoints"] = keypts
def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
for key in DENSEPOSE_KEYS:
if key in ann_dict:
obj[key] = ann_dict[key]
def _combine_images_with_annotations(
dataset_name: str,
image_root: str,
img_datas: Iterable[Dict[str, Any]],
ann_datas: Iterable[Iterable[Dict[str, Any]]],
):
ann_keys = ["iscrowd", "category_id"]
dataset_dicts = []
for img_dict, ann_dicts in zip(img_datas, ann_datas):
record = {}
record["file_name"] = os.path.join(image_root, img_dict["file_name"])
record["height"] = img_dict["height"]
record["width"] = img_dict["width"]
record["image_id"] = img_dict["id"]
record["dataset"] = dataset_name
objs = []
for ann_dict in ann_dicts:
assert ann_dict["image_id"] == record["image_id"]
assert ann_dict.get("ignore", 0) == 0
obj = {key: ann_dict[key] for key in ann_keys if key in ann_dict}
_maybe_add_bbox(obj, ann_dict)
_maybe_add_segm(obj, ann_dict)
_maybe_add_keypoints(obj, ann_dict)
_maybe_add_densepose(obj, ann_dict)
objs.append(obj)
record["annotations"] = objs
dataset_dicts.append(record)
return dataset_dicts
def load_coco_json(annotations_json_file: str, image_root: str, dataset_name: str):
"""
Loads a JSON file with annotations in COCO instances format.
Replaces `detectron2.data.datasets.coco.load_coco_json` to handle metadata
in a more flexible way. Postpones category mapping to a later stage to be
able to combine several datasets with different (but coherent) sets of
categories.
Args:
annotations_json_file: str
Path to the JSON file with annotations in COCO instances format.
image_root: str
directory that contains all the images
dataset_name: str
the name that identifies a dataset, e.g. "densepose_coco_2014_train"
"""
coco_api = _load_coco_annotations(PathManager.get_local_path(annotations_json_file))
_add_categories_metadata(dataset_name, coco_api.loadCats(coco_api.getCatIds()))
# sort indices for reproducible results
img_ids = sorted(coco_api.imgs.keys())
# imgs is a list of dicts, each looks something like:
# {'license': 4,
# 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
# 'file_name': 'COCO_val2014_000000001268.jpg',
# 'height': 427,
# 'width': 640,
# 'date_captured': '2013-11-17 05:57:24',
# 'id': 1268}
imgs = coco_api.loadImgs(img_ids)
logger = logging.getLogger(__name__)
logger.info("Loaded {} images in COCO format from {}".format(len(imgs), annotations_json_file))
# anns is a list[list[dict]], where each dict is an annotation
# record for an object. The inner list enumerates the objects in an image
# and the outer list enumerates over images.
anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
_verify_annotations_have_unique_ids(annotations_json_file, anns)
dataset_records = _combine_images_with_annotations(dataset_name, image_root, imgs, anns)
return dataset_records
def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[os.PathLike] = None):
"""
Registers provided COCO DensePose dataset
Args:
dataset_data: CocoDatasetInfo
Dataset data
datasets_root: Optional[os.PathLike]
Datasets root folder (default: None)
"""
annotations_fpath = _maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath)
images_root = _maybe_prepend_base_path(datasets_root, dataset_data.images_root)
def load_annotations():
return load_coco_json(
annotations_json_file=annotations_fpath,
image_root=images_root,
dataset_name=dataset_data.name,
)
DatasetCatalog.register(dataset_data.name, load_annotations)
MetadataCatalog.get(dataset_data.name).set(
json_file=annotations_fpath,
image_root=images_root,
**get_metadata(DENSEPOSE_METADATA_URL_PREFIX)
)
def register_datasets(
datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[os.PathLike] = None
):
"""
Registers provided COCO DensePose data
Args:
datasets_data: Iterable[CocoDatasetInfo]
An iterable of dataset datas
datasets_root: Optional[os.PathLike]
Datasets root folder (default: None)
"""
for dataset_data in datasets_data:
register_dataset(dataset_data, datasets_root)
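
A hedged sketch of registering an additional COCO-format DensePose dataset with the helpers above; the dataset name and paths are hypothetical, and the module path assumes the package layout implied by the imports:

from densepose.data.datasets.coco import CocoDatasetInfo, register_dataset

my_dataset = CocoDatasetInfo(
    name="densepose_custom_train",
    images_root="custom/images",
    annotations_fpath="custom/annotations/densepose_train.json",
)
register_dataset(my_dataset, datasets_root="data")
# Once registered, "densepose_custom_train" can be listed in cfg.DATASETS.TRAIN.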

View File

@@ -0,0 +1,579 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import base64
import numpy as np
from io import BytesIO
import torch
from PIL import Image
from torch.nn import functional as F
class DensePoseTransformData(object):
# Horizontal symmetry label transforms used for horizontal flip
MASK_LABEL_SYMMETRIES = [0, 1, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14]
# fmt: off
POINT_LABEL_SYMMETRIES = [ 0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24, 23] # noqa
# fmt: on
def __init__(self, uv_symmetries):
self.mask_label_symmetries = DensePoseTransformData.MASK_LABEL_SYMMETRIES
self.point_label_symmetries = DensePoseTransformData.POINT_LABEL_SYMMETRIES
self.uv_symmetries = uv_symmetries
@staticmethod
def load(fpath):
import scipy.io
uv_symmetry_map = scipy.io.loadmat(fpath)
uv_symmetry_map_torch = {}
for key in ["U_transforms", "V_transforms"]:
uv_symmetry_map_torch[key] = []
map_src = uv_symmetry_map[key]
map_dst = uv_symmetry_map_torch[key]
for i in range(map_src.shape[1]):
map_dst.append(torch.from_numpy(map_src[0, i]).to(dtype=torch.float))
uv_symmetry_map_torch[key] = torch.stack(map_dst, dim=0).to(
device=torch.cuda.current_device()
)
transform_data = DensePoseTransformData(uv_symmetry_map_torch)
return transform_data
class DensePoseDataRelative(object):
"""
Dense pose relative annotations that can be applied to any bounding box:
x - normalized X coordinates [0, 255] of annotated points
y - normalized Y coordinates [0, 255] of annotated points
i - body part labels 0,...,24 for annotated points
u - body part U coordinates [0, 1] for annotated points
v - body part V coordinates [0, 1] for annotated points
segm - 256x256 segmentation mask with values 0,...,14
To obtain absolute x and y data wrt some bounding box one needs to first
divide the data by 256, multiply by the respective bounding box size
and add bounding box offset:
x_img = x0 + x_norm * w / 256.0
y_img = y0 + y_norm * h / 256.0
Segmentation masks are typically sampled to get image-based masks.
"""
# Key for normalized X coordinates in annotation dict
X_KEY = "dp_x"
# Key for normalized Y coordinates in annotation dict
Y_KEY = "dp_y"
# Key for U part coordinates in annotation dict
U_KEY = "dp_U"
# Key for V part coordinates in annotation dict
V_KEY = "dp_V"
# Key for I point labels in annotation dict
I_KEY = "dp_I"
# Key for segmentation mask in annotation dict
S_KEY = "dp_masks"
# Number of body parts in segmentation masks
N_BODY_PARTS = 14
# Number of parts in point labels
N_PART_LABELS = 24
MASK_SIZE = 256
def __init__(self, annotation, cleanup=False):
is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
assert is_valid, "Invalid DensePose annotations: {}".format(reason_not_valid)
self.x = torch.as_tensor(annotation[DensePoseDataRelative.X_KEY])
self.y = torch.as_tensor(annotation[DensePoseDataRelative.Y_KEY])
self.i = torch.as_tensor(annotation[DensePoseDataRelative.I_KEY])
self.u = torch.as_tensor(annotation[DensePoseDataRelative.U_KEY])
self.v = torch.as_tensor(annotation[DensePoseDataRelative.V_KEY])
self.segm = DensePoseDataRelative.extract_segmentation_mask(annotation)
self.device = torch.device("cpu")
if cleanup:
DensePoseDataRelative.cleanup_annotation(annotation)
def to(self, device):
if self.device == device:
return self
new_data = DensePoseDataRelative.__new__(DensePoseDataRelative)
new_data.x = self.x.to(device)
new_data.y = self.y.to(device)
new_data.i = self.i.to(device)
new_data.u = self.u.to(device)
new_data.v = self.v.to(device)
new_data.segm = self.segm.to(device)
new_data.device = device
return new_data
@staticmethod
def extract_segmentation_mask(annotation):
import pycocotools.mask as mask_utils
poly_specs = annotation[DensePoseDataRelative.S_KEY]
segm = torch.zeros((DensePoseDataRelative.MASK_SIZE,) * 2, dtype=torch.float32)
for i in range(DensePoseDataRelative.N_BODY_PARTS):
poly_i = poly_specs[i]
if poly_i:
mask_i = mask_utils.decode(poly_i)
segm[mask_i > 0] = i + 1
return segm
@staticmethod
def validate_annotation(annotation):
for key in [
DensePoseDataRelative.X_KEY,
DensePoseDataRelative.Y_KEY,
DensePoseDataRelative.I_KEY,
DensePoseDataRelative.U_KEY,
DensePoseDataRelative.V_KEY,
DensePoseDataRelative.S_KEY,
]:
if key not in annotation:
return False, "no {key} data in the annotation".format(key=key)
return True, None
@staticmethod
def cleanup_annotation(annotation):
for key in [
DensePoseDataRelative.X_KEY,
DensePoseDataRelative.Y_KEY,
DensePoseDataRelative.I_KEY,
DensePoseDataRelative.U_KEY,
DensePoseDataRelative.V_KEY,
DensePoseDataRelative.S_KEY,
]:
if key in annotation:
del annotation[key]
def apply_transform(self, transforms, densepose_transform_data):
self._transform_pts(transforms, densepose_transform_data)
self._transform_segm(transforms, densepose_transform_data)
def _transform_pts(self, transforms, dp_transform_data):
import detectron2.data.transforms as T
# NOTE: This assumes that HorizFlipTransform is the only one that does flip
do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
if do_hflip:
self.x = self.segm.size(1) - self.x
self._flip_iuv_semantics(dp_transform_data)
def _flip_iuv_semantics(self, dp_transform_data: DensePoseTransformData) -> None:
i_old = self.i.clone()
uv_symmetries = dp_transform_data.uv_symmetries
pt_label_symmetries = dp_transform_data.point_label_symmetries
for i in range(self.N_PART_LABELS):
if i + 1 in i_old:
annot_indices_i = i_old == i + 1
if pt_label_symmetries[i + 1] != i + 1:
self.i[annot_indices_i] = pt_label_symmetries[i + 1]
u_loc = (self.u[annot_indices_i] * 255).long()
v_loc = (self.v[annot_indices_i] * 255).long()
self.u[annot_indices_i] = uv_symmetries["U_transforms"][i][v_loc, u_loc].to(
device=self.u.device
)
self.v[annot_indices_i] = uv_symmetries["V_transforms"][i][v_loc, u_loc].to(
device=self.v.device
)
def _transform_segm(self, transforms, dp_transform_data):
import detectron2.data.transforms as T
# NOTE: This assumes that HorizFlipTransform is the only one that does flip
do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
if do_hflip:
self.segm = torch.flip(self.segm, [1])
self._flip_segm_semantics(dp_transform_data)
def _flip_segm_semantics(self, dp_transform_data):
old_segm = self.segm.clone()
mask_label_symmetries = dp_transform_data.mask_label_symmetries
for i in range(self.N_BODY_PARTS):
if mask_label_symmetries[i + 1] != i + 1:
self.segm[old_segm == i + 1] = mask_label_symmetries[i + 1]
def normalized_coords_transform(x0, y0, w, h):
"""
Coordinates transform that maps top left corner to (-1, -1) and bottom
right corner to (1, 1). Used for torch.grid_sample to initialize the
grid
"""
def f(p):
return (2 * (p[0] - x0) / w - 1, 2 * (p[1] - y0) / h - 1)
return f
class DensePoseOutput(object):
def __init__(self, S, I, U, V, confidences):
"""
Args:
S (`torch.Tensor`): coarse segmentation tensor of size (N, A, H, W)
I (`torch.Tensor`): fine segmentation tensor of size (N, C, H, W)
U (`torch.Tensor`): U coordinates for each fine segmentation label of size (N, C, H, W)
V (`torch.Tensor`): V coordinates for each fine segmentation label of size (N, C, H, W)
confidences (dict of str -> `torch.Tensor`) estimated confidence model parameters
"""
self.S = S
self.I = I # noqa: E741
self.U = U
self.V = V
self.confidences = confidences
self._check_output_dims(S, I, U, V)
def _check_output_dims(self, S, I, U, V):
assert (
len(S.size()) == 4
), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format(
S.size()
)
assert (
len(I.size()) == 4
), "Part index output should have 4 " "dimensions (NCHW), but has size {}".format(
I.size()
)
assert (
len(U.size()) == 4
), "U coordinates output should have 4 " "dimensions (NCHW), but has size {}".format(
U.size()
)
assert (
len(V.size()) == 4
), "V coordinates output should have 4 " "dimensions (NCHW), but has size {}".format(
V.size()
)
assert len(S) == len(I), (
"Number of output segmentation planes {} "
"should be equal to the number of output part index "
"planes {}".format(len(S), len(I))
)
assert S.size()[2:] == I.size()[2:], (
"Output segmentation plane size {} "
"should be equal to the output part index "
"plane size {}".format(S.size()[2:], I.size()[2:])
)
assert I.size() == U.size(), (
"Part index output shape {} "
"should be the same as U coordinates output shape {}".format(I.size(), U.size())
)
assert I.size() == V.size(), (
"Part index output shape {} "
"should be the same as V coordinates output shape {}".format(I.size(), V.size())
)
def resize(self, image_size_hw):
# do nothing - outputs are invariant to resize
pass
def _crop(self, S, I, U, V, bbox_old_xywh, bbox_new_xywh):
"""
Resample S, I, U, V from bbox_old to the cropped bbox_new
"""
x0old, y0old, wold, hold = bbox_old_xywh
x0new, y0new, wnew, hnew = bbox_new_xywh
tr_coords = normalized_coords_transform(x0old, y0old, wold, hold)
topleft = (x0new, y0new)
bottomright = (x0new + wnew, y0new + hnew)
topleft_norm = tr_coords(topleft)
bottomright_norm = tr_coords(bottomright)
hsize = S.size(1)
wsize = S.size(2)
grid = torch.meshgrid(
torch.arange(
topleft_norm[1],
bottomright_norm[1],
(bottomright_norm[1] - topleft_norm[1]) / hsize,
)[:hsize],
torch.arange(
topleft_norm[0],
bottomright_norm[0],
(bottomright_norm[0] - topleft_norm[0]) / wsize,
)[:wsize],
)
grid = torch.stack(grid, dim=2).to(S.device)
assert (
grid.size(0) == hsize
), "Resampled grid expected " "height={}, actual height={}".format(hsize, grid.size(0))
assert grid.size(1) == wsize, "Resampled grid expected " "width={}, actual width={}".format(
wsize, grid.size(1)
)
S_new = F.grid_sample(
S.unsqueeze(0),
torch.unsqueeze(grid, 0),
mode="bilinear",
padding_mode="border",
align_corners=True,
).squeeze(0)
I_new = F.grid_sample(
I.unsqueeze(0),
torch.unsqueeze(grid, 0),
mode="bilinear",
padding_mode="border",
align_corners=True,
).squeeze(0)
U_new = F.grid_sample(
U.unsqueeze(0),
torch.unsqueeze(grid, 0),
mode="bilinear",
padding_mode="border",
align_corners=True,
).squeeze(0)
V_new = F.grid_sample(
V.unsqueeze(0),
torch.unsqueeze(grid, 0),
mode="bilinear",
padding_mode="border",
align_corners=True,
).squeeze(0)
return S_new, I_new, U_new, V_new
def crop(self, indices_cropped, bboxes_old, bboxes_new):
"""
Crop outputs for selected bounding boxes to the new bounding boxes.
"""
# VK: cropping is ignored for now
# for i, ic in enumerate(indices_cropped):
# self.S[ic], self.I[ic], self.U[ic], self.V[ic] = \
# self._crop(self.S[ic], self.I[ic], self.U[ic], self.V[ic],
# bboxes_old[i], bboxes_new[i])
pass
def hflip(self, transform_data: DensePoseTransformData) -> None:
"""
Change S, I, U and V to take into account a Horizontal flip.
"""
if self.I.shape[0] > 0:
for el in "SIUV":
self.__dict__[el] = torch.flip(self.__dict__[el], [3])
self._flip_iuv_semantics_tensor(transform_data)
self._flip_segm_semantics_tensor(transform_data)
def _flip_iuv_semantics_tensor(self, dp_transform_data: DensePoseTransformData) -> None:
point_label_symmetries = dp_transform_data.point_label_symmetries
uv_symmetries = dp_transform_data.uv_symmetries
N, C, H, W = self.U.shape
u_loc = (self.U[:, 1:, :, :].clamp(0, 1) * 255).long()
v_loc = (self.V[:, 1:, :, :].clamp(0, 1) * 255).long()
Iindex = torch.arange(C - 1, device=self.U.device)[None, :, None, None].expand(
N, C - 1, H, W
)
self.U[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc].to(
device=self.U.device
)
self.V[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc].to(
device=self.V.device
)
for el in "IUV":
self.__dict__[el] = self.__dict__[el][:, point_label_symmetries, :, :]
def _flip_segm_semantics_tensor(self, dp_transform_data):
if self.S.shape[1] == DensePoseDataRelative.N_BODY_PARTS + 1:
self.S = self.S[:, dp_transform_data.mask_label_symmetries, :, :]
def to_result(self, boxes_xywh):
"""
Convert DensePose outputs to results format. Results are more compact,
but cannot be resampled any more
"""
result = DensePoseResult(boxes_xywh, self.S, self.I, self.U, self.V)
return result
def __getitem__(self, item):
if isinstance(item, int):
S_selected = self.S[item].unsqueeze(0)
I_selected = self.I[item].unsqueeze(0)
U_selected = self.U[item].unsqueeze(0)
V_selected = self.V[item].unsqueeze(0)
conf_selected = {}
for key in self.confidences:
conf_selected[key] = self.confidences[key][item].unsqueeze(0)
else:
S_selected = self.S[item]
I_selected = self.I[item]
U_selected = self.U[item]
V_selected = self.V[item]
conf_selected = {}
for key in self.confidences:
conf_selected[key] = self.confidences[key][item]
return DensePoseOutput(S_selected, I_selected, U_selected, V_selected, conf_selected)
def __str__(self):
s = "DensePoseOutput S {}, I {}, U {}, V {}".format(
list(self.S.size()), list(self.I.size()), list(self.U.size()), list(self.V.size())
)
s_conf = "confidences: [{}]".format(
", ".join([f"{key} {list(self.confidences[key].size())}" for key in self.confidences])
)
return ", ".join([s, s_conf])
def __len__(self):
return self.S.size(0)
class DensePoseResult(object):
def __init__(self, boxes_xywh, S, I, U, V):
self.results = []
self.boxes_xywh = boxes_xywh.cpu().tolist()
assert len(boxes_xywh.size()) == 2
assert boxes_xywh.size(1) == 4
for i, box_xywh in enumerate(boxes_xywh):
result_i = self._output_to_result(box_xywh, S[[i]], I[[i]], U[[i]], V[[i]])
result_numpy_i = result_i.cpu().numpy()
result_encoded_i = DensePoseResult.encode_png_data(result_numpy_i)
result_encoded_with_shape_i = (result_numpy_i.shape, result_encoded_i)
self.results.append(result_encoded_with_shape_i)
def __str__(self):
s = "DensePoseResult: N={} [{}]".format(
len(self.results), ", ".join([str(list(r[0])) for r in self.results])
)
return s
def _output_to_result(self, box_xywh, S, I, U, V):
x, y, w, h = box_xywh
w = max(int(w), 1)
h = max(int(h), 1)
result = torch.zeros([3, h, w], dtype=torch.uint8, device=U.device)
assert (
len(S.size()) == 4
), "AnnIndex tensor size should have {} " "dimensions but has {}".format(4, len(S.size()))
s_bbox = F.interpolate(S, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
assert (
len(I.size()) == 4
), "IndexUV tensor size should have {} " "dimensions but has {}".format(4, len(I.size()))
i_bbox = (
F.interpolate(I, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
* (s_bbox > 0).long()
).squeeze(0)
assert len(U.size()) == 4, "U tensor size should have {} " "dimensions but has {}".format(
4, len(U.size())
)
u_bbox = F.interpolate(U, (h, w), mode="bilinear", align_corners=False)
assert len(V.size()) == 4, "V tensor size should have {} " "dimensions but has {}".format(
4, len(V.size())
)
v_bbox = F.interpolate(V, (h, w), mode="bilinear", align_corners=False)
result[0] = i_bbox
for part_id in range(1, u_bbox.size(1)):
result[1][i_bbox == part_id] = (
(u_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8)
)
result[2][i_bbox == part_id] = (
(v_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8)
)
assert (
result.size(1) == h
), "Results height {} should be equal " "to bounding box height {}".format(result.size(1), h)
assert (
result.size(2) == w
), "Results width {} should be equal " "to bounding box width {}".format(result.size(2), w)
return result
@staticmethod
def encode_png_data(arr):
"""
Encode array data as a PNG image using the highest compression rate
@param arr [in] Data stored in an array of size (3, M, N) of type uint8
@return Base64-encoded string containing PNG-compressed data
"""
assert len(arr.shape) == 3, "Expected a 3D array as an input," " got a {0}D array".format(
len(arr.shape)
)
assert arr.shape[0] == 3, "Expected first array dimension of size 3," " got {0}".format(
arr.shape[0]
)
assert arr.dtype == np.uint8, "Expected an array of type np.uint8, " " got {0}".format(
arr.dtype
)
data = np.moveaxis(arr, 0, -1)
im = Image.fromarray(data)
fstream = BytesIO()
im.save(fstream, format="png", optimize=True)
s = base64.encodebytes(fstream.getvalue()).decode()
return s
@staticmethod
def decode_png_data(shape, s):
"""
Decode array data from a string that contains PNG-compressed data
@param Base64-encoded string containing PNG-compressed data
@return Data stored in an array of size (3, M, N) of type uint8
"""
fstream = BytesIO(base64.decodebytes(s.encode()))
im = Image.open(fstream)
data = np.moveaxis(np.array(im.getdata(), dtype=np.uint8), -1, 0)
return data.reshape(shape)
def __len__(self):
return len(self.results)
def __getitem__(self, item):
result_encoded = self.results[item]
bbox_xywh = self.boxes_xywh[item]
return result_encoded, bbox_xywh
class DensePoseList(object):
_TORCH_DEVICE_CPU = torch.device("cpu")
def __init__(self, densepose_datas, boxes_xyxy_abs, image_size_hw, device=_TORCH_DEVICE_CPU):
assert len(densepose_datas) == len(
boxes_xyxy_abs
), "Attempt to initialize DensePoseList with {} DensePose datas " "and {} boxes".format(
len(densepose_datas), len(boxes_xyxy_abs)
)
self.densepose_datas = []
for densepose_data in densepose_datas:
assert isinstance(densepose_data, DensePoseDataRelative) or densepose_data is None, (
"Attempt to initialize DensePoseList with DensePose datas "
"of type {}, expected DensePoseDataRelative".format(type(densepose_data))
)
densepose_data_ondevice = (
densepose_data.to(device) if densepose_data is not None else None
)
self.densepose_datas.append(densepose_data_ondevice)
self.boxes_xyxy_abs = boxes_xyxy_abs.to(device)
self.image_size_hw = image_size_hw
self.device = device
def to(self, device):
if self.device == device:
return self
return DensePoseList(self.densepose_datas, self.boxes_xyxy_abs, self.image_size_hw, device)
def __iter__(self):
return iter(self.densepose_datas)
def __len__(self):
return len(self.densepose_datas)
def __repr__(self):
s = self.__class__.__name__ + "("
s += "num_instances={}, ".format(len(self.densepose_datas))
s += "image_width={}, ".format(self.image_size_hw[1])
s += "image_height={})".format(self.image_size_hw[0])
return s
def __getitem__(self, item):
if isinstance(item, int):
densepose_data_rel = self.densepose_datas[item]
return densepose_data_rel
elif isinstance(item, slice):
densepose_datas_rel = self.densepose_datas[item]
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
return DensePoseList(
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
)
elif isinstance(item, torch.Tensor) and (item.dtype == torch.bool):
densepose_datas_rel = [self.densepose_datas[i] for i, x in enumerate(item) if x > 0]
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
return DensePoseList(
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
)
else:
densepose_datas_rel = [self.densepose_datas[i] for i in item]
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
return DensePoseList(
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
)
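
A worked example of the relative-coordinate convention documented in DensePoseDataRelative: an annotated point (dp_x, dp_y) = (128, 64) inside a bounding box (x0, y0, w, h) = (10, 20, 200, 100) maps to the following absolute image coordinates (values chosen for illustration):

x0, y0, w, h = 10.0, 20.0, 200.0, 100.0
x_norm, y_norm = 128.0, 64.0
x_img = x0 + x_norm * w / 256.0   # 10 + 128 * 200 / 256 = 110.0
y_img = y0 + y_norm * h / 256.0   # 20 +  64 * 100 / 256 = 45.0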

View File

@@ -0,0 +1,158 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import contextlib
import copy
import io
import itertools
import json
import logging
import os
from collections import OrderedDict
import torch
from fvcore.common.file_io import PathManager
from pycocotools.coco import COCO
from detectron2.data import MetadataCatalog
from detectron2.evaluation import DatasetEvaluator
from detectron2.structures import BoxMode
from detectron2.utils.comm import all_gather, is_main_process, synchronize
from detectron2.utils.logger import create_small_table
from .densepose_coco_evaluation import DensePoseCocoEval, DensePoseEvalMode
class DensePoseCOCOEvaluator(DatasetEvaluator):
def __init__(self, dataset_name, distributed, output_dir=None):
self._distributed = distributed
self._output_dir = output_dir
self._cpu_device = torch.device("cpu")
self._logger = logging.getLogger(__name__)
self._metadata = MetadataCatalog.get(dataset_name)
json_file = PathManager.get_local_path(self._metadata.json_file)
with contextlib.redirect_stdout(io.StringIO()):
self._coco_api = COCO(json_file)
def reset(self):
self._predictions = []
def process(self, inputs, outputs):
"""
Args:
inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
It is a list of dict. Each dict corresponds to an image and
contains keys like "height", "width", "file_name", "image_id".
outputs: the outputs of a COCO model. It is a list of dicts with key
"instances" that contains :class:`Instances`.
The :class:`Instances` object needs to have `densepose` field.
"""
for input, output in zip(inputs, outputs):
instances = output["instances"].to(self._cpu_device)
boxes = instances.pred_boxes.tensor.clone()
boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
instances.pred_densepose = instances.pred_densepose.to_result(boxes)
json_results = prediction_to_json(instances, input["image_id"])
self._predictions.extend(json_results)
def evaluate(self):
if self._distributed:
synchronize()
predictions = all_gather(self._predictions)
predictions = list(itertools.chain(*predictions))
if not is_main_process():
return
else:
predictions = self._predictions
return copy.deepcopy(self._eval_predictions(predictions))
def _eval_predictions(self, predictions):
"""
Evaluate predictions on densepose.
Return results with the metrics of the tasks.
"""
self._logger.info("Preparing results for COCO format ...")
if self._output_dir:
file_path = os.path.join(self._output_dir, "coco_densepose_results.json")
with open(file_path, "w") as f:
json.dump(predictions, f)
f.flush()
os.fsync(f.fileno())
self._logger.info("Evaluating predictions ...")
res = OrderedDict()
results_gps, results_gpsm = _evaluate_predictions_on_coco(self._coco_api, predictions)
res["densepose_gps"] = results_gps
res["densepose_gpsm"] = results_gpsm
return res
def prediction_to_json(instances, img_id):
"""
Args:
instances (Instances): the output of the model
img_id (str): the image id in COCO
Returns:
list[dict]: the results in densepose evaluation format
"""
scores = instances.scores.tolist()
results = []
for k in range(len(instances)):
densepose = instances.pred_densepose[k]
result = {
"image_id": img_id,
"category_id": 1, # densepose only has one class
"bbox": densepose[1],
"score": scores[k],
"densepose": densepose,
}
results.append(result)
return results
def _evaluate_predictions_on_coco(coco_gt, coco_results):
metrics = ["AP", "AP50", "AP75", "APm", "APl"]
logger = logging.getLogger(__name__)
if len(coco_results) == 0: # cocoapi does not handle empty results very well
logger.warning("No predictions from the model! Set scores to -1")
results_gps = {metric: -1 for metric in metrics}
results_gpsm = {metric: -1 for metric in metrics}
return results_gps, results_gpsm
coco_dt = coco_gt.loadRes(coco_results)
results_gps = _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics)
logger.info(
"Evaluation results for densepose, GPS metric: \n" + create_small_table(results_gps)
)
results_gpsm = _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics)
logger.info(
"Evaluation results for densepose, GPSm metric: \n" + create_small_table(results_gpsm)
)
return results_gps, results_gpsm
def _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics):
coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPS)
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
return results
def _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics):
coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPSM)
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
return results
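
A hedged sketch of plugging the evaluator into detectron2's standard evaluation loop, assuming a trained `model` and a `cfg` prepared as in the earlier examples; the output directory is hypothetical:

from detectron2.evaluation import inference_on_dataset
from densepose import DensePoseCOCOEvaluator
from densepose.data import build_detection_test_loader

evaluator = DensePoseCOCOEvaluator("densepose_coco_2014_minival", distributed=False, output_dir="./output")
val_loader = build_detection_test_loader(cfg, "densepose_coco_2014_minival")
results = inference_on_dataset(model, val_loader, evaluator)
# results is an OrderedDict with "densepose_gps" and "densepose_gpsm" entries.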

View File

@@ -0,0 +1,75 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from detectron2.modeling.test_time_augmentation import GeneralizedRCNNWithTTA
class DensePoseGeneralizedRCNNWithTTA(GeneralizedRCNNWithTTA):
def __init__(self, cfg, model, transform_data, tta_mapper=None, batch_size=1):
"""
Args:
cfg (CfgNode):
model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on.
transform_data (DensePoseTransformData): contains symmetry label
transforms used for horizontal flip
tta_mapper (callable): takes a dataset dict and returns a list of
augmented versions of the dataset dict. Defaults to
`DatasetMapperTTA(cfg)`.
batch_size (int): batch the augmented images into this batch size for inference.
"""
self._transform_data = transform_data
super().__init__(cfg=cfg, model=model, tta_mapper=tta_mapper, batch_size=batch_size)
# the implementation follows closely the one from detectron2/modeling
def _inference_one_image(self, input):
"""
Args:
input (dict): one dataset dict
Returns:
dict: one output dict
"""
augmented_inputs, aug_vars = self._get_augmented_inputs(input)
# Detect boxes from all augmented versions
with self._turn_off_roi_heads(["mask_on", "keypoint_on", "densepose_on"]):
# temporarily disable roi heads
all_boxes, all_scores, all_classes = self._get_augmented_boxes(
augmented_inputs, aug_vars
)
merged_instances = self._merge_detections(
all_boxes, all_scores, all_classes, (aug_vars["height"], aug_vars["width"])
)
if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON:
# Use the detected boxes to obtain new fields
augmented_instances = self._rescale_detected_boxes(
augmented_inputs, merged_instances, aug_vars
)
# run forward on the detected boxes
outputs = self._batch_inference(
augmented_inputs, augmented_instances, do_postprocess=False
)
# Delete now useless variables to avoid being out of memory
del augmented_inputs, augmented_instances, merged_instances
# average the predictions
if self.cfg.MODEL.MASK_ON:
outputs[0].pred_masks = self._reduce_pred_masks(outputs, aug_vars)
if self.cfg.MODEL.DENSEPOSE_ON:
outputs[0].pred_densepose = self._reduce_pred_densepose(outputs, aug_vars)
# postprocess
output = self._detector_postprocess(outputs[0], aug_vars)
return {"instances": output}
else:
return {"instances": merged_instances}
def _reduce_pred_densepose(self, outputs, aug_vars):
for idx, output in enumerate(outputs):
if aug_vars["do_hflip"][idx]:
output.pred_densepose.hflip(self._transform_data)
# Less memory-intensive averaging
for attr in "SIUV":
setattr(
outputs[0].pred_densepose,
attr,
sum(getattr(o.pred_densepose, attr) for o in outputs) / len(outputs),
)
return outputs[0].pred_densepose
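# Minimal usage sketch (illustrative, not part of the original file): wrapping a trained
# model with the TTA class above. `cfg` and `model` are assumed to come from the usual
# detectron2 setup; `load_from_cfg` is the helper shipped in densepose/utils/transform.py
# and provides the symmetry label transforms used for horizontal flips.
def _example_build_tta_model(cfg, model):
    from densepose.utils.transform import load_from_cfg
    transform_data = load_from_cfg(cfg)
    return DensePoseGeneralizedRCNNWithTTA(cfg, model, transform_data, batch_size=1)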

View File

@@ -0,0 +1,213 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import numpy as np
from typing import Dict
import fvcore.nn.weight_init as weight_init
import torch
import torch.nn as nn
from torch.nn import functional as F
from detectron2.layers import Conv2d, ShapeSpec, get_norm
from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.roi_heads import select_foreground_proposals
from .densepose_head import (
build_densepose_data_filter,
build_densepose_head,
build_densepose_losses,
build_densepose_predictor,
densepose_inference,
)
class Decoder(nn.Module):
"""
A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper
(https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from
all levels of the FPN into single output.
"""
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features):
super(Decoder, self).__init__()
# fmt: off
self.in_features = in_features
feature_strides = {k: v.stride for k, v in input_shape.items()}
feature_channels = {k: v.channels for k, v in input_shape.items()}
num_classes = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES
conv_dims = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS
self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE
norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM
# fmt: on
self.scale_heads = []
for in_feature in self.in_features:
head_ops = []
head_length = max(
1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride))
)
for k in range(head_length):
conv = Conv2d(
feature_channels[in_feature] if k == 0 else conv_dims,
conv_dims,
kernel_size=3,
stride=1,
padding=1,
bias=not norm,
norm=get_norm(norm, conv_dims),
activation=F.relu,
)
weight_init.c2_msra_fill(conv)
head_ops.append(conv)
if feature_strides[in_feature] != self.common_stride:
head_ops.append(
nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
)
self.scale_heads.append(nn.Sequential(*head_ops))
self.add_module(in_feature, self.scale_heads[-1])
self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
weight_init.c2_msra_fill(self.predictor)
def forward(self, features):
for i, _ in enumerate(self.in_features):
if i == 0:
x = self.scale_heads[i](features[i])
else:
x = x + self.scale_heads[i](features[i])
x = self.predictor(x)
return x
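# Illustrative sketch (not part of the original file): instantiating the Decoder above on
# FPN-like inputs. The strides/channels are assumptions matching a standard ResNet-FPN
# ("p2".."p5", stride 4..32, 256 channels); `cfg` is assumed to already contain the
# ROI_DENSEPOSE_HEAD.DECODER_* options added by add_densepose_config.
def _example_build_decoder(cfg):
    in_features = ["p2", "p3", "p4", "p5"]
    input_shape = {f"p{i}": ShapeSpec(channels=256, stride=2 ** i) for i in range(2, 6)}
    decoder = Decoder(cfg, input_shape, in_features)
    # forward expects features in the same order as in_features:
    # mask_logits = decoder([feat_p2, feat_p3, feat_p4, feat_p5])
    return decoder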
@ROI_HEADS_REGISTRY.register()
class DensePoseROIHeads(StandardROIHeads):
"""
    A standard ROIHeads that additionally contains a DensePose head.
"""
def __init__(self, cfg, input_shape):
super().__init__(cfg, input_shape)
self._init_densepose_head(cfg, input_shape)
def _init_densepose_head(self, cfg, input_shape):
# fmt: off
self.densepose_on = cfg.MODEL.DENSEPOSE_ON
if not self.densepose_on:
return
self.densepose_data_filter = build_densepose_data_filter(cfg)
dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION
dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO
dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE
self.use_decoder = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON
# fmt: on
if self.use_decoder:
dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,)
else:
dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features)
in_channels = [input_shape[f].channels for f in self.in_features][0]
if self.use_decoder:
self.decoder = Decoder(cfg, input_shape, self.in_features)
self.densepose_pooler = ROIPooler(
output_size=dp_pooler_resolution,
scales=dp_pooler_scales,
sampling_ratio=dp_pooler_sampling_ratio,
pooler_type=dp_pooler_type,
)
self.densepose_head = build_densepose_head(cfg, in_channels)
self.densepose_predictor = build_densepose_predictor(
cfg, self.densepose_head.n_out_channels
)
self.densepose_losses = build_densepose_losses(cfg)
def _forward_densepose(self, features, instances):
"""
Forward logic of the densepose prediction branch.
Args:
features (list[Tensor]): #level input features for densepose prediction
instances (list[Instances]): the per-image instances to train/predict densepose.
In training, they can be the proposals.
In inference, they can be the predicted boxes.
Returns:
In training, a dict of losses.
In inference, update `instances` with new fields "densepose" and return it.
"""
if not self.densepose_on:
return {} if self.training else instances
features = [features[f] for f in self.in_features]
if self.training:
proposals, _ = select_foreground_proposals(instances, self.num_classes)
proposals_dp = self.densepose_data_filter(proposals)
if len(proposals_dp) > 0:
# NOTE may deadlock in DDP if certain workers have empty proposals_dp
proposal_boxes = [x.proposal_boxes for x in proposals_dp]
if self.use_decoder:
features = [self.decoder(features)]
features_dp = self.densepose_pooler(features, proposal_boxes)
densepose_head_outputs = self.densepose_head(features_dp)
densepose_outputs, _, confidences, _ = self.densepose_predictor(
densepose_head_outputs
)
densepose_loss_dict = self.densepose_losses(
proposals_dp, densepose_outputs, confidences
)
return densepose_loss_dict
else:
pred_boxes = [x.pred_boxes for x in instances]
if self.use_decoder:
features = [self.decoder(features)]
features_dp = self.densepose_pooler(features, pred_boxes)
if len(features_dp) > 0:
densepose_head_outputs = self.densepose_head(features_dp)
densepose_outputs, _, confidences, _ = self.densepose_predictor(
densepose_head_outputs
)
else:
                # If no detections occurred, set densepose outputs
                # and confidences to empty tensors
empty_tensor = torch.zeros(size=(0, 0, 0, 0), device=features_dp.device)
densepose_outputs = tuple([empty_tensor] * 4)
confidences = tuple([empty_tensor] * 4)
densepose_inference(densepose_outputs, confidences, instances)
return instances
def forward(self, images, features, proposals, targets=None):
instances, losses = super().forward(images, features, proposals, targets)
del targets, images
if self.training:
losses.update(self._forward_densepose(features, instances))
return instances, losses
def forward_with_given_boxes(self, features, instances):
"""
Use the given boxes in `instances` to produce other (non-box) per-ROI outputs.
        This is useful for downstream tasks where a box is known, but one needs to obtain
        other attributes (outputs of other heads).
Test-time augmentation also uses this.
Args:
features: same as in `forward()`
instances (list[Instances]): instances to predict other outputs. Expect the keys
"pred_boxes" and "pred_classes" to exist.
Returns:
instances (list[Instances]):
the same `Instances` objects, with extra
fields such as `pred_masks` or `pred_keypoints`.
"""
instances = super().forward_with_given_boxes(features, instances)
instances = self._forward_densepose(features, instances)
return instances
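# Illustrative sketch (not part of the original file): the registered heads above are
# selected through the regular detectron2 config machinery. `config_fpath` is an assumed
# path to one of the DensePose config files; in those configs MODEL.ROI_HEADS.NAME is
# already set to "DensePoseROIHeads".
def _example_build_model(config_fpath):
    from detectron2.config import get_cfg
    from detectron2.modeling import build_model
    from densepose import add_densepose_config
    cfg = get_cfg()
    add_densepose_config(cfg)
    cfg.merge_from_file(config_fpath)
    return build_model(cfg)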

View File

@@ -0,0 +1,145 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from typing import Any, Dict, Optional, Tuple
class EntrySelector(object):
"""
Base class for entry selectors
"""
@staticmethod
def from_string(spec: str) -> "EntrySelector":
if spec == "*":
return AllEntrySelector()
return FieldEntrySelector(spec)
class AllEntrySelector(EntrySelector):
"""
Selector that accepts all entries
"""
SPECIFIER = "*"
def __call__(self, entry):
return True
class FieldEntrySelector(EntrySelector):
"""
    Selector that accepts only entries that match the provided field
    specifier(s). Only a limited set of specifiers is supported for now:
<specifiers>::=<specifier>[<comma><specifiers>]
<specifier>::=<field_name>[<type_delim><type>]<equal><value_or_range>
<field_name> is a valid identifier
<type> ::= "int" | "str"
<equal> ::= "="
<comma> ::= ","
<type_delim> ::= ":"
<value_or_range> ::= <value> | <range>
<range> ::= <value><range_delim><value>
<range_delim> ::= "-"
<value> is a string without spaces and special symbols
(e.g. <comma>, <equal>, <type_delim>, <range_delim>)
"""
_SPEC_DELIM = ","
_TYPE_DELIM = ":"
_RANGE_DELIM = "-"
_EQUAL = "="
_ERROR_PREFIX = "Invalid field selector specifier"
class _FieldEntryValuePredicate(object):
"""
Predicate that checks strict equality for the specified entry field
"""
def __init__(self, name: str, typespec: str, value: str):
import builtins
self.name = name
self.type = getattr(builtins, typespec) if typespec is not None else str
self.value = value
def __call__(self, entry):
return entry[self.name] == self.type(self.value)
class _FieldEntryRangePredicate(object):
"""
Predicate that checks whether an entry field falls into the specified range
"""
def __init__(self, name: str, typespec: str, vmin: str, vmax: str):
import builtins
self.name = name
self.type = getattr(builtins, typespec) if typespec is not None else str
self.vmin = vmin
self.vmax = vmax
def __call__(self, entry):
return (entry[self.name] >= self.type(self.vmin)) and (
entry[self.name] <= self.type(self.vmax)
)
def __init__(self, spec: str):
self._predicates = self._parse_specifier_into_predicates(spec)
def __call__(self, entry: Dict[str, Any]):
for predicate in self._predicates:
if not predicate(entry):
return False
return True
def _parse_specifier_into_predicates(self, spec: str):
predicates = []
specs = spec.split(self._SPEC_DELIM)
for subspec in specs:
eq_idx = subspec.find(self._EQUAL)
if eq_idx > 0:
field_name_with_type = subspec[:eq_idx]
field_name, field_type = self._parse_field_name_type(field_name_with_type)
field_value_or_range = subspec[eq_idx + 1 :]
if self._is_range_spec(field_value_or_range):
vmin, vmax = self._get_range_spec(field_value_or_range)
predicate = FieldEntrySelector._FieldEntryRangePredicate(
field_name, field_type, vmin, vmax
)
else:
predicate = FieldEntrySelector._FieldEntryValuePredicate(
field_name, field_type, field_value_or_range
)
predicates.append(predicate)
elif eq_idx == 0:
self._parse_error(f'"{subspec}", field name is empty!')
else:
self._parse_error(f'"{subspec}", should have format ' "<field>=<value_or_range>!")
return predicates
def _parse_field_name_type(self, field_name_with_type: str) -> Tuple[str, Optional[str]]:
type_delim_idx = field_name_with_type.find(self._TYPE_DELIM)
if type_delim_idx > 0:
field_name = field_name_with_type[:type_delim_idx]
field_type = field_name_with_type[type_delim_idx + 1 :]
elif type_delim_idx == 0:
self._parse_error(f'"{field_name_with_type}", field name is empty!')
else:
field_name = field_name_with_type
field_type = None
return field_name, field_type
def _is_range_spec(self, field_value_or_range):
delim_idx = field_value_or_range.find(self._RANGE_DELIM)
return delim_idx > 0
def _get_range_spec(self, field_value_or_range):
if self._is_range_spec(field_value_or_range):
delim_idx = field_value_or_range.find(self._RANGE_DELIM)
vmin = field_value_or_range[:delim_idx]
vmax = field_value_or_range[delim_idx + 1 :]
return vmin, vmax
else:
            self._parse_error(f'"{field_value_or_range}", range of values expected!')
def _parse_error(self, msg):
raise ValueError(f"{self._ERROR_PREFIX}: {msg}")
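# Illustrative usage sketch (not part of the original file): selecting entries with the
# specifier grammar documented above. The field names in the sample entries are arbitrary.
def _example_select_entries():
    selector = EntrySelector.from_string("category_id:int=1,image_id:int=100-199")
    entries = [
        {"category_id": 1, "image_id": 150},  # accepted
        {"category_id": 1, "image_id": 250},  # rejected: image_id out of range
        {"category_id": 2, "image_id": 120},  # rejected: wrong category
    ]
    return [entry for entry in entries if selector(entry)]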

View File

@@ -0,0 +1,13 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
def verbosity_to_level(verbosity):
if verbosity is not None:
if verbosity == 0:
return logging.WARNING
elif verbosity == 1:
return logging.INFO
elif verbosity >= 2:
return logging.DEBUG
return logging.WARNING
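# Illustrative sketch (not part of the original file): mapping a "-v/-vv" style count,
# e.g. from argparse's `add_argument("-v", "--verbosity", action="count")`, to a level.
def _example_setup_logging(verbosity):
    logging.basicConfig(level=verbosity_to_level(verbosity))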

View File

@@ -0,0 +1,16 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from fvcore.common.file_io import PathManager
from detectron2.data import MetadataCatalog
from densepose import DensePoseTransformData
def load_for_dataset(dataset_name):
path = MetadataCatalog.get(dataset_name).densepose_transform_src
densepose_transform_data_fpath = PathManager.get_local_path(path)
return DensePoseTransformData.load(densepose_transform_data_fpath)
def load_from_cfg(cfg):
return load_for_dataset(cfg.DATASETS.TEST[0])

View File

@@ -0,0 +1,191 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import numpy as np
import cv2
import torch
Image = np.ndarray
Boxes = torch.Tensor
class MatrixVisualizer(object):
"""
Base visualizer for matrix data
"""
def __init__(
self,
inplace=True,
cmap=cv2.COLORMAP_PARULA,
val_scale=1.0,
alpha=0.7,
interp_method_matrix=cv2.INTER_LINEAR,
interp_method_mask=cv2.INTER_NEAREST,
):
self.inplace = inplace
self.cmap = cmap
self.val_scale = val_scale
self.alpha = alpha
self.interp_method_matrix = interp_method_matrix
self.interp_method_mask = interp_method_mask
def visualize(self, image_bgr, mask, matrix, bbox_xywh):
self._check_image(image_bgr)
self._check_mask_matrix(mask, matrix)
if self.inplace:
image_target_bgr = image_bgr
else:
image_target_bgr = image_bgr * 0
x, y, w, h = [int(v) for v in bbox_xywh]
if w <= 0 or h <= 0:
return image_bgr
mask, matrix = self._resize(mask, matrix, w, h)
mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3])
matrix_scaled = matrix.astype(np.float32) * self.val_scale
_EPSILON = 1e-6
if np.any(matrix_scaled > 255 + _EPSILON):
logger = logging.getLogger(__name__)
logger.warning(
f"Matrix has values > {255 + _EPSILON} after " f"scaling, clipping to [0..255]"
)
matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8)
matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap)
matrix_vis[mask_bg] = image_target_bgr[y : y + h, x : x + w, :][mask_bg]
image_target_bgr[y : y + h, x : x + w, :] = (
image_target_bgr[y : y + h, x : x + w, :] * (1.0 - self.alpha) + matrix_vis * self.alpha
)
return image_target_bgr.astype(np.uint8)
def _resize(self, mask, matrix, w, h):
        if (w != mask.shape[1]) or (h != mask.shape[0]):
            # pass interpolation by keyword: the third positional argument of cv2.resize is dst
            mask = cv2.resize(mask, (w, h), interpolation=self.interp_method_mask)
        if (w != matrix.shape[1]) or (h != matrix.shape[0]):
            matrix = cv2.resize(matrix, (w, h), interpolation=self.interp_method_matrix)
return mask, matrix
def _check_image(self, image_rgb):
assert len(image_rgb.shape) == 3
assert image_rgb.shape[2] == 3
assert image_rgb.dtype == np.uint8
def _check_mask_matrix(self, mask, matrix):
assert len(matrix.shape) == 2
assert len(mask.shape) == 2
assert mask.dtype == np.uint8
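# Illustrative sketch (not part of the original file): overlaying a per-box scalar matrix
# (here a synthetic 24-part index map) onto an image with the MatrixVisualizer above.
def _example_matrix_overlay():
    image_bgr = np.zeros((240, 320, 3), dtype=np.uint8)
    matrix = np.random.randint(0, 25, size=(56, 56)).astype(np.uint8)
    mask = (matrix > 0).astype(np.uint8)
    visualizer = MatrixVisualizer(val_scale=255.0 / 24)
    return visualizer.visualize(image_bgr, mask, matrix, bbox_xywh=[40, 30, 112, 112])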
class RectangleVisualizer(object):
_COLOR_GREEN = (18, 127, 15)
def __init__(self, color=_COLOR_GREEN, thickness=1):
self.color = color
self.thickness = thickness
def visualize(self, image_bgr, bbox_xywh, color=None, thickness=None):
x, y, w, h = bbox_xywh
color = color or self.color
thickness = thickness or self.thickness
cv2.rectangle(image_bgr, (int(x), int(y)), (int(x + w), int(y + h)), color, thickness)
return image_bgr
class PointsVisualizer(object):
_COLOR_GREEN = (18, 127, 15)
def __init__(self, color_bgr=_COLOR_GREEN, r=5):
self.color_bgr = color_bgr
self.r = r
def visualize(self, image_bgr, pts_xy, colors_bgr=None, rs=None):
for j, pt_xy in enumerate(pts_xy):
x, y = pt_xy
color_bgr = colors_bgr[j] if colors_bgr is not None else self.color_bgr
r = rs[j] if rs is not None else self.r
            cv2.circle(image_bgr, (int(x), int(y)), r, color_bgr, -1)
return image_bgr
class TextVisualizer(object):
_COLOR_GRAY = (218, 227, 218)
_COLOR_WHITE = (255, 255, 255)
def __init__(
self,
font_face=cv2.FONT_HERSHEY_SIMPLEX,
font_color_bgr=_COLOR_GRAY,
font_scale=0.35,
font_line_type=cv2.LINE_AA,
font_line_thickness=1,
fill_color_bgr=_COLOR_WHITE,
fill_color_transparency=1.0,
frame_color_bgr=_COLOR_WHITE,
frame_color_transparency=1.0,
frame_thickness=1,
):
self.font_face = font_face
self.font_color_bgr = font_color_bgr
self.font_scale = font_scale
self.font_line_type = font_line_type
self.font_line_thickness = font_line_thickness
self.fill_color_bgr = fill_color_bgr
self.fill_color_transparency = fill_color_transparency
self.frame_color_bgr = frame_color_bgr
self.frame_color_transparency = frame_color_transparency
self.frame_thickness = frame_thickness
def visualize(self, image_bgr, txt, topleft_xy):
txt_w, txt_h = self.get_text_size_wh(txt)
topleft_xy = tuple(map(int, topleft_xy))
x, y = topleft_xy
if self.frame_color_transparency < 1.0:
t = self.frame_thickness
image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :] = (
image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :]
* self.frame_color_transparency
+ np.array(self.frame_color_bgr) * (1.0 - self.frame_color_transparency)
            ).astype(float)
if self.fill_color_transparency < 1.0:
image_bgr[y : y + txt_h, x : x + txt_w, :] = (
image_bgr[y : y + txt_h, x : x + txt_w, :] * self.fill_color_transparency
+ np.array(self.fill_color_bgr) * (1.0 - self.fill_color_transparency)
            ).astype(float)
cv2.putText(
image_bgr,
txt,
topleft_xy,
self.font_face,
self.font_scale,
self.font_color_bgr,
self.font_line_thickness,
self.font_line_type,
)
return image_bgr
def get_text_size_wh(self, txt):
((txt_w, txt_h), _) = cv2.getTextSize(
txt, self.font_face, self.font_scale, self.font_line_thickness
)
return txt_w, txt_h
class CompoundVisualizer(object):
def __init__(self, visualizers):
self.visualizers = visualizers
def visualize(self, image_bgr, data):
assert len(data) == len(
self.visualizers
), "The number of datas {} should match the number of visualizers" " {}".format(
len(data), len(self.visualizers)
)
image = image_bgr
for i, visualizer in enumerate(self.visualizers):
image = visualizer.visualize(image, data[i])
return image
def __str__(self):
visualizer_str = ", ".join([str(v) for v in self.visualizers])
return "Compound Visualizer [{}]".format(visualizer_str)

View File

@@ -0,0 +1,37 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .base import RectangleVisualizer, TextVisualizer
class BoundingBoxVisualizer(object):
def __init__(self):
self.rectangle_visualizer = RectangleVisualizer()
def visualize(self, image_bgr, boxes_xywh):
for bbox_xywh in boxes_xywh:
image_bgr = self.rectangle_visualizer.visualize(image_bgr, bbox_xywh)
return image_bgr
class ScoredBoundingBoxVisualizer(object):
def __init__(self, bbox_visualizer_params=None, score_visualizer_params=None):
if bbox_visualizer_params is None:
bbox_visualizer_params = {}
if score_visualizer_params is None:
score_visualizer_params = {}
self.visualizer_bbox = RectangleVisualizer(**bbox_visualizer_params)
self.visualizer_score = TextVisualizer(**score_visualizer_params)
def visualize(self, image_bgr, scored_bboxes):
boxes_xywh, box_scores = scored_bboxes
assert len(boxes_xywh) == len(
box_scores
), "Number of bounding boxes {} should be equal to the number of scores {}".format(
len(boxes_xywh), len(box_scores)
)
for i, box_xywh in enumerate(boxes_xywh):
score_i = box_scores[i]
image_bgr = self.visualizer_bbox.visualize(image_bgr, box_xywh)
score_txt = "{0:6.4f}".format(score_i)
topleft_xy = box_xywh[0], box_xywh[1]
image_bgr = self.visualizer_score.visualize(image_bgr, score_txt, topleft_xy)
return image_bgr

View File

@@ -0,0 +1,593 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import numpy as np
from typing import Iterable, Optional, Tuple
import cv2
from ..data.structures import DensePoseDataRelative, DensePoseOutput, DensePoseResult
from .base import Boxes, Image, MatrixVisualizer, PointsVisualizer
class DensePoseResultsVisualizer(object):
def visualize(self, image_bgr: Image, densepose_result: Optional[DensePoseResult]) -> Image:
if densepose_result is None:
return image_bgr
context = self.create_visualization_context(image_bgr)
for i, result_encoded_w_shape in enumerate(densepose_result.results):
iuv_arr = DensePoseResult.decode_png_data(*result_encoded_w_shape)
bbox_xywh = densepose_result.boxes_xywh[i]
self.visualize_iuv_arr(context, iuv_arr, bbox_xywh)
image_bgr = self.context_to_image_bgr(context)
return image_bgr
class DensePoseMaskedColormapResultsVisualizer(DensePoseResultsVisualizer):
def __init__(
self,
data_extractor,
segm_extractor,
inplace=True,
cmap=cv2.COLORMAP_PARULA,
alpha=0.7,
val_scale=1.0,
):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha
)
self.data_extractor = data_extractor
self.segm_extractor = segm_extractor
def create_visualization_context(self, image_bgr: Image):
return image_bgr
def context_to_image_bgr(self, context):
return context
def get_image_bgr_from_context(self, context):
return context
def visualize_iuv_arr(self, context, iuv_arr, bbox_xywh):
image_bgr = self.get_image_bgr_from_context(context)
matrix = self.data_extractor(iuv_arr)
segm = self.segm_extractor(iuv_arr)
mask = np.zeros(matrix.shape, dtype=np.uint8)
mask[segm > 0] = 1
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh)
return image_bgr
def _extract_i_from_iuvarr(iuv_arr):
return iuv_arr[0, :, :]
def _extract_u_from_iuvarr(iuv_arr):
return iuv_arr[1, :, :]
def _extract_v_from_iuvarr(iuv_arr):
return iuv_arr[2, :, :]
class DensePoseResultsMplContourVisualizer(DensePoseResultsVisualizer):
def __init__(self, levels=10, **kwargs):
self.levels = levels
self.plot_args = kwargs
def create_visualization_context(self, image_bgr: Image):
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
context = {}
context["image_bgr"] = image_bgr
dpi = 100
height_inches = float(image_bgr.shape[0]) / dpi
width_inches = float(image_bgr.shape[1]) / dpi
fig = plt.figure(figsize=(width_inches, height_inches), dpi=dpi)
plt.axes([0, 0, 1, 1])
plt.axis("off")
context["fig"] = fig
canvas = FigureCanvas(fig)
context["canvas"] = canvas
extent = (0, image_bgr.shape[1], image_bgr.shape[0], 0)
plt.imshow(image_bgr[:, :, ::-1], extent=extent)
return context
def context_to_image_bgr(self, context):
fig = context["fig"]
w, h = map(int, fig.get_size_inches() * fig.get_dpi())
canvas = context["canvas"]
canvas.draw()
        image_1d = np.frombuffer(canvas.tostring_rgb(), dtype="uint8")
image_rgb = image_1d.reshape(h, w, 3)
image_bgr = image_rgb[:, :, ::-1].copy()
return image_bgr
def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> Image:
import matplotlib.pyplot as plt
u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
extent = (
bbox_xywh[0],
bbox_xywh[0] + bbox_xywh[2],
bbox_xywh[1],
bbox_xywh[1] + bbox_xywh[3],
)
plt.contour(u, self.levels, extent=extent, **self.plot_args)
plt.contour(v, self.levels, extent=extent, **self.plot_args)
class DensePoseResultsCustomContourVisualizer(DensePoseResultsVisualizer):
"""
Contour visualization using marching squares
"""
def __init__(self, levels=10, **kwargs):
# TODO: colormap is hardcoded
cmap = cv2.COLORMAP_PARULA
if isinstance(levels, int):
self.levels = np.linspace(0, 1, levels)
else:
self.levels = levels
if "linewidths" in kwargs:
self.linewidths = kwargs["linewidths"]
else:
self.linewidths = [1] * len(self.levels)
self.plot_args = kwargs
img_colors_bgr = cv2.applyColorMap((self.levels * 255).astype(np.uint8), cmap)
self.level_colors_bgr = [
[int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr
]
def create_visualization_context(self, image_bgr: Image):
return image_bgr
def context_to_image_bgr(self, context):
return context
def get_image_bgr_from_context(self, context):
return context
def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> Image:
image_bgr = self.get_image_bgr_from_context(context)
segm = _extract_i_from_iuvarr(iuv_arr)
u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
self._contours(image_bgr, u, segm, bbox_xywh)
self._contours(image_bgr, v, segm, bbox_xywh)
def _contours(self, image_bgr, arr, segm, bbox_xywh):
for part_idx in range(1, DensePoseDataRelative.N_PART_LABELS + 1):
mask = segm == part_idx
if not np.any(mask):
continue
arr_min = np.amin(arr[mask])
arr_max = np.amax(arr[mask])
I, J = np.nonzero(mask)
i0 = np.amin(I)
i1 = np.amax(I) + 1
j0 = np.amin(J)
j1 = np.amax(J) + 1
if (j1 == j0 + 1) or (i1 == i0 + 1):
continue
Nw = arr.shape[1] - 1
Nh = arr.shape[0] - 1
for level_idx, level in enumerate(self.levels):
if (level < arr_min) or (level > arr_max):
continue
vp = arr[i0:i1, j0:j1] >= level
bin_codes = vp[:-1, :-1] + vp[1:, :-1] * 2 + vp[1:, 1:] * 4 + vp[:-1, 1:] * 8
mp = mask[i0:i1, j0:j1]
bin_mask_codes = mp[:-1, :-1] + mp[1:, :-1] * 2 + mp[1:, 1:] * 4 + mp[:-1, 1:] * 8
it = np.nditer(bin_codes, flags=["multi_index"])
color_bgr = self.level_colors_bgr[level_idx]
linewidth = self.linewidths[level_idx]
while not it.finished:
if (it[0] != 0) and (it[0] != 15):
i, j = it.multi_index
if bin_mask_codes[i, j] != 0:
self._draw_line(
image_bgr,
arr,
mask,
level,
color_bgr,
linewidth,
it[0],
it.multi_index,
bbox_xywh,
Nw,
Nh,
(i0, j0),
)
it.iternext()
def _draw_line(
self,
image_bgr,
arr,
mask,
v,
color_bgr,
linewidth,
bin_code,
multi_idx,
bbox_xywh,
Nw,
Nh,
offset,
):
lines = self._bin_code_2_lines(arr, v, bin_code, multi_idx, Nw, Nh, offset)
x0, y0, w, h = bbox_xywh
x1 = x0 + w
y1 = y0 + h
for line in lines:
x0r, y0r = line[0]
x1r, y1r = line[1]
pt0 = (int(x0 + x0r * (x1 - x0)), int(y0 + y0r * (y1 - y0)))
pt1 = (int(x0 + x1r * (x1 - x0)), int(y0 + y1r * (y1 - y0)))
cv2.line(image_bgr, pt0, pt1, color_bgr, linewidth)
def _bin_code_2_lines(self, arr, v, bin_code, multi_idx, Nw, Nh, offset):
i0, j0 = offset
i, j = multi_idx
i += i0
j += j0
v0, v1, v2, v3 = arr[i, j], arr[i + 1, j], arr[i + 1, j + 1], arr[i, j + 1]
x0i = float(j) / Nw
y0j = float(i) / Nh
He = 1.0 / Nh
We = 1.0 / Nw
if (bin_code == 1) or (bin_code == 14):
a = (v - v0) / (v1 - v0)
b = (v - v0) / (v3 - v0)
pt1 = (x0i, y0j + a * He)
pt2 = (x0i + b * We, y0j)
return [(pt1, pt2)]
elif (bin_code == 2) or (bin_code == 13):
a = (v - v0) / (v1 - v0)
b = (v - v1) / (v2 - v1)
pt1 = (x0i, y0j + a * He)
pt2 = (x0i + b * We, y0j + He)
return [(pt1, pt2)]
elif (bin_code == 3) or (bin_code == 12):
a = (v - v0) / (v3 - v0)
b = (v - v1) / (v2 - v1)
pt1 = (x0i + a * We, y0j)
pt2 = (x0i + b * We, y0j + He)
return [(pt1, pt2)]
elif (bin_code == 4) or (bin_code == 11):
a = (v - v1) / (v2 - v1)
b = (v - v3) / (v2 - v3)
pt1 = (x0i + a * We, y0j + He)
pt2 = (x0i + We, y0j + b * He)
return [(pt1, pt2)]
elif (bin_code == 6) or (bin_code == 9):
a = (v - v0) / (v1 - v0)
b = (v - v3) / (v2 - v3)
pt1 = (x0i, y0j + a * He)
pt2 = (x0i + We, y0j + b * He)
return [(pt1, pt2)]
elif (bin_code == 7) or (bin_code == 8):
a = (v - v0) / (v3 - v0)
b = (v - v3) / (v2 - v3)
pt1 = (x0i + a * We, y0j)
pt2 = (x0i + We, y0j + b * He)
return [(pt1, pt2)]
elif bin_code == 5:
a1 = (v - v0) / (v1 - v0)
b1 = (v - v1) / (v2 - v1)
pt11 = (x0i, y0j + a1 * He)
pt12 = (x0i + b1 * We, y0j + He)
a2 = (v - v0) / (v3 - v0)
b2 = (v - v3) / (v2 - v3)
pt21 = (x0i + a2 * We, y0j)
pt22 = (x0i + We, y0j + b2 * He)
return [(pt11, pt12), (pt21, pt22)]
elif bin_code == 10:
a1 = (v - v0) / (v3 - v0)
b1 = (v - v0) / (v1 - v0)
pt11 = (x0i + a1 * We, y0j)
pt12 = (x0i, y0j + b1 * He)
a2 = (v - v1) / (v2 - v1)
b2 = (v - v3) / (v2 - v3)
pt21 = (x0i + a2 * We, y0j + He)
pt22 = (x0i + We, y0j + b2 * He)
return [(pt11, pt12), (pt21, pt22)]
return []
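# Illustrative sketch (not part of the original file): drawing U/V iso-contours for a
# synthetic IUV array with the matplotlib-free visualizer above. The IUV layout
# (channel 0: part index, channels 1-2: U/V scaled to [0, 255]) matches
# DensePoseResult.decode_png_data.
def _example_custom_contours():
    image_bgr = np.zeros((200, 200, 3), dtype=np.uint8)
    iuv_arr = np.zeros((3, 80, 80), dtype=np.uint8)
    iuv_arr[0] = 1  # a single body part covering the whole box
    iuv_arr[1] = np.tile(np.linspace(0, 255, 80), (80, 1))    # U ramp along x
    iuv_arr[2] = np.tile(np.linspace(0, 255, 80), (80, 1)).T  # V ramp along y
    visualizer = DensePoseResultsCustomContourVisualizer(levels=5)
    visualizer.visualize_iuv_arr(image_bgr, iuv_arr, [20, 20, 80, 80])
    return image_bgr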
try:
import matplotlib
matplotlib.use("Agg")
DensePoseResultsContourVisualizer = DensePoseResultsMplContourVisualizer
except ModuleNotFoundError:
logger = logging.getLogger(__name__)
logger.warning("Could not import matplotlib, using custom contour visualizer")
DensePoseResultsContourVisualizer = DensePoseResultsCustomContourVisualizer
class DensePoseResultsFineSegmentationVisualizer(DensePoseMaskedColormapResultsVisualizer):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
super(DensePoseResultsFineSegmentationVisualizer, self).__init__(
_extract_i_from_iuvarr,
_extract_i_from_iuvarr,
inplace,
cmap,
alpha,
val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS,
)
class DensePoseResultsUVisualizer(DensePoseMaskedColormapResultsVisualizer):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
super(DensePoseResultsUVisualizer, self).__init__(
_extract_u_from_iuvarr, _extract_i_from_iuvarr, inplace, cmap, alpha, val_scale=1.0
)
class DensePoseResultsVVisualizer(DensePoseMaskedColormapResultsVisualizer):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
super(DensePoseResultsVVisualizer, self).__init__(
_extract_v_from_iuvarr, _extract_i_from_iuvarr, inplace, cmap, alpha, val_scale=1.0
)
class DensePoseOutputsFineSegmentationVisualizer(object):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace,
cmap=cmap,
val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS,
alpha=alpha,
)
def visualize(
self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
) -> Image:
if dp_output_with_bboxes is None:
return image_bgr
densepose_output, bboxes_xywh = dp_output_with_bboxes
S = densepose_output.S
I = densepose_output.I # noqa
U = densepose_output.U
V = densepose_output.V
N = S.size(0)
assert N == I.size(
0
), "densepose outputs S {} and I {}" " should have equal first dim size".format(
S.size(), I.size()
)
assert N == U.size(
0
), "densepose outputs S {} and U {}" " should have equal first dim size".format(
S.size(), U.size()
)
assert N == V.size(
0
), "densepose outputs S {} and V {}" " should have equal first dim size".format(
S.size(), V.size()
)
assert N == len(
bboxes_xywh
), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
len(bboxes_xywh), N
)
for n in range(N):
Sn = S[n].argmax(dim=0)
In = I[n].argmax(dim=0) * (Sn > 0).long()
matrix = In.cpu().numpy().astype(np.uint8)
mask = np.zeros(matrix.shape, dtype=np.uint8)
mask[matrix > 0] = 1
bbox_xywh = bboxes_xywh[n]
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh)
return image_bgr
class DensePoseOutputsUVisualizer(object):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha
)
def visualize(
self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
) -> Image:
if dp_output_with_bboxes is None:
return image_bgr
densepose_output, bboxes_xywh = dp_output_with_bboxes
assert isinstance(
densepose_output, DensePoseOutput
), "DensePoseOutput expected, {} encountered".format(type(densepose_output))
S = densepose_output.S
I = densepose_output.I # noqa
U = densepose_output.U
V = densepose_output.V
N = S.size(0)
assert N == I.size(
0
), "densepose outputs S {} and I {}" " should have equal first dim size".format(
S.size(), I.size()
)
assert N == U.size(
0
), "densepose outputs S {} and U {}" " should have equal first dim size".format(
S.size(), U.size()
)
assert N == V.size(
0
), "densepose outputs S {} and V {}" " should have equal first dim size".format(
S.size(), V.size()
)
assert N == len(
bboxes_xywh
), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
len(bboxes_xywh), N
)
for n in range(N):
Sn = S[n].argmax(dim=0)
In = I[n].argmax(dim=0) * (Sn > 0).long()
segmentation = In.cpu().numpy().astype(np.uint8)
mask = np.zeros(segmentation.shape, dtype=np.uint8)
mask[segmentation > 0] = 1
Un = U[n].cpu().numpy().astype(np.float32)
Uvis = np.zeros(segmentation.shape, dtype=np.float32)
for partId in range(Un.shape[0]):
Uvis[segmentation == partId] = Un[partId][segmentation == partId].clip(0, 1) * 255
bbox_xywh = bboxes_xywh[n]
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, Uvis, bbox_xywh)
return image_bgr
class DensePoseOutputsVVisualizer(object):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha
)
def visualize(
self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
) -> Image:
if dp_output_with_bboxes is None:
return image_bgr
densepose_output, bboxes_xywh = dp_output_with_bboxes
assert isinstance(
densepose_output, DensePoseOutput
), "DensePoseOutput expected, {} encountered".format(type(densepose_output))
S = densepose_output.S
I = densepose_output.I # noqa
U = densepose_output.U
V = densepose_output.V
N = S.size(0)
assert N == I.size(
0
), "densepose outputs S {} and I {}" " should have equal first dim size".format(
S.size(), I.size()
)
assert N == U.size(
0
), "densepose outputs S {} and U {}" " should have equal first dim size".format(
S.size(), U.size()
)
assert N == V.size(
0
), "densepose outputs S {} and V {}" " should have equal first dim size".format(
S.size(), V.size()
)
assert N == len(
bboxes_xywh
), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
len(bboxes_xywh), N
)
for n in range(N):
Sn = S[n].argmax(dim=0)
In = I[n].argmax(dim=0) * (Sn > 0).long()
segmentation = In.cpu().numpy().astype(np.uint8)
mask = np.zeros(segmentation.shape, dtype=np.uint8)
mask[segmentation > 0] = 1
Vn = V[n].cpu().numpy().astype(np.float32)
Vvis = np.zeros(segmentation.shape, dtype=np.float32)
            for partId in range(Vn.shape[0]):
Vvis[segmentation == partId] = Vn[partId][segmentation == partId].clip(0, 1) * 255
bbox_xywh = bboxes_xywh[n]
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, Vvis, bbox_xywh)
return image_bgr
class DensePoseDataCoarseSegmentationVisualizer(object):
"""
Visualizer for ground truth segmentation
"""
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace,
cmap=cmap,
val_scale=255.0 / DensePoseDataRelative.N_BODY_PARTS,
alpha=alpha,
)
def visualize(
self,
image_bgr: Image,
bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
) -> Image:
if bbox_densepose_datas is None:
return image_bgr
for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
matrix = densepose_data.segm.numpy()
mask = np.zeros(matrix.shape, dtype=np.uint8)
mask[matrix > 0] = 1
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh.numpy())
return image_bgr
class DensePoseDataPointsVisualizer(object):
def __init__(self, densepose_data_to_value_fn=None, cmap=cv2.COLORMAP_PARULA):
self.points_visualizer = PointsVisualizer()
self.densepose_data_to_value_fn = densepose_data_to_value_fn
self.cmap = cmap
def visualize(
self,
image_bgr: Image,
bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
) -> Image:
if bbox_densepose_datas is None:
return image_bgr
for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
x0, y0, w, h = bbox_xywh.numpy()
x = densepose_data.x.numpy() * w / 255.0 + x0
y = densepose_data.y.numpy() * h / 255.0 + y0
pts_xy = zip(x, y)
if self.densepose_data_to_value_fn is None:
image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy)
else:
v = self.densepose_data_to_value_fn(densepose_data)
img_colors_bgr = cv2.applyColorMap(v, self.cmap)
colors_bgr = [
[int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr
]
image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy, colors_bgr)
return image_bgr
def _densepose_data_u_for_cmap(densepose_data):
u = np.clip(densepose_data.u.numpy(), 0, 1) * 255.0
return u.astype(np.uint8)
def _densepose_data_v_for_cmap(densepose_data):
v = np.clip(densepose_data.v.numpy(), 0, 1) * 255.0
return v.astype(np.uint8)
def _densepose_data_i_for_cmap(densepose_data):
i = (
np.clip(densepose_data.i.numpy(), 0.0, DensePoseDataRelative.N_PART_LABELS)
* 255.0
/ DensePoseDataRelative.N_PART_LABELS
)
return i.astype(np.uint8)
class DensePoseDataPointsUVisualizer(DensePoseDataPointsVisualizer):
def __init__(self):
super(DensePoseDataPointsUVisualizer, self).__init__(
densepose_data_to_value_fn=_densepose_data_u_for_cmap
)
class DensePoseDataPointsVVisualizer(DensePoseDataPointsVisualizer):
def __init__(self):
super(DensePoseDataPointsVVisualizer, self).__init__(
densepose_data_to_value_fn=_densepose_data_v_for_cmap
)
class DensePoseDataPointsIVisualizer(DensePoseDataPointsVisualizer):
def __init__(self):
super(DensePoseDataPointsIVisualizer, self).__init__(
densepose_data_to_value_fn=_densepose_data_i_for_cmap
)

View File

@@ -0,0 +1,152 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
from typing import Sequence
import torch
from detectron2.layers.nms import batched_nms
from detectron2.structures.instances import Instances
from densepose.vis.bounding_box import BoundingBoxVisualizer, ScoredBoundingBoxVisualizer
from densepose.vis.densepose import DensePoseResultsVisualizer
from .base import CompoundVisualizer
Scores = Sequence[float]
def extract_scores_from_instances(instances: Instances, select=None):
if instances.has("scores"):
return instances.scores if select is None else instances.scores[select]
return None
def extract_boxes_xywh_from_instances(instances: Instances, select=None):
if instances.has("pred_boxes"):
boxes_xywh = instances.pred_boxes.tensor.clone()
boxes_xywh[:, 2] -= boxes_xywh[:, 0]
boxes_xywh[:, 3] -= boxes_xywh[:, 1]
return boxes_xywh if select is None else boxes_xywh[select]
return None
def create_extractor(visualizer: object):
"""
Create an extractor for the provided visualizer
"""
if isinstance(visualizer, CompoundVisualizer):
extractors = [create_extractor(v) for v in visualizer.visualizers]
return CompoundExtractor(extractors)
elif isinstance(visualizer, DensePoseResultsVisualizer):
return DensePoseResultExtractor()
elif isinstance(visualizer, ScoredBoundingBoxVisualizer):
return CompoundExtractor([extract_boxes_xywh_from_instances, extract_scores_from_instances])
elif isinstance(visualizer, BoundingBoxVisualizer):
return extract_boxes_xywh_from_instances
else:
logger = logging.getLogger(__name__)
logger.error(f"Could not create extractor for {visualizer}")
return None
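# Illustrative sketch (not part of the original file): the usual pairing of a visualizer
# with its extractor: the extractor pulls the relevant fields out of the predicted
# `Instances`, the visualizer renders them onto the image.
def _example_visualize(instances: Instances, image_bgr):
    from densepose.vis.densepose import DensePoseResultsFineSegmentationVisualizer
    visualizer = DensePoseResultsFineSegmentationVisualizer()
    extractor = create_extractor(visualizer)
    data = extractor(instances)
    return visualizer.visualize(image_bgr, data)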
class BoundingBoxExtractor(object):
"""
Extracts bounding boxes from instances
"""
def __call__(self, instances: Instances):
boxes_xywh = extract_boxes_xywh_from_instances(instances)
return boxes_xywh
class ScoredBoundingBoxExtractor(object):
"""
    Extracts bounding boxes and corresponding scores from instances
"""
def __call__(self, instances: Instances, select=None):
scores = extract_scores_from_instances(instances)
boxes_xywh = extract_boxes_xywh_from_instances(instances)
if (scores is None) or (boxes_xywh is None):
return (boxes_xywh, scores)
if select is not None:
scores = scores[select]
boxes_xywh = boxes_xywh[select]
return (boxes_xywh, scores)
class DensePoseResultExtractor(object):
"""
Extracts DensePose result from instances
"""
def __call__(self, instances: Instances, select=None):
boxes_xywh = extract_boxes_xywh_from_instances(instances)
if instances.has("pred_densepose") and (boxes_xywh is not None):
dpout = instances.pred_densepose
if select is not None:
dpout = dpout[select]
boxes_xywh = boxes_xywh[select]
return dpout.to_result(boxes_xywh)
else:
return None
class CompoundExtractor(object):
"""
Extracts data for CompoundVisualizer
"""
def __init__(self, extractors):
self.extractors = extractors
def __call__(self, instances: Instances, select=None):
datas = []
for extractor in self.extractors:
data = extractor(instances, select)
datas.append(data)
return datas
class NmsFilteredExtractor(object):
"""
    Extracts data for detections kept after non-maximum suppression (NMS) filtering
"""
def __init__(self, extractor, iou_threshold):
self.extractor = extractor
self.iou_threshold = iou_threshold
def __call__(self, instances: Instances, select=None):
scores = extract_scores_from_instances(instances)
boxes_xywh = extract_boxes_xywh_from_instances(instances)
if boxes_xywh is None:
return None
select_local_idx = batched_nms(
boxes_xywh,
scores,
            torch.zeros(len(scores), dtype=torch.int32, device=boxes_xywh.device),
iou_threshold=self.iou_threshold,
).squeeze()
select_local = torch.zeros(len(boxes_xywh), dtype=torch.bool, device=boxes_xywh.device)
select_local[select_local_idx] = True
select = select_local if select is None else (select & select_local)
return self.extractor(instances, select=select)
class ScoreThresholdedExtractor(object):
"""
    Extracts data for detections whose score exceeds the given threshold
"""
def __init__(self, extractor, min_score):
self.extractor = extractor
self.min_score = min_score
def __call__(self, instances: Instances, select=None):
scores = extract_scores_from_instances(instances)
if scores is None:
return None
select_local = scores > self.min_score
select = select_local if select is None else (select & select_local)
data = self.extractor(instances, select=select)
return data
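# Illustrative sketch (not part of the original file): extractors can be chained; here
# detections are thresholded by score and filtered by NMS before DensePose results are
# extracted. The thresholds are arbitrary example values.
def _example_filtered_extractor():
    extractor = NmsFilteredExtractor(
        ScoreThresholdedExtractor(DensePoseResultExtractor(), min_score=0.8),
        iou_threshold=0.5,
    )
    return extractor  # use as: data = extractor(instances)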