Add at new repo again

This commit is contained in:
2025-01-28 21:48:35 +00:00
commit 6e660ddb3c
564 changed files with 75575 additions and 0 deletions

View File

@@ -0,0 +1,54 @@
# DensePose in Detectron2
**Dense Human Pose Estimation In The Wild**
_Rıza Alp Güler, Natalia Neverova, Iasonas Kokkinos_
[[`densepose.org`](https://densepose.org)] [[`arXiv`](https://arxiv.org/abs/1802.00434)] [[`BibTeX`](#CitingDensePose)]
Dense human pose estimation aims at mapping all human pixels of an RGB image to the 3D surface of the human body.
<div align="center">
<img src="https://drive.google.com/uc?export=view&id=1qfSOkpueo1kVZbXOuQJJhyagKjMgepsz" width="700px" />
</div>
In this repository, we provide the code to train and evaluate DensePose-RCNN. We also provide tools to visualize
DensePose annotation and results.
# Quick Start
See [ Getting Started ](doc/GETTING_STARTED.md)
# Model Zoo and Baselines
We provide a number of baseline results and trained models available for download. See [Model Zoo](doc/MODEL_ZOO.md) for details.
# License
Detectron2 is released under the [Apache 2.0 license](../../LICENSE)
## <a name="CitingDensePose"></a>Citing DensePose
If you use DensePose, please cite it using the following BibTeX entries:
For DensePose with estimated confidences:
```
@InProceedings{Neverova2019DensePoseConfidences,
title = {Correlated Uncertainty for Learning Dense Correspondences from Noisy Labels},
author = {Neverova, Natalia and Novotny, David and Vedaldi, Andrea},
booktitle = {Advances in Neural Information Processing Systems},
year = {2019},
}
```
For the original DensePose:
```
@InProceedings{Guler2018DensePose,
title={DensePose: Dense Human Pose Estimation In The Wild},
author={R{\i}za Alp G\"uler and Natalia Neverova and Iasonas Kokkinos},
booktitle={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
year={2018}
}
```

View File

@@ -0,0 +1,318 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import argparse
import glob
import logging
import os
import pickle
import sys
from typing import Any, ClassVar, Dict, List
import torch
from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.engine.defaults import DefaultPredictor
from detectron2.structures.boxes import BoxMode
from detectron2.structures.instances import Instances
from detectron2.utils.logger import setup_logger
from densepose import add_densepose_config
from densepose.utils.logger import verbosity_to_level
from densepose.vis.base import CompoundVisualizer
from densepose.vis.bounding_box import ScoredBoundingBoxVisualizer
from densepose.vis.densepose import (
DensePoseResultsContourVisualizer,
DensePoseResultsFineSegmentationVisualizer,
DensePoseResultsUVisualizer,
DensePoseResultsVVisualizer,
)
from densepose.vis.extractor import CompoundExtractor, create_extractor
# Description text handed to argparse as the top-level parser description.
DOC = """Apply Net - a tool to print / visualize DensePose results
"""
# Name of the logger used by this tool; `main` re-creates `logger` under the same name.
LOGGER_NAME = "apply_net"
logger = logging.getLogger(LOGGER_NAME)
# Maps an action's COMMAND string to its class; populated by the `register_action` decorator.
_ACTION_REGISTRY: Dict[str, "Action"] = {}
class Action:
    """Base class for command-line actions; contributes the options shared by all of them."""

    @classmethod
    def add_arguments(cls: type, parser: argparse.ArgumentParser):
        """
        Register the repeatable ``-v`` / ``--verbosity`` counter option.

        Args:
            parser (argparse.ArgumentParser): parser that receives the option
        """
        verbosity_help = "Verbose mode. Multiple -v options increase the verbosity."
        parser.add_argument("-v", "--verbosity", action="count", help=verbosity_help)
def register_action(cls: type):
    """
    Class decorator that records an action class in the action registry.

    The class is stored under its ``COMMAND`` attribute and returned unchanged,
    so the decorated name still refers to the class itself.
    """
    # Item assignment mutates the module-level dict in place, so no `global` is needed.
    _ACTION_REGISTRY.update({cls.COMMAND: cls})
    return cls
class InferenceAction(Action):
    """
    Base class for actions that run a predictor over a collection of input images.

    ``execute`` drives the whole run and relies on hooks that this class does not
    define itself: ``create_context``, ``execute_on_outputs`` and ``postexecute``.
    """

    @classmethod
    def add_arguments(cls: type, parser: argparse.ArgumentParser):
        """
        Register the arguments shared by all inference actions.

        Args:
            parser (argparse.ArgumentParser): parser that receives the positional
                ``cfg``, ``model`` and ``input`` arguments and the trailing
                ``--opts`` config overrides
        """
        super(InferenceAction, cls).add_arguments(parser)
        parser.add_argument("cfg", metavar="<config>", help="Config file")
        parser.add_argument("model", metavar="<model>", help="Model file")
        parser.add_argument("input", metavar="<input>", help="Input data")
        parser.add_argument(
            "--opts",
            help="Modify config options using the command-line 'KEY VALUE' pairs",
            default=[],
            nargs=argparse.REMAINDER,
        )

    @classmethod
    def execute(cls: type, args: argparse.Namespace):
        """
        Build the config and predictor, then run the predictor on every input file.

        Each prediction is passed to ``execute_on_outputs``; ``postexecute`` is
        called once after the last file. If the input specification resolves to
        no files, a warning is logged and nothing else happens.

        Args:
            args (argparse.Namespace): parsed command-line arguments
        """
        logger.info(f"Loading config from {args.cfg}")
        # Subclass overrides of `setup_config` may append extra overrides to this list.
        opts = []
        cfg = cls.setup_config(args.cfg, args.model, args, opts)
        logger.info(f"Loading model from {args.model}")
        predictor = DefaultPredictor(cfg)
        logger.info(f"Loading data from {args.input}")
        file_list = cls._get_input_file_list(args.input)
        if not file_list:
            logger.warning(f"No input images for {args.input}")
            return
        context = cls.create_context(args)
        for file_name in file_list:
            img = read_image(file_name, format="BGR")  # predictor expects BGR image.
            with torch.no_grad():
                outputs = predictor(img)["instances"]
                cls.execute_on_outputs(context, {"file_name": file_name, "image": img}, outputs)
        cls.postexecute(context)

    @classmethod
    def setup_config(
        cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str]
    ):
        """
        Create a frozen config for inference.

        Values are merged in this order, later ones taking precedence: the config
        file, the ``--opts`` overrides from ``args``, then ``opts``. The model
        weights path is set last.

        Args:
            config_fpath (str): path to the config file
            model_fpath (str): value assigned to ``MODEL.WEIGHTS``
            args (argparse.Namespace): parsed arguments; ``args.opts`` is merged
            opts (List[str]): additional flat ``KEY VALUE`` overrides (may be empty)
        Returns:
            the frozen config
        """
        cfg = get_cfg()
        add_densepose_config(cfg)
        cfg.merge_from_file(config_fpath)
        cfg.merge_from_list(args.opts)
        if opts:
            cfg.merge_from_list(opts)
        cfg.MODEL.WEIGHTS = model_fpath
        cfg.freeze()
        return cfg

    @classmethod
    def _get_input_file_list(cls: type, input_spec: str):
        """
        Resolve an input specification into a sorted list of file paths.

        Args:
            input_spec (str): a directory (its regular files are used, without
                recursion), a single file, or otherwise a glob pattern
        Returns:
            list of str: matching paths in sorted order; empty if nothing matches
        """
        if os.path.isdir(input_spec):
            candidates = (os.path.join(input_spec, fname) for fname in os.listdir(input_spec))
            file_list = [fpath for fpath in candidates if os.path.isfile(fpath)]
        elif os.path.isfile(input_spec):
            file_list = [input_spec]
        else:
            file_list = glob.glob(input_spec)
        # os.listdir and glob.glob make no ordering guarantee; sorting keeps the
        # processing order (and anything indexed by it) reproducible across runs.
        return sorted(file_list)
@register_action
class DumpAction(InferenceAction):
    """
    Action that gathers per-image model outputs and pickles them into one file.
    """

    COMMAND: ClassVar[str] = "dump"

    @classmethod
    def add_parser(cls: type, subparsers: argparse._SubParsersAction):
        """Create the sub-command parser for this action and bind it to ``execute``."""
        dump_parser = subparsers.add_parser(cls.COMMAND, help="Dump model outputs to a file.")
        cls.add_arguments(dump_parser)
        dump_parser.set_defaults(func=cls.execute)

    @classmethod
    def add_arguments(cls: type, parser: argparse.ArgumentParser):
        """Add the inherited arguments plus ``--output`` (the dump file name)."""
        super().add_arguments(parser)
        output_options = {
            "metavar": "<dump_file>",
            "default": "results.pkl",
            "help": "File name to save dump to",
        }
        parser.add_argument("--output", **output_options)

    @classmethod
    def execute_on_outputs(
        cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances
    ):
        """
        Convert the predictions for one image into a dict and append it to the context.

        Args:
            context (Dict[str, Any]): run state; the record is appended to ``context["results"]``
            entry (Dict[str, Any]): input description; only ``"file_name"`` is read
            outputs (Instances): predictions for the image
        """
        image_fpath = entry["file_name"]
        logger.info(f"Processing {image_fpath}")
        record = {"file_name": image_fpath}
        if outputs.has("scores"):
            record["scores"] = outputs.get("scores").cpu()
        if outputs.has("pred_boxes"):
            boxes_xyxy = outputs.get("pred_boxes").tensor.cpu()
            record["pred_boxes_XYXY"] = boxes_xyxy
            # DensePose results are built from the boxes, so they are only
            # produced when boxes are available.
            if outputs.has("pred_densepose"):
                boxes_xywh = BoxMode.convert(boxes_xyxy, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
                record["pred_densepose"] = outputs.get("pred_densepose").to_result(boxes_xywh)
        context["results"].append(record)

    @classmethod
    def create_context(cls: type, args: argparse.Namespace):
        """Return the initial run state: an empty result list and the output file name."""
        return {"results": [], "out_fname": args.output}

    @classmethod
    def postexecute(cls: type, context: Dict[str, Any]):
        """Pickle the accumulated results, creating the output directory if it is missing."""
        out_fname = context["out_fname"]
        parent_dir = os.path.dirname(out_fname)
        # An empty dirname means the current directory, which needs no creation.
        if parent_dir and not os.path.exists(parent_dir):
            os.makedirs(parent_dir)
        with open(out_fname, "wb") as dump_file:
            pickle.dump(context["results"], dump_file)
            logger.info(f"Output saved to {out_fname}")
@register_action
class ShowAction(InferenceAction):
    """
    Action that renders the selected visualizations on top of each input image.
    """

    COMMAND: ClassVar[str] = "show"
    # Visualization name accepted on the command line -> visualizer class.
    VISUALIZERS: ClassVar[Dict[str, object]] = {
        "dp_contour": DensePoseResultsContourVisualizer,
        "dp_segm": DensePoseResultsFineSegmentationVisualizer,
        "dp_u": DensePoseResultsUVisualizer,
        "dp_v": DensePoseResultsVVisualizer,
        "bbox": ScoredBoundingBoxVisualizer,
    }

    @classmethod
    def add_parser(cls: type, subparsers: argparse._SubParsersAction):
        """Create the sub-command parser for this action and bind it to ``execute``."""
        show_parser = subparsers.add_parser(cls.COMMAND, help="Visualize selected entries")
        cls.add_arguments(show_parser)
        show_parser.set_defaults(func=cls.execute)

    @classmethod
    def add_arguments(cls: type, parser: argparse.ArgumentParser):
        """
        Add the inherited arguments plus the visualization list, score / NMS
        thresholds and the output file name.
        """
        super().add_arguments(parser)
        known_names = ",".join(sorted(cls.VISUALIZERS))
        parser.add_argument(
            "visualizations",
            metavar="<visualizations>",
            help="Comma separated list of visualizations, possible values: "
            "[{}]".format(known_names),
        )
        parser.add_argument(
            "--min_score",
            metavar="<score>",
            default=0.8,
            type=float,
            help="Minimum detection score to visualize",
        )
        parser.add_argument(
            "--nms_thresh", metavar="<threshold>", default=None, type=float, help="NMS threshold"
        )
        parser.add_argument(
            "--output",
            metavar="<image_file>",
            default="outputres.png",
            help="File name to save output to",
        )

    @classmethod
    def setup_config(
        cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str]
    ):
        """
        Extend ``opts`` with the score threshold (and the NMS threshold, if given)
        before delegating to the parent implementation.

        Note that ``opts`` is modified in place.
        """
        opts.extend(["MODEL.ROI_HEADS.SCORE_THRESH_TEST", str(args.min_score)])
        if args.nms_thresh is not None:
            opts.extend(["MODEL.ROI_HEADS.NMS_THRESH_TEST", str(args.nms_thresh)])
        return super().setup_config(config_fpath, model_fpath, args, opts)

    @classmethod
    def execute_on_outputs(
        cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances
    ):
        """
        Draw the visualizations for one image and write the result to a numbered file.

        Args:
            context (Dict[str, Any]): run state with ``"visualizer"``, ``"extractor"``,
                ``"out_fname"`` and the running counter ``"entry_idx"``
            entry (Dict[str, Any]): input description with ``"file_name"`` and ``"image"``
            outputs (Instances): predictions for the image
        """
        import cv2
        import numpy as np

        visualizer = context["visualizer"]
        extractor = context["extractor"]
        image_fpath = entry["file_name"]
        logger.info(f"Processing {image_fpath}")
        # Visualizations are drawn over a grayscale copy replicated to three channels.
        gray = cv2.cvtColor(entry["image"], cv2.COLOR_BGR2GRAY)
        canvas = np.tile(gray[:, :, np.newaxis], [1, 1, 3])
        image_vis = visualizer.visualize(canvas, extractor(outputs))
        # Output files are numbered starting from 1.
        out_fname = cls._get_out_fname(context["entry_idx"] + 1, context["out_fname"])
        parent_dir = os.path.dirname(out_fname)
        if parent_dir and not os.path.exists(parent_dir):
            os.makedirs(parent_dir)
        cv2.imwrite(out_fname, image_vis)
        logger.info(f"Output saved to {out_fname}")
        # The counter only advances once the image has been written.
        context["entry_idx"] += 1

    @classmethod
    def postexecute(cls: type, context: Dict[str, Any]):
        """Nothing to finalize: every image is written as soon as it is processed."""
        pass

    @classmethod
    def _get_out_fname(cls: type, entry_idx: int, fname_base: str):
        """Insert the zero-padded, 4-digit ``entry_idx`` before the extension of ``fname_base``."""
        stem, extension = os.path.splitext(fname_base)
        return "".join([stem, ".{0:04d}".format(entry_idx), extension])

    @classmethod
    def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]:
        """
        Instantiate the requested visualizers with their extractors and build the run state.

        Raises:
            KeyError: if a requested visualization name is not in ``VISUALIZERS``
        """
        visualizers = []
        extractors = []
        for name in args.visualizations.split(","):
            single_visualizer = cls.VISUALIZERS[name]()
            visualizers.append(single_visualizer)
            extractors.append(create_extractor(single_visualizer))
        return {
            "extractor": CompoundExtractor(extractors),
            "visualizer": CompoundVisualizer(visualizers),
            "out_fname": args.output,
            "entry_idx": 0,
        }
def create_argument_parser() -> argparse.ArgumentParser:
    """
    Build the top-level parser with one sub-command per registered action.

    When no sub-command is selected, the default ``func`` prints the help text.
    """

    def wide_help_formatter(prog):
        # Leave more room for option names before the help column starts.
        return argparse.HelpFormatter(prog, max_help_position=120)

    parser = argparse.ArgumentParser(description=DOC, formatter_class=wide_help_formatter)

    def print_help(_):
        return parser.print_help(sys.stdout)

    parser.set_defaults(func=print_help)
    subparsers = parser.add_subparsers(title="Actions")
    for action in _ACTION_REGISTRY.values():
        action.add_parser(subparsers)
    return parser
def main():
    """Parse the command line, configure the module logger and run the selected action."""
    global logger
    args = create_argument_parser().parse_args()
    # Without a sub-command the namespace carries no `verbosity` attribute.
    verbosity = getattr(args, "verbosity", None)
    logger = setup_logger(name=LOGGER_NAME)
    logger.setLevel(verbosity_to_level(verbosity))
    args.func(args)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,47 @@
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
BACKBONE:
NAME: "build_resnet_fpn_backbone"
RESNETS:
OUT_FEATURES: ["res2", "res3", "res4", "res5"]
FPN:
IN_FEATURES: ["res2", "res3", "res4", "res5"]
ANCHOR_GENERATOR:
SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
RPN:
IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
PRE_NMS_TOPK_TEST: 1000 # Per FPN level
# Detectron1 uses 2000 proposals per-batch,
# (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
# which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
POST_NMS_TOPK_TRAIN: 1000
POST_NMS_TOPK_TEST: 1000
DENSEPOSE_ON: True
ROI_HEADS:
NAME: "DensePoseROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_BOX_HEAD:
NAME: "FastRCNNConvFCHead"
NUM_FC: 2
POOLER_RESOLUTION: 7
POOLER_SAMPLING_RATIO: 2
POOLER_TYPE: "ROIAlign"
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseV1ConvXHead"
POOLER_TYPE: "ROIAlign"
NUM_COARSE_SEGM_CHANNELS: 2
DATASETS:
TRAIN: ("densepose_coco_2014_train", "densepose_coco_2014_valminusminival")
TEST: ("densepose_coco_2014_minival",)
SOLVER:
IMS_PER_BATCH: 16
BASE_LR: 0.01
STEPS: (60000, 80000)
MAX_ITER: 90000
WARMUP_FACTOR: 0.1
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)

View File

@@ -0,0 +1,16 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@@ -0,0 +1,16 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "indep_aniso"
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@@ -0,0 +1,10 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
SOLVER:
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@@ -0,0 +1,16 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
ROI_DENSEPOSE_HEAD:
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)
WARMUP_FACTOR: 0.025

View File

@@ -0,0 +1,16 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
ROI_DENSEPOSE_HEAD:
UV_CONFIDENCE:
ENABLED: True
TYPE: "indep_aniso"
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)
WARMUP_FACTOR: 0.025

View File

@@ -0,0 +1,8 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
SOLVER:
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@@ -0,0 +1,17 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
ROI_DENSEPOSE_HEAD:
NUM_COARSE_SEGM_CHANNELS: 15
POOLER_RESOLUTION: 14
HEATMAP_SIZE: 56
INDEX_WEIGHTS: 2.0
PART_WEIGHTS: 0.3
POINT_REGRESSION_WEIGHTS: 0.1
DECODER_ON: False
SOLVER:
BASE_LR: 0.002
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@@ -0,0 +1,16 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@@ -0,0 +1,16 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "indep_aniso"
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@@ -0,0 +1,10 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
SOLVER:
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@@ -0,0 +1,16 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)
WARMUP_FACTOR: 0.025

View File

@@ -0,0 +1,16 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
UV_CONFIDENCE:
ENABLED: True
TYPE: "indep_aniso"
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)
WARMUP_FACTOR: 0.025

View File

@@ -0,0 +1,8 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
SOLVER:
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@@ -0,0 +1,17 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
NUM_COARSE_SEGM_CHANNELS: 15
POOLER_RESOLUTION: 14
HEATMAP_SIZE: 56
INDEX_WEIGHTS: 2.0
PART_WEIGHTS: 0.3
POINT_REGRESSION_WEIGHTS: 0.1
DECODER_ON: False
SOLVER:
BASE_LR: 0.002
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@@ -0,0 +1,91 @@
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
BACKBONE:
NAME: "build_resnet_fpn_backbone"
RESNETS:
OUT_FEATURES: ["res2", "res3", "res4", "res5"]
FPN:
IN_FEATURES: ["res2", "res3", "res4", "res5"]
ANCHOR_GENERATOR:
SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
RPN:
IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
PRE_NMS_TOPK_TEST: 1000 # Per FPN level
# Detectron1 uses 2000 proposals per-batch,
# (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
# which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
POST_NMS_TOPK_TRAIN: 1000
POST_NMS_TOPK_TEST: 1000
ROI_HEADS:
NAME: "StandardROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_BOX_HEAD:
NAME: "FastRCNNConvFCHead"
NUM_FC: 2
POOLER_RESOLUTION: 7
ROI_MASK_HEAD:
NAME: "MaskRCNNConvUpsampleHead"
NUM_CONV: 4
POOLER_RESOLUTION: 14
DATASETS:
TRAIN: ("base_coco_2017_train",)
TEST: ("base_coco_2017_val", "densepose_chimps")
CATEGORY_MAPS:
"base_coco_2017_train":
"16": 1 # bird -> person
"17": 1 # cat -> person
"18": 1 # dog -> person
"19": 1 # horse -> person
"20": 1 # sheep -> person
"21": 1 # cow -> person
"22": 1 # elephant -> person
"23": 1 # bear -> person
"24": 1 # zebra -> person
"25": 1 # girafe -> person
"base_coco_2017_val":
"16": 1 # bird -> person
"17": 1 # cat -> person
"18": 1 # dog -> person
"19": 1 # horse -> person
"20": 1 # sheep -> person
"21": 1 # cow -> person
"22": 1 # elephant -> person
"23": 1 # bear -> person
"24": 1 # zebra -> person
"25": 1 # girafe -> person
WHITELISTED_CATEGORIES:
"base_coco_2017_train":
- 1 # person
- 16 # bird
- 17 # cat
- 18 # dog
- 19 # horse
- 20 # sheep
- 21 # cow
- 22 # elephant
- 23 # bear
- 24 # zebra
- 25 # giraffe
"base_coco_2017_val":
- 1 # person
- 16 # bird
- 17 # cat
- 18 # dog
- 19 # horse
- 20 # sheep
- 21 # cow
- 22 # elephant
- 23 # bear
- 24 # zebra
- 25 # giraffe
SOLVER:
IMS_PER_BATCH: 16
BASE_LR: 0.02
STEPS: (60000, 80000)
MAX_ITER: 90000
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
VERSION: 2

View File

@@ -0,0 +1,7 @@
_BASE_: "Base-RCNN-FPN-MC.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: False
DENSEPOSE_ON: False
RESNETS:
DEPTH: 50

View File

@@ -0,0 +1,11 @@
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
DATASETS:
TRAIN: ("densepose_coco_2014_minival_100",)
TEST: ("densepose_coco_2014_minival_100",)
SOLVER:
MAX_ITER: 40
STEPS: (30,)

View File

@@ -0,0 +1,13 @@
_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml"
MODEL:
WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl"
DATASETS:
TRAIN: ()
TEST: ("densepose_coco_2014_minival_100",)
TEST:
AUG:
ENABLED: True
MIN_SIZES: (400, 500, 600, 700, 800, 900, 1000, 1100, 1200)
MAX_SIZE: 4000
FLIP: True
EXPECTED_RESULTS: [["bbox_TTA", "AP", 61.74, 0.03], ["densepose_gps_TTA", "AP", 60.22, 0.03], ["densepose_gpsm_TTA", "AP", 63.85, 0.03]]

View File

@@ -0,0 +1,19 @@
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
POINT_REGRESSION_WEIGHTS: 0.0005
DATASETS:
TRAIN: ("densepose_coco_2014_minival_100",)
TEST: ("densepose_coco_2014_minival_100",)
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 40
STEPS: (30,)
WARMUP_FACTOR: 0.025

View File

@@ -0,0 +1,19 @@
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
UV_CONFIDENCE:
ENABLED: True
TYPE: "indep_aniso"
POINT_REGRESSION_WEIGHTS: 0.0005
DATASETS:
TRAIN: ("densepose_coco_2014_minival_100",)
TEST: ("densepose_coco_2014_minival_100",)
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 40
STEPS: (30,)
WARMUP_FACTOR: 0.025

View File

@@ -0,0 +1,8 @@
_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml"
MODEL:
WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl"
DATASETS:
TRAIN: ()
TEST: ("densepose_coco_2014_minival_100",)
TEST:
EXPECTED_RESULTS: [["bbox", "AP", 59.27, 0.025], ["densepose_gps", "AP", 60.11, 0.02], ["densepose_gpsm", "AP", 64.20, 0.02]]

View File

@@ -0,0 +1,9 @@
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
DATASETS:
TRAIN: ("densepose_coco_2014_minival_100",)
TEST: ("densepose_coco_2014_minival_100",)
SOLVER:
MAX_ITER: 40
STEPS: (30,)

View File

@@ -0,0 +1,14 @@
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
ROI_HEADS:
NUM_CLASSES: 1
DATASETS:
TRAIN: ("densepose_coco_2014_minival",)
TEST: ("densepose_coco_2014_minival",)
SOLVER:
MAX_ITER: 6000
STEPS: (5500, 5800)
TEST:
EXPECTED_RESULTS: [["bbox", "AP", 58.27, 1.0], ["densepose_gps", "AP", 42.47, 1.5], ["densepose_gpsm", "AP", 49.20, 1.5]]

View File

@@ -0,0 +1,9 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .data.datasets import builtin # just to register data
from .config import add_densepose_config, add_dataset_category_config
from .densepose_head import ROI_DENSEPOSE_HEAD_REGISTRY
from .evaluator import DensePoseCOCOEvaluator
from .roi_head import DensePoseROIHeads
from .data.structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
from .modeling.test_time_augmentation import DensePoseGeneralizedRCNNWithTTA
from .utils.transform import load_from_cfg

View File

@@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from detectron2.config import CfgNode as CN
def add_dataset_category_config(cfg: CN):
    """
    Add dataset options related to categories:
     - ``DATASETS.CATEGORY_MAPS``: category mapping
     - ``DATASETS.WHITELISTED_CATEGORIES``: category whitelisting

    Both nodes are created empty and accept new keys.
    """
    datasets = cfg.DATASETS
    datasets.CATEGORY_MAPS = CN(new_allowed=True)
    datasets.WHITELISTED_CATEGORIES = CN(new_allowed=True)
def add_densepose_config(cfg: CN):
    """
    Add the default DensePose head options to ``cfg`` (modified in place).
    """
    cfg.MODEL.DENSEPOSE_ON = True

    # All remaining options live under MODEL.ROI_DENSEPOSE_HEAD.
    head = CN()
    cfg.MODEL.ROI_DENSEPOSE_HEAD = head
    head.NAME = ""
    head.NUM_STACKED_CONVS = 8
    # Number of parts used for point labels
    head.NUM_PATCHES = 24
    head.DECONV_KERNEL = 4
    head.CONV_HEAD_DIM = 512
    head.CONV_HEAD_KERNEL = 3
    head.UP_SCALE = 2
    head.HEATMAP_SIZE = 112
    head.POOLER_TYPE = "ROIAlignV2"
    head.POOLER_RESOLUTION = 28
    head.POOLER_SAMPLING_RATIO = 2
    head.NUM_COARSE_SEGM_CHANNELS = 2  # 15 or 2
    # Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
    head.FG_IOU_THRESHOLD = 0.7
    # Loss weights for annotation masks (14 parts)
    head.INDEX_WEIGHTS = 5.0
    # Loss weights for surface parts (24 parts)
    head.PART_WEIGHTS = 1.0
    # Loss weights for UV regression
    head.POINT_REGRESSION_WEIGHTS = 0.01

    # Decoder options
    head.DECODER_ON = True
    head.DECODER_NUM_CLASSES = 256
    head.DECODER_CONV_DIMS = 256
    head.DECODER_NORM = ""
    head.DECODER_COMMON_STRIDE = 4

    # DeepLab head options
    deeplab = CN()
    head.DEEPLAB = deeplab
    deeplab.NORM = "GN"
    deeplab.NONLOCAL_ON = 0

    # Confidence options
    # ENABLED turns on learning confidences (variances) along with the actual values
    uv_confidence = CN({"ENABLED": False})
    head.UV_CONFIDENCE = uv_confidence
    # UV confidence lower bound
    uv_confidence.EPSILON = 0.01
    # Statistical model type for confidence learning, possible values:
    # - "iid_iso": statistically independent identically distributed residuals
    #    with isotropic covariance
    # - "indep_aniso": statistically independent residuals with anisotropic
    #    covariances
    uv_confidence.TYPE = "iid_iso"

View File

@@ -0,0 +1,9 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .build import build_detection_test_loader, build_detection_train_loader
from .dataset_mapper import DatasetMapper
# ensure the builtin data are registered
from . import datasets
__all__ = [k for k in globals().keys() if not k.startswith("_")]

View File

@@ -0,0 +1,405 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import itertools
import logging
import numpy as np
import operator
from typing import Any, Callable, Collection, Dict, Iterable, List, Optional
import torch
from detectron2.config import CfgNode
from detectron2.data import samplers
from detectron2.data.build import (
load_proposals_into_dataset,
print_instances_class_histogram,
trivial_batch_collator,
worker_init_reset_seed,
)
from detectron2.data.catalog import DatasetCatalog, MetadataCatalog
from detectron2.data.common import AspectRatioGroupedDataset, DatasetFromList, MapDataset
from detectron2.utils.comm import get_world_size
from .dataset_mapper import DatasetMapper
from .datasets.coco import DENSEPOSE_KEYS_WITHOUT_MASK as DENSEPOSE_COCO_KEYS_WITHOUT_MASK
from .datasets.coco import DENSEPOSE_MASK_KEY as DENSEPOSE_COCO_MASK_KEY
__all__ = ["build_detection_train_loader", "build_detection_test_loader"]
Instance = Dict[str, Any]
InstancePredicate = Callable[[Instance], bool]
def _compute_num_images_per_worker(cfg: CfgNode):
    """
    Split ``SOLVER.IMS_PER_BATCH`` evenly across the workers.

    The worker count is whatever ``get_world_size()`` reports.

    Args:
        cfg (CfgNode): config providing ``SOLVER.IMS_PER_BATCH``
    Returns:
        int: number of images per worker
    """
    world_size = get_world_size()
    batch_size = cfg.SOLVER.IMS_PER_BATCH
    per_worker, leftover = divmod(batch_size, world_size)
    assert (
        leftover == 0
    ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
        batch_size, world_size
    )
    assert (
        batch_size >= world_size
    ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
        batch_size, world_size
    )
    return per_worker
def _map_category_id_to_contiguous_id(dataset_name: str, dataset_dicts: Iterable[Instance]):
    """
    Replace, in place, the ``category_id`` of every annotation with its contiguous ID.

    The mapping is read from the ``thing_dataset_id_to_contiguous_id`` metadata
    of ``dataset_name``.
    """
    meta = MetadataCatalog.get(dataset_name)
    all_annotations = itertools.chain.from_iterable(
        record["annotations"] for record in dataset_dicts
    )
    for annotation in all_annotations:
        dataset_cat_id = annotation["category_id"]
        annotation["category_id"] = meta.thing_dataset_id_to_contiguous_id[dataset_cat_id]
def _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names: Iterable[str]):
    """
    Compute one contiguous category numbering across several datasets and
    store the resulting maps in each dataset's metadata.

    Category IDs of all given datasets are merged; contiguous IDs are assigned
    in ascending order of category ID. For every dataset this sets
    ``thing_classes``, ``thing_dataset_id_to_contiguous_id`` and
    ``thing_contiguous_id_to_dataset_id`` on its metadata.

    Args:
        dataset_names (Iterable[str]): names of the datasets to process
    Raises:
        ValueError: if two datasets give different names to the same category ID
    """
    # The names are traversed twice below; materialize them so that a one-shot
    # iterable (e.g. a generator) is not already exhausted on the second pass.
    dataset_names = list(dataset_names)
    # merge categories for all data
    merged_categories = {}
    for dataset_name in dataset_names:
        meta = MetadataCatalog.get(dataset_name)
        for cat_id, cat_name in meta.categories.items():
            if cat_id not in merged_categories:
                merged_categories[cat_id] = (cat_name, dataset_name)
                continue
            cat_name_other, dataset_name_other = merged_categories[cat_id]
            if cat_name_other != cat_name:
                raise ValueError(
                    f"Incompatible categories for category ID {cat_id}: "
                    f'dataset {dataset_name} value "{cat_name}", '
                    f'dataset {dataset_name_other} value "{cat_name_other}"'
                )

    merged_cat_id_to_cont_id = {
        cat_id: i for i, cat_id in enumerate(sorted(merged_categories.keys()))
    }

    # add category maps to metadata
    for dataset_name in dataset_names:
        meta = MetadataCatalog.get(dataset_name)
        categories = meta.get("categories")
        sorted_cat_ids = sorted(categories.keys())
        meta.thing_classes = [categories[cat_id] for cat_id in sorted_cat_ids]
        meta.thing_dataset_id_to_contiguous_id = {
            cat_id: merged_cat_id_to_cont_id[cat_id] for cat_id in sorted_cat_ids
        }
        meta.thing_contiguous_id_to_dataset_id = {
            merged_cat_id_to_cont_id[cat_id]: cat_id for cat_id in sorted_cat_ids
        }
def _maybe_create_general_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
    """
    Create the predicate that drops instances without usable annotations.

    Returns:
        None if ``DATALOADER.FILTER_EMPTY_ANNOTATIONS`` is off; otherwise a
        predicate that keeps an instance only if it has an ``"annotations"``
        entry with at least one annotation whose ``"is_crowd"`` value is 0
        (a missing value counts as 0).
    """
    if not cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS:
        return None

    def has_non_crowd_annotation(instance: Instance) -> bool:
        # NOTE(review): the key read here is "is_crowd"; confirm that it matches
        # the key the dataset loader actually stores on annotations.
        return any(ann.get("is_crowd", 0) == 0 for ann in instance["annotations"])

    def general_keep_instance_predicate(instance: Instance) -> bool:
        return "annotations" in instance and has_non_crowd_annotation(instance)

    return general_keep_instance_predicate
def _maybe_create_keypoints_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
    """
    Create the predicate that requires a minimum number of keypoints per instance.

    Returns:
        None unless ``MODEL.KEYPOINT_ON`` is set and
        ``MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE`` is positive;
        otherwise a predicate that counts, over all annotations of an instance,
        the keypoints whose third component is positive and compares the total
        with that minimum.
    """
    min_num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
    if not cfg.MODEL.KEYPOINT_ON or min_num_keypoints <= 0:
        return None

    def has_sufficient_num_keypoints(instance: Instance) -> bool:
        total = 0
        for ann in instance["annotations"]:
            if "keypoints" not in ann:
                continue
            # Every third value, starting at index 2, is the per-keypoint flag tested here.
            flags = np.array(ann["keypoints"][2::3])
            total += (flags > 0).sum()
        return total >= min_num_keypoints

    return has_sufficient_num_keypoints
def _maybe_create_mask_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
    """
    Create the predicate that requires a mask annotation.

    Returns:
        None if ``MODEL.MASK_ON`` is off; otherwise a predicate that is true
        when at least one annotation of the instance has a ``"segmentation"`` key.
    """
    if cfg.MODEL.MASK_ON:

        def has_mask_annotations(instance: Instance) -> bool:
            for ann in instance["annotations"]:
                if "segmentation" in ann:
                    return True
            return False

        return has_mask_annotations
    return None
def _maybe_create_densepose_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
    """
    Create the predicate that requires a DensePose annotation.

    Returns:
        None if ``MODEL.DENSEPOSE_ON`` is off; otherwise a predicate that is
        true when at least one annotation of the instance contains all of the
        non-mask DensePose keys and also either the DensePose mask key or a
        ``"segmentation"`` key.
    """
    if not cfg.MODEL.DENSEPOSE_ON:
        return None

    def is_densepose_annotation(ann) -> bool:
        has_point_data = all(key in ann for key in DENSEPOSE_COCO_KEYS_WITHOUT_MASK)
        return has_point_data and ((DENSEPOSE_COCO_MASK_KEY in ann) or ("segmentation" in ann))

    def has_densepose_annotations(instance: Instance) -> bool:
        return any(is_densepose_annotation(ann) for ann in instance["annotations"])

    return has_densepose_annotations
def _maybe_create_specific_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
    """
    Combine the task-specific predicates (keypoints, mask, DensePose) enabled by ``cfg``.

    Returns:
        None if no task-specific predicate is enabled; otherwise a predicate
        that is true when at least one of the enabled predicates accepts the
        instance.
    """
    predicates = []
    for create_predicate in (
        _maybe_create_keypoints_keep_instance_predicate,
        _maybe_create_mask_keep_instance_predicate,
        _maybe_create_densepose_keep_instance_predicate,
    ):
        predicate = create_predicate(cfg)
        if predicate is not None:
            predicates.append(predicate)
    if len(predicates) == 0:
        return None

    def combined_predicate(instance: Instance) -> bool:
        for predicate in predicates:
            if predicate(instance):
                return True
        return False

    return combined_predicate
def _get_train_keep_instance_predicate(cfg: CfgNode):
    """
    Build the predicate used to filter training instances.

    Returns:
        None if neither the general nor the task-specific predicate is enabled,
        the single enabled one if only one is, and otherwise a predicate that
        requires both to accept the instance.
    """
    general = _maybe_create_general_keep_instance_predicate(cfg)
    specific = _maybe_create_specific_keep_instance_predicate(cfg)
    if general is None:
        # Covers both "only specific" and "neither" (specific is then None as well).
        return specific
    if specific is None:
        return general

    def combined_general_specific_keep_predicate(instance: Instance) -> bool:
        return general(instance) and specific(instance)

    return combined_general_specific_keep_predicate
def _get_test_keep_instance_predicate(cfg: CfgNode):
    """
    Build the predicate used to filter test instances.

    Only the general predicate applies; the result is None when it is disabled.
    """
    return _maybe_create_general_keep_instance_predicate(cfg)
def _maybe_filter_and_map_categories(
    dataset_name: str, dataset_dicts: List[Instance]
) -> List[Instance]:
    """
    Apply the per-dataset category whitelist and category map, if any.

    Reads `whitelisted_categories` and `category_map` from the dataset
    metadata. When neither is set the input list is returned untouched.
    Otherwise annotations whose category is not whitelisted are dropped and
    the remaining `category_id` values are remapped. The record dicts and
    annotation dicts are modified in place.
    """
    meta = MetadataCatalog.get(dataset_name)
    whitelist = meta.get("whitelisted_categories")
    category_map = meta.get("category_map", {})
    if whitelist is None and not category_map:
        return dataset_dicts

    result = []
    for record in dataset_dicts:
        kept = []
        for ann in record["annotations"]:
            src_id = ann["category_id"]
            if whitelist is None or src_id in whitelist:
                # unmapped categories keep their original id
                ann["category_id"] = category_map.get(src_id, src_id)
                kept.append(ann)
        record["annotations"] = kept
        result.append(record)
    return result
def _add_category_whitelists_to_metadata(cfg: CfgNode):
    """
    Copy each entry of `cfg.DATASETS.WHITELISTED_CATEGORIES` into the metadata
    of the corresponding dataset (attribute `whitelisted_categories`) and log it.
    """
    logger = logging.getLogger(__name__)
    for name, cat_ids in cfg.DATASETS.WHITELISTED_CATEGORIES.items():
        meta = MetadataCatalog.get(name)
        meta.whitelisted_categories = cat_ids
        message = "Whitelisted categories for dataset {}: {}".format(
            name, meta.whitelisted_categories
        )
        logger.info(message)
def _add_category_maps_to_metadata(cfg: CfgNode):
    """
    Copy each entry of `cfg.DATASETS.CATEGORY_MAPS` into the metadata of the
    corresponding dataset (attribute `category_map`), converting both source
    and destination category ids to `int`, and log the result.
    """
    logger = logging.getLogger(__name__)
    for name, raw_map in cfg.DATASETS.CATEGORY_MAPS.items():
        int_map = {}
        for src, dst in raw_map.items():
            int_map[int(src)] = int(dst)
        meta = MetadataCatalog.get(name)
        meta.category_map = int_map
        logger.info("Category maps for dataset {}: {}".format(name, meta.category_map))
def combine_detection_dataset_dicts(
    dataset_names: Collection[str],
    keep_instance_predicate: Optional[InstancePredicate] = None,
    proposal_files: Optional[Collection[str]] = None,
) -> List[Instance]:
    """
    Load several datasets, harmonize their categories and merge them into
    a single flat list of instance dicts for training / testing.

    Args:
        dataset_names (Collection[str]): names of the datasets to load;
            must not be empty
        keep_instance_predicate (Callable: Dict[str, Any] -> bool): optional
            filter; only instance dicts for which it returns True are kept
        proposal_files (Collection[str]): optional object proposal files,
            one per entry of `dataset_names`

    Returns:
        List of instance dicts from all datasets, in dataset order.
    """
    assert len(dataset_names)
    if proposal_files is None:
        proposal_files = [None] * len(dataset_names)
    assert len(dataset_names) == len(proposal_files)

    # every dataset is loaded before the contiguous id maps are initialized
    dataset_map = {name: DatasetCatalog.get(name) for name in dataset_names}
    _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names)

    per_dataset = []
    for dataset_name, proposal_file in zip(dataset_names, proposal_files):
        dicts = dataset_map[dataset_name]
        assert len(dicts), f"Dataset '{dataset_name}' is empty!"
        if proposal_file is not None:
            dicts = load_proposals_into_dataset(dicts, proposal_file)
        dicts = _maybe_filter_and_map_categories(dataset_name, dicts)
        _map_category_id_to_contiguous_id(dataset_name, dicts)
        thing_classes = MetadataCatalog.get(dataset_name).thing_classes
        print_instances_class_histogram(dicts, thing_classes)
        per_dataset.append(dicts)

    flattened = itertools.chain.from_iterable(per_dataset)
    if keep_instance_predicate is None:
        return list(flattened)
    return [d for d in flattened if keep_instance_predicate(d)]
def build_detection_train_loader(cfg: CfgNode, mapper=None):
    """
    Create the training data loader, in a way similar to Detectron2.

    The main difference is that data from several datasets with different
    but compatible object category sets can be combined.

    Steps:
      1. The dataset names in `cfg.DATASETS.TRAIN` are used to obtain a list
         of dicts, after category whitelists and maps were written to metadata.
      2. Each dict is mapped by `mapper` into the format consumed by the model.
      3. Mapped dicts are batched by simply putting them into a list.

    Args:
        cfg (CfgNode): the config
        mapper (callable): takes a sample (dict) from the dataset and returns
            the format to be consumed by the model.
            By default it will be `DatasetMapper(cfg, True)`.

    Returns:
        A data loader yielding batches (``list[mapped_dict]``) of training data.

    Raises:
        ValueError: if `cfg.DATALOADER.SAMPLER_TRAIN` names an unknown sampler.
    """
    images_per_worker = _compute_num_images_per_worker(cfg)

    _add_category_whitelists_to_metadata(cfg)
    _add_category_maps_to_metadata(cfg)
    dataset_dicts = combine_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        keep_instance_predicate=_get_train_keep_instance_predicate(cfg),
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
    )
    dataset = DatasetFromList(dataset_dicts, copy=False)
    dataset = MapDataset(dataset, DatasetMapper(cfg, True) if mapper is None else mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logging.getLogger(__name__).info("Using training sampler {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        sampler = samplers.TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        sampler = samplers.RepeatFactorTrainingSampler(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
        )
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    if not cfg.DATALOADER.ASPECT_RATIO_GROUPING:
        # drop_last so the batch always have the same size
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler, images_per_worker, drop_last=True
        )
        return torch.utils.data.DataLoader(
            dataset,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
            worker_init_fn=worker_init_reset_seed,
        )

    # yield individual mapped dicts; grouping into batches happens afterwards
    single_item_loader = torch.utils.data.DataLoader(
        dataset,
        sampler=sampler,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=None,
        collate_fn=operator.itemgetter(0),  # don't batch, but yield individual elements
        worker_init_fn=worker_init_reset_seed,
    )
    return AspectRatioGroupedDataset(single_item_loader, images_per_worker)
def build_detection_test_loader(cfg, dataset_name, mapper=None):
    """
    Create a test data loader for a single dataset.

    Similar to `build_detection_train_loader`, but the dataset is selected by
    the `dataset_name` argument (instead of the names in cfg) and the batch
    size is always 1.

    Args:
        cfg: a detectron2 CfgNode
        dataset_name (str): a name of the dataset that's available in the DatasetCatalog
        mapper (callable): a callable which takes a sample (dict) from dataset
            and returns the format to be consumed by the model.
            By default it will be `DatasetMapper(cfg, False)`.

    Returns:
        DataLoader: a torch DataLoader, that loads the given detection
        dataset, with test-time transformation and batching.
    """
    _add_category_whitelists_to_metadata(cfg)
    _add_category_maps_to_metadata(cfg)

    keep_predicate = _get_test_keep_instance_predicate(cfg)
    if cfg.MODEL.LOAD_PROPOSALS:
        # pick the proposal file listed at the same position as the dataset
        dataset_index = list(cfg.DATASETS.TEST).index(dataset_name)
        proposal_files = [cfg.DATASETS.PROPOSAL_FILES_TEST[dataset_index]]
    else:
        proposal_files = None
    dataset_dicts = combine_detection_dataset_dicts(
        [dataset_name],
        keep_instance_predicate=keep_predicate,
        proposal_files=proposal_files,
    )

    dataset = DatasetFromList(dataset_dicts)
    dataset = MapDataset(dataset, DatasetMapper(cfg, False) if mapper is None else mapper)

    # Always use 1 image per worker during inference since this is the
    # standard when reporting inference time in papers.
    batch_sampler = torch.utils.data.sampler.BatchSampler(
        samplers.InferenceSampler(len(dataset)), 1, drop_last=False
    )
    return torch.utils.data.DataLoader(
        dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )

View File

@@ -0,0 +1,118 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import copy
import torch
from fvcore.common.file_io import PathManager
from detectron2.data import MetadataCatalog
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
class DatasetMapper:
    """
    A customized version of `detectron2.data.DatasetMapper` that can also
    attach DensePose ground truth to the produced instances.
    """

    def __init__(self, cfg, is_train=True):
        self.tfm_gens = utils.build_transform_gen(cfg, is_train)

        # fmt: off
        self.img_format     = cfg.INPUT.FORMAT
        self.mask_on        = cfg.MODEL.MASK_ON
        self.keypoint_on    = cfg.MODEL.KEYPOINT_ON
        self.densepose_on   = cfg.MODEL.DENSEPOSE_ON
        assert not cfg.MODEL.LOAD_PROPOSALS, "not supported yet"
        # fmt: on

        # Flip only makes sense in training
        self.keypoint_hflip_indices = (
            utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
            if self.keypoint_on and is_train
            else None
        )

        if self.densepose_on:
            transform_srcs = []
            for ds in cfg.DATASETS.TRAIN + cfg.DATASETS.TEST:
                transform_srcs.append(MetadataCatalog.get(ds).densepose_transform_src)
            assert len(transform_srcs) > 0
            # TODO: check that DensePose transformation data is the same for
            # all the data. Otherwise one would have to pass DB ID with
            # each entry to select proper transformation data. For now, since
            # all DensePose annotated data uses the same data semantics, we
            # omit this check.
            local_fpath = PathManager.get_local_path(transform_srcs[0])
            self.densepose_transform_data = DensePoseTransformData.load(local_fpath)

        self.is_train = is_train

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        # work on a deep copy: the record is modified by the code below
        record = copy.deepcopy(dataset_dict)
        image = utils.read_image(record["file_name"], format=self.img_format)
        utils.check_image_size(record, image)

        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        image_shape = image.shape[:2]  # h, w
        record["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

        if not self.is_train:
            # annotations are not passed on at test time
            record.pop("annotations", None)
            return record

        # strip ground truth of tasks that are switched off
        for anno in record["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        # USER: Don't call transpose_densepose if you don't need
        annos = []
        for obj in record.pop("annotations"):
            if obj.get("iscrowd", 0) != 0:
                continue
            transformed = utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            annos.append(self._transform_densepose(transformed, transforms))
        instances = utils.annotations_to_instances(annos, image_shape)

        if annos and "densepose" in annos[0]:
            gt_densepose = [obj["densepose"] for obj in annos]
            instances.gt_densepose = DensePoseList(gt_densepose, instances.gt_boxes, image_shape)

        record["instances"] = instances[instances.gt_boxes.nonempty()]
        return record

    def _transform_densepose(self, annotation, transforms):
        """
        Replace the raw DensePose keys of `annotation` by a transformed
        `DensePoseDataRelative` stored under "densepose" (None if the raw
        annotation is not valid). No-op when DensePose is switched off.
        """
        if not self.densepose_on:
            return annotation

        # Handle densepose annotations
        is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
        if not is_valid:
            # logger = logging.getLogger(__name__)
            # logger.debug("Could not load DensePose annotation: {}".format(reason_not_valid))
            DensePoseDataRelative.cleanup_annotation(annotation)
            # NOTE: annotations for certain instances may be unavailable.
            # 'None' is accepted by the DensePoseList data structure.
            annotation["densepose"] = None
            return annotation

        densepose_data = DensePoseDataRelative(annotation, cleanup=True)
        densepose_data.apply_transform(transforms, self.densepose_transform_data)
        annotation["densepose"] = densepose_data
        return annotation

View File

@@ -0,0 +1,5 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from . import builtin # ensure the builtin data are registered
# Export every name present in the module namespace at this point, except
# private names (leading underscore) and names containing "builtin".
__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")]

View File

@@ -0,0 +1,10 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .coco import BASE_DATASETS as BASE_COCO_DATASETS
from .coco import DATASETS as COCO_DATASETS
from .coco import register_datasets as register_coco_datasets
# Root folder passed to the registration calls below as `datasets_root`.
DEFAULT_DATASETS_ROOT = "data"
# Importing this module registers both dataset lists as a side effect.
register_coco_datasets(COCO_DATASETS, DEFAULT_DATASETS_ROOT)
register_coco_datasets(BASE_COCO_DATASETS, DEFAULT_DATASETS_ROOT)

View File

@@ -0,0 +1,314 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import contextlib
import io
import logging
import os
from dataclasses import dataclass
from typing import Any, Dict, Iterable, List, Optional
from fvcore.common.file_io import PathManager
from fvcore.common.timer import Timer
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
# Annotation key holding the DensePose segmentation masks.
DENSEPOSE_MASK_KEY = "dp_masks"
# Annotation keys holding the DensePose point data (everything but the masks).
DENSEPOSE_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V"]
# All DensePose annotation keys copied from COCO annotations into records.
DENSEPOSE_KEYS = DENSEPOSE_KEYS_WITHOUT_MASK + [DENSEPOSE_MASK_KEY]
# Base path passed to `get_metadata` when datasets are registered.
DENSEPOSE_METADATA_URL_PREFIX = "https://dl.fbaipublicfiles.com/densepose/data/"
@dataclass
class CocoDatasetInfo:
    """
    Description of one COCO-format dataset to be registered.
    """

    # name under which the dataset is registered in the catalogs
    name: str
    # folder with the images; prefixed with the datasets root on registration
    # when it is a relative local path
    images_root: str
    # path to the annotations JSON file; prefixed the same way
    annotations_fpath: str
# Datasets whose names start with "densepose_"; paths are relative to the
# datasets root supplied at registration time.
DATASETS = [
    CocoDatasetInfo(
        name="densepose_coco_2014_train",
        images_root="coco/train2014",
        annotations_fpath="coco/annotations/densepose_train2014.json",
    ),
    CocoDatasetInfo(
        name="densepose_coco_2014_minival",
        images_root="coco/val2014",
        annotations_fpath="coco/annotations/densepose_minival2014.json",
    ),
    CocoDatasetInfo(
        name="densepose_coco_2014_minival_100",
        images_root="coco/val2014",
        annotations_fpath="coco/annotations/densepose_minival2014_100.json",
    ),
    CocoDatasetInfo(
        name="densepose_coco_2014_valminusminival",
        images_root="coco/val2014",
        annotations_fpath="coco/annotations/densepose_valminusminival2014.json",
    ),
    CocoDatasetInfo(
        name="densepose_chimps",
        images_root="densepose_evolution/densepose_chimps",
        annotations_fpath="densepose_evolution/annotations/densepose_chimps_densepose.json",
    ),
]
# COCO 2017 datasets that use the plain `instances_*` annotation files;
# paths are relative to the datasets root supplied at registration time.
BASE_DATASETS = [
    CocoDatasetInfo(
        name="base_coco_2017_train",
        images_root="coco/train2017",
        annotations_fpath="coco/annotations/instances_train2017.json",
    ),
    CocoDatasetInfo(
        name="base_coco_2017_val",
        images_root="coco/val2017",
        annotations_fpath="coco/annotations/instances_val2017.json",
    ),
    CocoDatasetInfo(
        name="base_coco_2017_val_100",
        images_root="coco/val2017",
        annotations_fpath="coco/annotations/instances_val2017_100.json",
    ),
]
def _is_relative_local_path(path: os.PathLike):
path_str = os.fsdecode(path)
return ("://" not in path_str) and not os.path.isabs(path)
def _maybe_prepend_base_path(base_path: Optional[os.PathLike], path: os.PathLike):
"""
Prepends the provided path with a base path prefix if:
1) base path is not None;
2) path is a local path
"""
if base_path is None:
return path
if _is_relative_local_path(path):
return os.path.join(base_path, path)
return path
def get_metadata(base_path: Optional[os.PathLike]) -> Dict[str, Any]:
    """
    Returns metadata associated with COCO DensePose data

    Args:
        base_path: Optional[os.PathLike]
            Base path the metadata file names are (conditionally) prefixed with

    Returns:
        Dict[str, Any]
            Mapping from metadata key to the location of the respective file
    """
    file_names = {
        "densepose_transform_src": "UV_symmetry_transforms.mat",
        "densepose_smpl_subdiv": "SMPL_subdiv.mat",
        "densepose_smpl_subdiv_transform": "SMPL_SUBDIV_TRANSFORM.mat",
    }
    return {
        key: _maybe_prepend_base_path(base_path, file_name)
        for key, file_name in file_names.items()
    }
def _load_coco_annotations(json_file: str):
    """
    Load COCO annotations from a JSON file

    Args:
        json_file: str
            Path to the file to load annotations from

    Returns:
        Instance of `pycocotools.coco.COCO` that provides access to annotations
        data
    """
    from pycocotools.coco import COCO

    timer = Timer()
    # anything printed to stdout while the COCO object is built is discarded
    discarded_output = io.StringIO()
    with contextlib.redirect_stdout(discarded_output):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logging.getLogger(__name__).info(
            "Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())
        )
    return coco_api
def _add_categories_metadata(dataset_name: str, categories: List[Dict[str, Any]]):
    """
    Store the category id -> name mapping in the dataset metadata and log it.

    Args:
        dataset_name: str
            Name of the dataset whose metadata is updated
        categories: List[Dict[str, Any]]
            Category records; each one is read through its "id" and "name" keys
    """
    meta = MetadataCatalog.get(dataset_name)
    meta.categories = {c["id"]: c["name"] for c in categories}
    logger = logging.getLogger(__name__)
    logger.info("Dataset {} categories: {}".format(dataset_name, categories))
def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]):
if "minival" in json_file:
# Skip validation on COCO2014 valminusminival and minival annotations
# The ratio of buggy annotations there is tiny and does not affect accuracy
# Therefore we explicitly white-list them
return
ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
json_file
)
def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "bbox" not in ann_dict:
return
obj["bbox"] = ann_dict["bbox"]
obj["bbox_mode"] = BoxMode.XYWH_ABS
def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "segmentation" not in ann_dict:
return
segm = ann_dict["segmentation"]
if not isinstance(segm, dict):
# filter out invalid polygons (< 3 points)
segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
if len(segm) == 0:
return
obj["segmentation"] = segm
def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "keypoints" not in ann_dict:
return
keypts = ann_dict["keypoints"] # list[int]
for idx, v in enumerate(keypts):
if idx % 3 != 2:
# COCO's segmentation coordinates are floating points in [0, H or W],
# but keypoint coordinates are integers in [0, H-1 or W-1]
# Therefore we assume the coordinates are "pixel indices" and
# add 0.5 to convert to floating point coordinates.
keypts[idx] = v + 0.5
obj["keypoints"] = keypts
def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
    """
    Copy every DensePose entry (keys from `DENSEPOSE_KEYS`) that is present
    in `ann_dict` into `obj`.
    """
    obj.update({key: ann_dict[key] for key in DENSEPOSE_KEYS if key in ann_dict})
def _combine_images_with_annotations(
dataset_name: str,
image_root: str,
img_datas: Iterable[Dict[str, Any]],
ann_datas: Iterable[Iterable[Dict[str, Any]]],
):
ann_keys = ["iscrowd", "category_id"]
dataset_dicts = []
for img_dict, ann_dicts in zip(img_datas, ann_datas):
record = {}
record["file_name"] = os.path.join(image_root, img_dict["file_name"])
record["height"] = img_dict["height"]
record["width"] = img_dict["width"]
record["image_id"] = img_dict["id"]
record["dataset"] = dataset_name
objs = []
for ann_dict in ann_dicts:
assert ann_dict["image_id"] == record["image_id"]
assert ann_dict.get("ignore", 0) == 0
obj = {key: ann_dict[key] for key in ann_keys if key in ann_dict}
_maybe_add_bbox(obj, ann_dict)
_maybe_add_segm(obj, ann_dict)
_maybe_add_keypoints(obj, ann_dict)
_maybe_add_densepose(obj, ann_dict)
objs.append(obj)
record["annotations"] = objs
dataset_dicts.append(record)
return dataset_dicts
def load_coco_json(annotations_json_file: str, image_root: str, dataset_name: str):
    """
    Loads a JSON file with annotations in COCO instances format.

    Handles metadata in a more flexible way than detectron2's stock COCO
    loader: category mapping is postponed to a later stage, which makes it
    possible to combine several datasets with different (but coherent) sets
    of categories. The category id -> name mapping of the file is stored in
    the metadata of `dataset_name`.

    Args:
        annotations_json_file: str
            Path to the JSON file with annotations in COCO instances format.
        image_root: str
            directory that contains all the images
        dataset_name: str
            the name that identifies a dataset, e.g. "densepose_coco_2014_train"

    Returns:
        List of dataset dicts, one per image, ordered by image id.
    """
    local_json_file = PathManager.get_local_path(annotations_json_file)
    coco_api = _load_coco_annotations(local_json_file)
    categories = coco_api.loadCats(coco_api.getCatIds())
    _add_categories_metadata(dataset_name, categories)

    # sort indices for reproducible results
    image_ids = sorted(coco_api.imgs.keys())
    # images is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    images = coco_api.loadImgs(image_ids)
    logging.getLogger(__name__).info(
        "Loaded {} images in COCO format from {}".format(len(images), annotations_json_file)
    )

    # annotations is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images.
    annotations = []
    for image_id in image_ids:
        annotations.append(coco_api.imgToAnns[image_id])
    _verify_annotations_have_unique_ids(annotations_json_file, annotations)
    return _combine_images_with_annotations(dataset_name, image_root, images, annotations)
def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[os.PathLike] = None):
    """
    Registers provided COCO DensePose dataset

    The annotations are loaded lazily: only a loader callback is registered
    in `DatasetCatalog`. The metadata gets the annotations path, the images
    root and the DensePose metadata file locations.

    Args:
        dataset_data: CocoDatasetInfo
            Dataset data
        datasets_root: Optional[os.PathLike]
            Datasets root folder (default: None)
    """
    name = dataset_data.name
    json_fpath = _maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath)
    img_root = _maybe_prepend_base_path(datasets_root, dataset_data.images_root)

    def load_annotations():
        return load_coco_json(
            annotations_json_file=json_fpath, image_root=img_root, dataset_name=name
        )

    DatasetCatalog.register(name, load_annotations)
    metadata = MetadataCatalog.get(name)
    metadata.set(
        json_file=json_fpath,
        image_root=img_root,
        **get_metadata(DENSEPOSE_METADATA_URL_PREFIX)
    )
def register_datasets(
    datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[os.PathLike] = None
):
    """
    Registers provided COCO DensePose datasets

    Args:
        datasets_data: Iterable[CocoDatasetInfo]
            An iterable of dataset descriptions, registered one by one
        datasets_root: Optional[os.PathLike]
            Datasets root folder (default: None)
    """
    for info in datasets_data:
        register_dataset(info, datasets_root)

View File

@@ -0,0 +1,579 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import base64
import numpy as np
from io import BytesIO
import torch
from PIL import Image
from torch.nn import functional as F
class DensePoseTransformData(object):
    """
    Label and UV symmetry data used to adjust DensePose annotations and
    outputs after a horizontal flip.
    """

    # Horizontal symmetry label transforms used for horizontal flip
    MASK_LABEL_SYMMETRIES = [0, 1, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14]
    # fmt: off
    POINT_LABEL_SYMMETRIES = [ 0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24, 23]  # noqa
    # fmt: on

    def __init__(self, uv_symmetries):
        """
        Args:
            uv_symmetries: mapping with keys "U_transforms" and "V_transforms"
                holding the stacked lookup tensors (see `load`)
        """
        self.mask_label_symmetries = DensePoseTransformData.MASK_LABEL_SYMMETRIES
        self.point_label_symmetries = DensePoseTransformData.POINT_LABEL_SYMMETRIES
        self.uv_symmetries = uv_symmetries

    @staticmethod
    def load(fpath):
        """
        Load UV symmetry transforms from a MATLAB file.

        For each of the keys "U_transforms" and "V_transforms" the entries
        `[0, i]` of the stored array are converted to float tensors and
        stacked along a new first dimension.

        Args:
            fpath: path to the .mat file

        Returns:
            DensePoseTransformData with the tensors placed on the current
            CUDA device when CUDA is available, on the CPU otherwise.
        """
        import scipy.io

        uv_symmetry_map = scipy.io.loadmat(fpath)
        # Keep the original placement on GPU hosts, but do not crash on
        # CPU-only hosts where querying the current CUDA device fails.
        if torch.cuda.is_available():
            device = torch.cuda.current_device()
        else:
            device = torch.device("cpu")
        uv_symmetry_map_torch = {}
        for key in ["U_transforms", "V_transforms"]:
            map_src = uv_symmetry_map[key]
            map_dst = [
                torch.from_numpy(map_src[0, i]).to(dtype=torch.float)
                for i in range(map_src.shape[1])
            ]
            uv_symmetry_map_torch[key] = torch.stack(map_dst, dim=0).to(device=device)
        transform_data = DensePoseTransformData(uv_symmetry_map_torch)
        return transform_data
class DensePoseDataRelative(object):
    """
    Dense pose relative annotations that can be applied to any bounding box:
        x - normalized X coordinates [0, 255] of annotated points
        y - normalized Y coordinates [0, 255] of annotated points
        i - body part labels 0,...,24 for annotated points
        u - body part U coordinates [0, 1] for annotated points
        v - body part V coordinates [0, 1] for annotated points
        segm - 256x256 segmentation mask with values 0,...,14
    To obtain absolute x and y data wrt some bounding box one needs to first
    divide the data by 256, multiply by the respective bounding box size
    and add bounding box offset:
        x_img = x0 + x_norm * w / 256.0
        y_img = y0 + y_norm * h / 256.0
    Segmentation masks are typically sampled to get image-based masks.
    """

    # Key for normalized X coordinates in annotation dict
    X_KEY = "dp_x"
    # Key for normalized Y coordinates in annotation dict
    Y_KEY = "dp_y"
    # Key for U part coordinates in annotation dict
    U_KEY = "dp_U"
    # Key for V part coordinates in annotation dict
    V_KEY = "dp_V"
    # Key for I point labels in annotation dict
    I_KEY = "dp_I"
    # Key for segmentation mask in annotation dict
    S_KEY = "dp_masks"
    # All annotation keys, in the order in which they are validated
    _ANNOTATION_KEYS = (X_KEY, Y_KEY, I_KEY, U_KEY, V_KEY, S_KEY)
    # Number of body parts in segmentation masks
    N_BODY_PARTS = 14
    # Number of parts in point labels
    N_PART_LABELS = 24
    MASK_SIZE = 256

    def __init__(self, annotation, cleanup=False):
        """
        Args:
            annotation (dict): annotation containing all DensePose keys
            cleanup (bool): if True, the DensePose keys are removed from
                `annotation` once the data has been extracted

        Raises:
            AssertionError: if a DensePose key is missing in `annotation`.
        """
        is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
        assert is_valid, "Invalid DensePose annotations: {}".format(reason_not_valid)
        self.x = torch.as_tensor(annotation[DensePoseDataRelative.X_KEY])
        self.y = torch.as_tensor(annotation[DensePoseDataRelative.Y_KEY])
        self.i = torch.as_tensor(annotation[DensePoseDataRelative.I_KEY])
        self.u = torch.as_tensor(annotation[DensePoseDataRelative.U_KEY])
        self.v = torch.as_tensor(annotation[DensePoseDataRelative.V_KEY])
        self.segm = DensePoseDataRelative.extract_segmentation_mask(annotation)
        self.device = torch.device("cpu")
        if cleanup:
            DensePoseDataRelative.cleanup_annotation(annotation)

    def to(self, device):
        """
        Return the data on `device`: `self` if it is already there,
        otherwise a new object holding the moved tensors.
        """
        if self.device == device:
            return self
        # bypass __init__, which expects a raw annotation dict
        new_data = DensePoseDataRelative.__new__(DensePoseDataRelative)
        for name in ("x", "y", "i", "u", "v", "segm"):
            setattr(new_data, name, getattr(self, name).to(device))
        new_data.device = device
        return new_data

    @staticmethod
    def extract_segmentation_mask(annotation):
        """
        Decode the per-part masks stored under `S_KEY` into a single
        MASK_SIZE x MASK_SIZE float tensor in which part `i` (0-based) is
        marked with value `i + 1`; later parts overwrite earlier ones.
        """
        import pycocotools.mask as mask_utils

        poly_specs = annotation[DensePoseDataRelative.S_KEY]
        segm = torch.zeros((DensePoseDataRelative.MASK_SIZE,) * 2, dtype=torch.float32)
        for i in range(DensePoseDataRelative.N_BODY_PARTS):
            poly_i = poly_specs[i]
            if poly_i:
                mask_i = mask_utils.decode(poly_i)
                segm[mask_i > 0] = i + 1
        return segm

    @staticmethod
    def validate_annotation(annotation):
        """
        Check that all DensePose keys are present in `annotation`.

        Returns:
            (True, None) if the annotation is complete, otherwise
            (False, reason) naming the first missing key.
        """
        for key in DensePoseDataRelative._ANNOTATION_KEYS:
            if key not in annotation:
                return False, "no {key} data in the annotation".format(key=key)
        return True, None

    @staticmethod
    def cleanup_annotation(annotation):
        """
        Remove all DensePose keys that are present from `annotation` (in place).
        """
        for key in DensePoseDataRelative._ANNOTATION_KEYS:
            if key in annotation:
                del annotation[key]

    def apply_transform(self, transforms, densepose_transform_data):
        """
        Apply image transforms to the point data and the segmentation mask.
        Only horizontal flips change the data.
        """
        self._transform_pts(transforms, densepose_transform_data)
        self._transform_segm(transforms, densepose_transform_data)

    @staticmethod
    def _is_hflipped(transforms):
        """
        Tell whether `transforms` contains an odd number of horizontal flips.
        """
        import detectron2.data.transforms as T

        # NOTE: This assumes that HorizFlipTransform is the only one that does flip
        return sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1

    def _transform_pts(self, transforms, dp_transform_data):
        if self._is_hflipped(transforms):
            # mirror X within the normalized mask frame, then swap the semantics
            self.x = self.segm.size(1) - self.x
            self._flip_iuv_semantics(dp_transform_data)

    def _flip_iuv_semantics(self, dp_transform_data: "DensePoseTransformData") -> None:
        # labels are read from a snapshot so that already relabelled points
        # are not relabelled a second time
        i_old = self.i.clone()
        uv_symmetries = dp_transform_data.uv_symmetries
        pt_label_symmetries = dp_transform_data.point_label_symmetries
        for i in range(self.N_PART_LABELS):
            if i + 1 in i_old:
                annot_indices_i = i_old == i + 1
                if pt_label_symmetries[i + 1] != i + 1:
                    self.i[annot_indices_i] = pt_label_symmetries[i + 1]
                u_loc = (self.u[annot_indices_i] * 255).long()
                v_loc = (self.v[annot_indices_i] * 255).long()
                self.u[annot_indices_i] = uv_symmetries["U_transforms"][i][v_loc, u_loc].to(
                    device=self.u.device
                )
                self.v[annot_indices_i] = uv_symmetries["V_transforms"][i][v_loc, u_loc].to(
                    device=self.v.device
                )

    def _transform_segm(self, transforms, dp_transform_data):
        if self._is_hflipped(transforms):
            self.segm = torch.flip(self.segm, [1])
            self._flip_segm_semantics(dp_transform_data)

    def _flip_segm_semantics(self, dp_transform_data):
        # labels are read from a snapshot so that swapped pairs do not collide
        old_segm = self.segm.clone()
        mask_label_symmetries = dp_transform_data.mask_label_symmetries
        for i in range(self.N_BODY_PARTS):
            if mask_label_symmetries[i + 1] != i + 1:
                self.segm[old_segm == i + 1] = mask_label_symmetries[i + 1]
def normalized_coords_transform(x0, y0, w, h):
    """
    Build a coordinates transform for the box with top left corner (x0, y0),
    width w and height h: the top left corner is mapped to (-1, -1) and the
    bottom right corner to (1, 1). Used for torch.grid_sample to initialize
    the grid.

    Returns:
        A function that maps a point `p` (indexable as p[0] = x, p[1] = y)
        to a tuple of normalized coordinates.
    """

    def f(p):
        x_norm = 2 * (p[0] - x0) / w - 1
        y_norm = 2 * (p[1] - y0) / h - 1
        return (x_norm, y_norm)

    return f
class DensePoseOutput(object):
def __init__(self, S, I, U, V, confidences):
"""
Args:
S (`torch.Tensor`): coarse segmentation tensor of size (N, A, H, W)
I (`torch.Tensor`): fine segmentation tensor of size (N, C, H, W)
U (`torch.Tensor`): U coordinates for each fine segmentation label of size (N, C, H, W)
V (`torch.Tensor`): V coordinates for each fine segmentation label of size (N, C, H, W)
confidences (dict of str -> `torch.Tensor`) estimated confidence model parameters
"""
self.S = S
self.I = I # noqa: E741
self.U = U
self.V = V
self.confidences = confidences
self._check_output_dims(S, I, U, V)
def _check_output_dims(self, S, I, U, V):
assert (
len(S.size()) == 4
), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format(
S.size()
)
assert (
len(I.size()) == 4
), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format(
S.size()
)
assert (
len(U.size()) == 4
), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format(
S.size()
)
assert (
len(V.size()) == 4
), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format(
S.size()
)
assert len(S) == len(I), (
"Number of output segmentation planes {} "
"should be equal to the number of output part index "
"planes {}".format(len(S), len(I))
)
assert S.size()[2:] == I.size()[2:], (
"Output segmentation plane size {} "
"should be equal to the output part index "
"plane size {}".format(S.size()[2:], I.size()[2:])
)
assert I.size() == U.size(), (
"Part index output shape {} "
"should be the same as U coordinates output shape {}".format(I.size(), U.size())
)
assert I.size() == V.size(), (
"Part index output shape {} "
"should be the same as V coordinates output shape {}".format(I.size(), V.size())
)
def resize(self, image_size_hw):
    """
    Do nothing: the outputs are invariant to resize, so `image_size_hw`
    is ignored.
    """
    return None
def _crop(self, S, I, U, V, bbox_old_xywh, bbox_new_xywh):
    """
    Resample S, I, U, V from bbox_old to the cropped bbox_new

    A sampling grid covering the new box is built in the normalized
    coordinates of the old box, and each tensor is resampled over it with
    bilinear interpolation and border padding.

    Returns:
        Tuple (S_new, I_new, U_new, V_new) of resampled tensors.
    """
    x0old, y0old, wold, hold = bbox_old_xywh
    x0new, y0new, wnew, hnew = bbox_new_xywh
    to_normalized = normalized_coords_transform(x0old, y0old, wold, hold)
    x_start, y_start = to_normalized((x0new, y0new))
    x_stop, y_stop = to_normalized((x0new + wnew, y0new + hnew))

    hsize = S.size(1)
    wsize = S.size(2)
    # the slices guard against an extra element caused by float rounding
    ys = torch.arange(y_start, y_stop, (y_stop - y_start) / hsize)[:hsize]
    xs = torch.arange(x_start, x_stop, (x_stop - x_start) / wsize)[:wsize]
    # NOTE(review): the last grid dimension is stacked as (y, x) here, while
    # grid_sample appears to expect (x, y) - confirm before this code is used.
    grid = torch.stack(torch.meshgrid(ys, xs), dim=2).to(S.device)
    assert (
        grid.size(0) == hsize
    ), "Resampled grid expected height={}, actual height={}".format(hsize, grid.size(0))
    assert (
        grid.size(1) == wsize
    ), "Resampled grid expected width={}, actual width={}".format(wsize, grid.size(1))

    def resample(tensor):
        return F.grid_sample(
            tensor.unsqueeze(0),
            torch.unsqueeze(grid, 0),
            mode="bilinear",
            padding_mode="border",
            align_corners=True,
        ).squeeze(0)

    return resample(S), resample(I), resample(U), resample(V)
def crop(self, indices_cropped, bboxes_old, bboxes_new):
    """
    Crop outputs for selected bounding boxes to the new bounding boxes.

    Currently a placeholder: all arguments are ignored and the stored
    outputs are left untouched.
    """
    # VK: cropping is ignored for now
    # The intended per-box resampling, kept for reference:
    # for i, ic in enumerate(indices_cropped):
    #     self.S[ic], self.I[ic], self.U[ic], self.V[ic] = \
    #         self._crop(self.S[ic], self.I[ic], self.U[ic], self.V[ic],
    #                    bboxes_old[i], bboxes_new[i])
    return None
def hflip(self, transform_data: DensePoseTransformData) -> None:
    """
    Change S, I, U and V to take into account a Horizontal flip.

    Args:
        transform_data: symmetry data forwarded to the IUV and segmentation
            semantic flips that follow the spatial flip.
    """
    # Nothing to flip when there are no instances
    if self.I.shape[0] <= 0:
        return
    state = vars(self)
    for name in "SIUV":
        # mirror along the last (4th) dimension
        state[name] = torch.flip(state[name], [3])
    self._flip_iuv_semantics_tensor(transform_data)
    self._flip_segm_semantics_tensor(transform_data)
def _flip_iuv_semantics_tensor(self, dp_transform_data: DensePoseTransformData) -> None:
    """
    Remap U / V values and permute the I, U, V channels using the symmetry
    tables from `dp_transform_data`.

    Channel 0 of U and V is left untouched by the value remapping
    (presumably the background channel - TODO confirm).
    """
    label_permutation = dp_transform_data.point_label_symmetries
    symmetries = dp_transform_data.uv_symmetries
    n_inst, n_chan, height, width = self.U.shape
    # Quantize clamped U / V values to integer lookup indices in [0, 255];
    # both are computed before either tensor is overwritten below.
    u_idx = (self.U[:, 1:, :, :].clamp(0, 1) * 255).long()
    v_idx = (self.V[:, 1:, :, :].clamp(0, 1) * 255).long()
    # Per-element index of the (non-zero) channel, broadcast to full shape
    chan_idx = torch.arange(n_chan - 1, device=self.U.device)
    chan_idx = chan_idx[None, :, None, None].expand(n_inst, n_chan - 1, height, width)
    flipped_u = symmetries["U_transforms"][chan_idx, v_idx, u_idx]
    self.U[:, 1:, :, :] = flipped_u.to(device=self.U.device)
    flipped_v = symmetries["V_transforms"][chan_idx, v_idx, u_idx]
    self.V[:, 1:, :, :] = flipped_v.to(device=self.V.device)
    # Finally reorder the channels of I, U and V with the label symmetries
    state = vars(self)
    for name in "IUV":
        state[name] = state[name][:, label_permutation, :, :]
def _flip_segm_semantics_tensor(self, dp_transform_data):
    """
    Permute the channels of S with `mask_label_symmetries`.

    Applied only when S has exactly N_BODY_PARTS + 1 channels; otherwise S
    is left unchanged.
    """
    expected_channels = DensePoseDataRelative.N_BODY_PARTS + 1
    if self.S.shape[1] != expected_channels:
        return
    channel_order = dp_transform_data.mask_label_symmetries
    self.S = self.S[:, channel_order, :, :]
def to_result(self, boxes_xywh):
    """
    Convert DensePose outputs to results format. Results are more compact,
    but cannot be resampled any more

    Args:
        boxes_xywh: bounding boxes passed to DensePoseResult together with
            the S, I, U, V tensors.
    Returns:
        DensePoseResult built from the boxes and the stored tensors.
    """
    return DensePoseResult(boxes_xywh, self.S, self.I, self.U, self.V)
def __getitem__(self, item):
    """
    Select a subset of the outputs.

    An `int` index picks a single entry and re-adds the leading dimension,
    so the result keeps the same number of dimensions; any other index is
    applied to the tensors directly.

    Returns:
        DensePoseOutput with the selected S, I, U, V and confidences.
    """
    restore_leading_dim = isinstance(item, int)

    def select(tensor):
        picked = tensor[item]
        return picked.unsqueeze(0) if restore_leading_dim else picked

    S_selected = select(self.S)
    I_selected = select(self.I)
    U_selected = select(self.U)
    V_selected = select(self.V)
    conf_selected = {key: select(self.confidences[key]) for key in self.confidences}
    return DensePoseOutput(S_selected, I_selected, U_selected, V_selected, conf_selected)
def __str__(self):
s = "DensePoseOutput S {}, I {}, U {}, V {}".format(
list(self.S.size()), list(self.I.size()), list(self.U.size()), list(self.V.size())
)
s_conf = "confidences: [{}]".format(
", ".join([f"{key} {list(self.confidences[key].size())}" for key in self.confidences])
)
return ", ".join([s, s_conf])
def __len__(self):
return self.S.size(0)
class DensePoseResult(object):
    """
    Compact DensePose results: one PNG-compressed, base64-encoded IUV array
    per bounding box.

    `results[k]` is a tuple `(shape, encoded)` where `shape` is the shape of
    the (3, H, W) uint8 array and `encoded` is the string produced by
    `encode_png_data`; `boxes_xywh[k]` is the matching box as a list.
    """

    def __init__(self, boxes_xywh, S, I, U, V):
        """
        Args:
            boxes_xywh: tensor of size [N, 4] with boxes in XYWH format
            S, I, U, V: output tensors; entry `i` along the first dimension
                corresponds to box `i`
        """
        assert len(boxes_xywh.size()) == 2
        assert boxes_xywh.size(1) == 4
        self.results = []
        self.boxes_xywh = boxes_xywh.cpu().tolist()
        for i, box_xywh in enumerate(boxes_xywh):
            # index with [[i]] to keep the leading dimension
            result_i = self._output_to_result(box_xywh, S[[i]], I[[i]], U[[i]], V[[i]])
            result_numpy_i = result_i.cpu().numpy()
            result_encoded_i = DensePoseResult.encode_png_data(result_numpy_i)
            self.results.append((result_numpy_i.shape, result_encoded_i))

    def __str__(self):
        shapes = ", ".join(str(list(shape)) for shape, _ in self.results)
        return "DensePoseResult: N={} [{}]".format(len(self.results), shapes)

    def _output_to_result(self, box_xywh, S, I, U, V):
        """
        Resample the outputs of one instance to its box size and pack them
        into a single uint8 tensor of size [3, h, w]:
        channel 0 - part index (zeroed where the S argmax is 0),
        channels 1, 2 - U and V of the selected part scaled to [0, 255].
        """
        _, _, w, h = box_xywh
        # boxes narrower / lower than one pixel still yield a 1-pixel result
        w = max(int(w), 1)
        h = max(int(h), 1)
        result = torch.zeros([3, h, w], dtype=torch.uint8, device=U.device)
        assert (
            len(S.size()) == 4
        ), "AnnIndex tensor size should have {} dimensions but has {}".format(4, len(S.size()))
        s_bbox = F.interpolate(S, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
        assert (
            len(I.size()) == 4
        ), "IndexUV tensor size should have {} dimensions but has {}".format(4, len(I.size()))
        i_bbox = (
            F.interpolate(I, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
            * (s_bbox > 0).long()
        ).squeeze(0)
        assert len(U.size()) == 4, "U tensor size should have {} dimensions but has {}".format(
            4, len(U.size())
        )
        u_bbox = F.interpolate(U, (h, w), mode="bilinear", align_corners=False)
        assert len(V.size()) == 4, "V tensor size should have {} dimensions but has {}".format(
            4, len(V.size())
        )
        v_bbox = F.interpolate(V, (h, w), mode="bilinear", align_corners=False)
        result[0] = i_bbox
        for part_id in range(1, u_bbox.size(1)):
            # pixels assigned to this part; computed once and reused
            part_mask = i_bbox == part_id
            result[1][part_mask] = (
                (u_bbox[0, part_id][part_mask] * 255).clamp(0, 255).to(torch.uint8)
            )
            result[2][part_mask] = (
                (v_bbox[0, part_id][part_mask] * 255).clamp(0, 255).to(torch.uint8)
            )
        assert (
            result.size(1) == h
        ), "Results height {} should be equal to bounding box height {}".format(result.size(1), h)
        assert (
            result.size(2) == w
        ), "Results width {} should be equal to bounding box width {}".format(result.size(2), w)
        return result

    @staticmethod
    def encode_png_data(arr):
        """
        Encode array data as a PNG image using the highest compression rate
        @param arr [in] Data stored in an array of size (3, M, N) of type uint8
        @return Base64-encoded string containing PNG-compressed data
        """
        assert len(arr.shape) == 3, "Expected a 3D array as an input," " got a {0}D array".format(
            len(arr.shape)
        )
        assert arr.shape[0] == 3, "Expected first array dimension of size 3," " got {0}".format(
            arr.shape[0]
        )
        assert arr.dtype == np.uint8, "Expected an array of type np.uint8, " " got {0}".format(
            arr.dtype
        )
        # PIL expects channels last: (3, M, N) -> (M, N, 3)
        data = np.moveaxis(arr, 0, -1)
        im = Image.fromarray(data)
        fstream = BytesIO()
        im.save(fstream, format="png", optimize=True)
        s = base64.encodebytes(fstream.getvalue()).decode()
        return s

    @staticmethod
    def decode_png_data(shape, s):
        """
        Decode array data from a string that contains PNG-compressed data
        @param shape [in] Shape (3, M, N) of the array that was encoded
        @param s [in] Base64-encoded string containing PNG-compressed data
        @return Data stored in an array of size (3, M, N) of type uint8
        """
        fstream = BytesIO(base64.decodebytes(s.encode()))
        im = Image.open(fstream)
        # Convert the image to an (M, N, 3) array directly instead of going
        # through a per-pixel Python sequence, then move channels first.
        data = np.moveaxis(np.array(im, dtype=np.uint8), -1, 0)
        return data.reshape(shape)

    def __len__(self):
        return len(self.results)

    def __getitem__(self, item):
        """
        Return the tuple (encoded result, box in XYWH format) for `item`
        """
        result_encoded = self.results[item]
        bbox_xywh = self.boxes_xywh[item]
        return result_encoded, bbox_xywh
class DensePoseList(object):
    """
    List of per-instance DensePose annotations (or None placeholders) paired
    with absolute XYXY boxes and the size of the image they belong to.
    """

    _TORCH_DEVICE_CPU = torch.device("cpu")

    def __init__(self, densepose_datas, boxes_xyxy_abs, image_size_hw, device=_TORCH_DEVICE_CPU):
        """
        Args:
            densepose_datas: sequence of DensePoseDataRelative or None entries
            boxes_xyxy_abs: boxes, one per entry of `densepose_datas`
            image_size_hw: (height, width) of the image
            device: device all entries and the boxes are moved to
        """
        assert len(densepose_datas) == len(boxes_xyxy_abs), (
            "Attempt to initialize DensePoseList with {} DensePose datas "
            "and {} boxes".format(len(densepose_datas), len(boxes_xyxy_abs))
        )
        self.densepose_datas = []
        for densepose_data in densepose_datas:
            assert isinstance(densepose_data, DensePoseDataRelative) or densepose_data is None, (
                "Attempt to initialize DensePoseList with DensePose datas "
                "of type {}, expected DensePoseDataRelative".format(type(densepose_data))
            )
            # None entries are kept as placeholders
            if densepose_data is None:
                self.densepose_datas.append(None)
            else:
                self.densepose_datas.append(densepose_data.to(device))
        self.boxes_xyxy_abs = boxes_xyxy_abs.to(device)
        self.image_size_hw = image_size_hw
        self.device = device

    def to(self, device):
        """
        Return this list if it already lives on `device`, otherwise a new
        list with all data moved to `device`.
        """
        if self.device != device:
            return DensePoseList(
                self.densepose_datas, self.boxes_xyxy_abs, self.image_size_hw, device
            )
        return self

    def __iter__(self):
        return iter(self.densepose_datas)

    def __len__(self):
        return len(self.densepose_datas)

    def __repr__(self):
        return (
            f"{self.__class__.__name__}("
            f"num_instances={len(self.densepose_datas)}, "
            f"image_width={self.image_size_hw[1]}, "
            f"image_height={self.image_size_hw[0]})"
        )

    def __getitem__(self, item):
        """
        An `int` returns the single stored entry; a slice, a boolean tensor
        mask or any other iterable of indices returns a new DensePoseList
        with the selected entries and boxes.
        """
        if isinstance(item, int):
            return self.densepose_datas[item]
        if isinstance(item, slice):
            selected = self.densepose_datas[item]
        elif isinstance(item, torch.Tensor) and (item.dtype == torch.bool):
            selected = [self.densepose_datas[idx] for idx, flag in enumerate(item) if flag > 0]
        else:
            selected = [self.densepose_datas[idx] for idx in item]
        selected_boxes = self.boxes_xyxy_abs[item]
        return DensePoseList(selected, selected_boxes, self.image_size_hw, self.device)

View File

@@ -0,0 +1,158 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import contextlib
import copy
import io
import itertools
import json
import logging
import os
from collections import OrderedDict
import torch
from fvcore.common.file_io import PathManager
from pycocotools.coco import COCO
from detectron2.data import MetadataCatalog
from detectron2.evaluation import DatasetEvaluator
from detectron2.structures import BoxMode
from detectron2.utils.comm import all_gather, is_main_process, synchronize
from detectron2.utils.logger import create_small_table
from .densepose_coco_evaluation import DensePoseCocoEval, DensePoseEvalMode
class DensePoseCOCOEvaluator(DatasetEvaluator):
    """
    Evaluator that accumulates DensePose predictions and scores them against
    the COCO-format ground truth of a dataset (GPS and GPSm metrics).
    """

    def __init__(self, dataset_name, distributed, output_dir=None):
        """
        Args:
            dataset_name (str): dataset whose metadata provides `json_file`
                with the ground truth annotations
            distributed (bool): if True, predictions are gathered from all
                workers and evaluated on the main process only
            output_dir (str): optional directory to dump the predictions to
        """
        self._distributed = distributed
        self._output_dir = output_dir
        self._cpu_device = torch.device("cpu")
        self._logger = logging.getLogger(__name__)
        self._metadata = MetadataCatalog.get(dataset_name)
        json_file = PathManager.get_local_path(self._metadata.json_file)
        # the COCO API prints to stdout while loading; silence it
        with contextlib.redirect_stdout(io.StringIO()):
            self._coco_api = COCO(json_file)
        # start with an empty buffer so that `process` / `evaluate` work even
        # if `reset` has not been called yet
        self._predictions = []

    def reset(self):
        self._predictions = []

    def process(self, inputs, outputs):
        """
        Args:
            inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
                It is a list of dict. Each dict corresponds to an image and
                contains keys like "height", "width", "file_name", "image_id".
            outputs: the outputs of a COCO model. It is a list of dicts with key
                "instances" that contains :class:`Instances`.
                The :class:`Instances` object needs to have `densepose` field.
        """
        for image_input, image_output in zip(inputs, outputs):
            instances = image_output["instances"].to(self._cpu_device)
            boxes = instances.pred_boxes.tensor.clone()
            boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
            instances.pred_densepose = instances.pred_densepose.to_result(boxes)
            json_results = prediction_to_json(instances, image_input["image_id"])
            self._predictions.extend(json_results)

    def evaluate(self):
        """
        Returns:
            A copy of the metrics dict produced by `_eval_predictions`;
            None on non-main processes in distributed mode.
        """
        if self._distributed:
            synchronize()
            predictions = all_gather(self._predictions)
            predictions = list(itertools.chain(*predictions))
            if not is_main_process():
                return
        else:
            predictions = self._predictions
        return copy.deepcopy(self._eval_predictions(predictions))

    def _eval_predictions(self, predictions):
        """
        Evaluate predictions on densepose.
        Return results with the metrics of the tasks.
        """
        self._logger.info("Preparing results for COCO format ...")
        if self._output_dir:
            # make sure the target directory exists before dumping results
            os.makedirs(self._output_dir, exist_ok=True)
            file_path = os.path.join(self._output_dir, "coco_densepose_results.json")
            with open(file_path, "w") as f:
                json.dump(predictions, f)
                f.flush()
                os.fsync(f.fileno())
        self._logger.info("Evaluating predictions ...")
        res = OrderedDict()
        results_gps, results_gpsm = _evaluate_predictions_on_coco(self._coco_api, predictions)
        res["densepose_gps"] = results_gps
        res["densepose_gpsm"] = results_gpsm
        return res
def prediction_to_json(instances, img_id):
    """
    Args:
        instances (Instances): the output of the model
        img_id (str): the image id in COCO
    Returns:
        list[dict]: the results in densepose evaluation format
    """
    scores = instances.scores.tolist()

    def to_entry(k):
        # each densepose entry is indexable; item 1 is used as the box
        densepose = instances.pred_densepose[k]
        return {
            "image_id": img_id,
            "category_id": 1,  # densepose only has one class
            "bbox": densepose[1],
            "score": scores[k],
            "densepose": densepose,
        }

    return [to_entry(k) for k in range(len(instances))]
def _evaluate_predictions_on_coco(coco_gt, coco_results):
metrics = ["AP", "AP50", "AP75", "APm", "APl"]
logger = logging.getLogger(__name__)
if len(coco_results) == 0: # cocoapi does not handle empty results very well
logger.warn("No predictions from the model! Set scores to -1")
results_gps = {metric: -1 for metric in metrics}
results_gpsm = {metric: -1 for metric in metrics}
return results_gps, results_gpsm
coco_dt = coco_gt.loadRes(coco_results)
results_gps = _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics)
logger.info(
"Evaluation results for densepose, GPS metric: \n" + create_small_table(results_gps)
)
results_gpsm = _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics)
logger.info(
"Evaluation results for densepose, GPSm metric: \n" + create_small_table(results_gpsm)
)
return results_gps, results_gpsm
def _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics):
    """
    Run the DensePose COCO evaluation in GPS mode.

    Returns:
        dict mapping each name in `metrics` to the evaluation statistic at
        the same position, multiplied by 100.
    """
    evaluator = DensePoseCocoEval(
        coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPS
    )
    evaluator.evaluate()
    evaluator.accumulate()
    evaluator.summarize()
    results = {}
    for position, metric_name in enumerate(metrics):
        results[metric_name] = float(evaluator.stats[position] * 100)
    return results
def _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics):
    """
    Run the DensePose COCO evaluation in GPSM mode.

    Returns:
        dict mapping each name in `metrics` to the evaluation statistic at
        the same position, multiplied by 100.
    """
    evaluator = DensePoseCocoEval(
        coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPSM
    )
    evaluator.evaluate()
    evaluator.accumulate()
    evaluator.summarize()
    results = {}
    for position, metric_name in enumerate(metrics):
        results[metric_name] = float(evaluator.stats[position] * 100)
    return results

View File

@@ -0,0 +1,75 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from detectron2.modeling.test_time_augmentation import GeneralizedRCNNWithTTA
class DensePoseGeneralizedRCNNWithTTA(GeneralizedRCNNWithTTA):
    """
    Test-time augmentation wrapper that, in addition to what the base class
    does, merges DensePose predictions across augmented inputs.
    """

    def __init__(self, cfg, model, transform_data, tta_mapper=None, batch_size=1):
        """
        Args:
            cfg (CfgNode):
            model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on.
            transform_data (DensePoseTransformData): contains symmetry label
                transforms used for horizontal flip
            tta_mapper (callable): takes a dataset dict and returns a list of
                augmented versions of the dataset dict. Defaults to
                `DatasetMapperTTA(cfg)`.
            batch_size (int): batch the augmented images into this batch size for inference.
        """
        self._transform_data = transform_data
        super().__init__(cfg=cfg, model=model, tta_mapper=tta_mapper, batch_size=batch_size)

    # the implementation follows closely the one from detectron2/modeling
    def _inference_one_image(self, input):
        """
        Args:
            input (dict): one dataset dict
        Returns:
            dict: one output dict
        """
        augmented_inputs, aug_vars = self._get_augmented_inputs(input)
        # Detect boxes from all augmented versions,
        # with the non-box roi heads temporarily disabled
        with self._turn_off_roi_heads(["mask_on", "keypoint_on", "densepose_on"]):
            all_boxes, all_scores, all_classes = self._get_augmented_boxes(
                augmented_inputs, aug_vars
            )
        image_size_hw = (aug_vars["height"], aug_vars["width"])
        merged_instances = self._merge_detections(
            all_boxes, all_scores, all_classes, image_size_hw
        )
        # Boxes only: nothing else to predict
        if not (self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON):
            return {"instances": merged_instances}
        # Use the detected boxes to obtain new fields
        augmented_instances = self._rescale_detected_boxes(
            augmented_inputs, merged_instances, aug_vars
        )
        # run forward on the detected boxes
        outputs = self._batch_inference(
            augmented_inputs, augmented_instances, do_postprocess=False
        )
        # Delete now useless variables to avoid being out of memory
        del augmented_inputs, augmented_instances, merged_instances
        # average the predictions
        first_output = outputs[0]
        if self.cfg.MODEL.MASK_ON:
            first_output.pred_masks = self._reduce_pred_masks(outputs, aug_vars)
        if self.cfg.MODEL.DENSEPOSE_ON:
            first_output.pred_densepose = self._reduce_pred_densepose(outputs, aug_vars)
        # postprocess
        return {"instances": self._detector_postprocess(first_output, aug_vars)}

    def _reduce_pred_densepose(self, outputs, aug_vars):
        """
        Undo horizontal flips on the DensePose predictions and average the
        S, I, U, V tensors over all outputs; the result is stored in (and
        returned as) the prediction of the first output.
        """
        flip_flags = aug_vars["do_hflip"]
        for idx, output in enumerate(outputs):
            if flip_flags[idx]:
                output.pred_densepose.hflip(self._transform_data)
        # Less memory-intensive averaging
        averaged = outputs[0].pred_densepose
        num_outputs = len(outputs)
        for attr in "SIUV":
            total = sum(getattr(o.pred_densepose, attr) for o in outputs)
            setattr(averaged, attr, total / num_outputs)
        return averaged

View File

@@ -0,0 +1,213 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import numpy as np
from typing import Dict
import fvcore.nn.weight_init as weight_init
import torch
import torch.nn as nn
from torch.nn import functional as F
from detectron2.layers import Conv2d, ShapeSpec, get_norm
from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.roi_heads import select_foreground_proposals
from .densepose_head import (
build_densepose_data_filter,
build_densepose_head,
build_densepose_losses,
build_densepose_predictor,
densepose_inference,
)
class Decoder(nn.Module):
    """
    A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper
    (https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from
    all levels of the FPN into single output.
    """

    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features):
        """
        Args:
            cfg: config; the decoder settings are read from
                `cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_*`
            input_shape: stride and channels of every input feature map
            in_features: names of the feature maps to merge, in order
        """
        super(Decoder, self).__init__()
        head_cfg = cfg.MODEL.ROI_DENSEPOSE_HEAD
        self.in_features = in_features
        feature_strides = {name: spec.stride for name, spec in input_shape.items()}
        feature_channels = {name: spec.channels for name, spec in input_shape.items()}
        num_classes = head_cfg.DECODER_NUM_CLASSES
        conv_dims = head_cfg.DECODER_CONV_DIMS
        self.common_stride = head_cfg.DECODER_COMMON_STRIDE
        norm = head_cfg.DECODER_NORM

        self.scale_heads = []
        for in_feature in self.in_features:
            stride = feature_strides[in_feature]
            # one conv per factor of 2 between this level and the common
            # stride, but at least one
            head_length = max(1, int(np.log2(stride) - np.log2(self.common_stride)))
            head_ops = []
            for k in range(head_length):
                conv = Conv2d(
                    feature_channels[in_feature] if k == 0 else conv_dims,
                    conv_dims,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias=not norm,
                    norm=get_norm(norm, conv_dims),
                    activation=F.relu,
                )
                weight_init.c2_msra_fill(conv)
                head_ops.append(conv)
                if stride != self.common_stride:
                    head_ops.append(
                        nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
                    )
            scale_head = nn.Sequential(*head_ops)
            self.scale_heads.append(scale_head)
            # register under the feature name so the parameters are tracked
            self.add_module(in_feature, scale_head)
        self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
        weight_init.c2_msra_fill(self.predictor)

    def forward(self, features):
        """
        Sum the outputs of the per-level heads (features[i] goes through
        head i) and apply the final 1x1 predictor.
        """
        merged = None
        for level in range(len(self.in_features)):
            level_output = self.scale_heads[level](features[level])
            merged = level_output if merged is None else merged + level_output
        return self.predictor(merged)
@ROI_HEADS_REGISTRY.register()
class DensePoseROIHeads(StandardROIHeads):
    """
    A Standard ROIHeads which contains an addition of DensePose head.
    """

    def __init__(self, cfg, input_shape):
        super().__init__(cfg, input_shape)
        self._init_densepose_head(cfg, input_shape)

    def _init_densepose_head(self, cfg, input_shape):
        """
        Build the DensePose branch (data filter, optional decoder, pooler,
        head, predictor and losses) unless it is disabled in the config.
        """
        # fmt: off
        self.densepose_on          = cfg.MODEL.DENSEPOSE_ON
        if not self.densepose_on:
            return
        self.densepose_data_filter = build_densepose_data_filter(cfg)
        dp_pooler_resolution       = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION
        dp_pooler_sampling_ratio   = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO
        dp_pooler_type             = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE
        self.use_decoder           = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON
        # fmt: on
        if self.use_decoder:
            # the decoder merges all levels into one map; only the scale of
            # the first input feature is passed to the pooler
            dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,)
        else:
            dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features)
        in_channels = [input_shape[f].channels for f in self.in_features][0]
        if self.use_decoder:
            self.decoder = Decoder(cfg, input_shape, self.in_features)
        self.densepose_pooler = ROIPooler(
            output_size=dp_pooler_resolution,
            scales=dp_pooler_scales,
            sampling_ratio=dp_pooler_sampling_ratio,
            pooler_type=dp_pooler_type,
        )
        self.densepose_head = build_densepose_head(cfg, in_channels)
        self.densepose_predictor = build_densepose_predictor(
            cfg, self.densepose_head.n_out_channels
        )
        self.densepose_losses = build_densepose_losses(cfg)

    def _forward_densepose(self, features, instances):
        """
        Forward logic of the densepose prediction branch.
        Args:
            features (list[Tensor]): #level input features for densepose prediction
            instances (list[Instances]): the per-image instances to train/predict densepose.
                In training, they can be the proposals.
                In inference, they can be the predicted boxes.
        Returns:
            In training, a dict of losses (empty if no proposals are left
            after filtering).
            In inference, update `instances` with new fields "densepose" and return it.
        """
        if not self.densepose_on:
            return {} if self.training else instances
        features = [features[f] for f in self.in_features]
        if self.training:
            proposals, _ = select_foreground_proposals(instances, self.num_classes)
            proposals_dp = self.densepose_data_filter(proposals)
            if len(proposals_dp) == 0:
                # Nothing to train on: return an empty loss dict, so that
                # `losses.update(...)` in `forward` does not fail on None.
                # NOTE may deadlock in DDP if certain workers have empty proposals_dp
                return {}
            proposal_boxes = [x.proposal_boxes for x in proposals_dp]
            if self.use_decoder:
                features = [self.decoder(features)]
            features_dp = self.densepose_pooler(features, proposal_boxes)
            densepose_head_outputs = self.densepose_head(features_dp)
            densepose_outputs, _, confidences, _ = self.densepose_predictor(
                densepose_head_outputs
            )
            densepose_loss_dict = self.densepose_losses(
                proposals_dp, densepose_outputs, confidences
            )
            return densepose_loss_dict
        else:
            pred_boxes = [x.pred_boxes for x in instances]
            if self.use_decoder:
                features = [self.decoder(features)]
            features_dp = self.densepose_pooler(features, pred_boxes)
            if len(features_dp) > 0:
                densepose_head_outputs = self.densepose_head(features_dp)
                densepose_outputs, _, confidences, _ = self.densepose_predictor(
                    densepose_head_outputs
                )
            else:
                # If no detection occurred instances
                # set densepose_outputs to empty tensors
                empty_tensor = torch.zeros(size=(0, 0, 0, 0), device=features_dp.device)
                densepose_outputs = tuple([empty_tensor] * 4)
                confidences = tuple([empty_tensor] * 4)
            densepose_inference(densepose_outputs, confidences, instances)
            return instances

    def forward(self, images, features, proposals, targets=None):
        """
        Run the standard ROI heads and, in training, add the DensePose
        losses to the returned loss dict.
        """
        instances, losses = super().forward(images, features, proposals, targets)
        del targets, images
        if self.training:
            losses.update(self._forward_densepose(features, instances))
        return instances, losses

    def forward_with_given_boxes(self, features, instances):
        """
        Use the given boxes in `instances` to produce other (non-box) per-ROI outputs.
        This is useful for downstream tasks where a box is known, but need to obtain
        other attributes (outputs of other heads).
        Test-time augmentation also uses this.
        Args:
            features: same as in `forward()`
            instances (list[Instances]): instances to predict other outputs. Expect the keys
                "pred_boxes" and "pred_classes" to exist.
        Returns:
            instances (list[Instances]):
                the same `Instances` objects, with extra
                fields such as `pred_masks` or `pred_keypoints`.
        """
        instances = super().forward_with_given_boxes(features, instances)
        instances = self._forward_densepose(features, instances)
        return instances

View File

@@ -0,0 +1,145 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from typing import Any, Dict, Optional, Tuple
class EntrySelector(object):
    """
    Base class for entry selectors
    """

    @staticmethod
    def from_string(spec: str) -> "EntrySelector":
        """
        Create a selector from its string specification: "*" gives a
        selector that accepts everything, any other string is parsed as a
        field specifier.
        """
        return AllEntrySelector() if spec == "*" else FieldEntrySelector(spec)
class AllEntrySelector(EntrySelector):
    """
    Selector that accepts all entries
    """

    # specifier string associated with this selector
    SPECIFIER = "*"

    def __call__(self, entry):
        """
        Accept `entry` unconditionally.
        """
        return True
class FieldEntrySelector(EntrySelector):
    """
    Selector that accepts only entries that match provided field
    specifier(s). Only a limited set of specifiers is supported for now:
      <specifiers>::=<specifier>[<comma><specifiers>]
      <specifier>::=<field_name>[<type_delim><type>]<equal><value_or_range>
      <field_name> is a valid identifier
      <type> ::= "int" | "str"
      <equal> ::= "="
      <comma> ::= ","
      <type_delim> ::= ":"
      <value_or_range> ::= <value> | <range>
      <range> ::= <value><range_delim><value>
      <range_delim> ::= "-"
      <value> is a string without spaces and special symbols
        (e.g. <comma>, <equal>, <type_delim>, <range_delim>)
    """

    _SPEC_DELIM = ","
    _TYPE_DELIM = ":"
    _RANGE_DELIM = "-"
    _EQUAL = "="
    _ERROR_PREFIX = "Invalid field selector specifier"

    class _FieldEntryValuePredicate(object):
        """
        Predicate that checks strict equality for the specified entry field
        """

        def __init__(self, name: str, typespec: Optional[str], value: str):
            import builtins

            self.name = name
            # NOTE(review): any builtin name is accepted as a type here, not
            # only "int" / "str"; do not feed untrusted specifiers.
            self.type = getattr(builtins, typespec) if typespec is not None else str
            self.value = value

        def __call__(self, entry):
            return entry[self.name] == self.type(self.value)

    class _FieldEntryRangePredicate(object):
        """
        Predicate that checks whether an entry field falls into the specified range
        """

        def __init__(self, name: str, typespec: Optional[str], vmin: str, vmax: str):
            import builtins

            self.name = name
            # NOTE(review): any builtin name is accepted as a type here, not
            # only "int" / "str"; do not feed untrusted specifiers.
            self.type = getattr(builtins, typespec) if typespec is not None else str
            self.vmin = vmin
            self.vmax = vmax

        def __call__(self, entry):
            # both range ends are inclusive
            return (entry[self.name] >= self.type(self.vmin)) and (
                entry[self.name] <= self.type(self.vmax)
            )

    def __init__(self, spec: str):
        """
        Args:
            spec: comma-separated field specifiers, see the class docstring
        Raises:
            ValueError: if the specifier can not be parsed
        """
        self._predicates = self._parse_specifier_into_predicates(spec)

    def __call__(self, entry: Dict[str, Any]):
        """
        Return True if `entry` satisfies every parsed predicate.
        """
        return all(predicate(entry) for predicate in self._predicates)

    def _parse_specifier_into_predicates(self, spec: str):
        """
        Split `spec` into sub-specifiers and turn each one into a value or
        range predicate.
        """
        predicates = []
        specs = spec.split(self._SPEC_DELIM)
        for subspec in specs:
            eq_idx = subspec.find(self._EQUAL)
            if eq_idx > 0:
                field_name_with_type = subspec[:eq_idx]
                field_name, field_type = self._parse_field_name_type(field_name_with_type)
                field_value_or_range = subspec[eq_idx + 1 :]
                if self._is_range_spec(field_value_or_range):
                    vmin, vmax = self._get_range_spec(field_value_or_range)
                    predicate = FieldEntrySelector._FieldEntryRangePredicate(
                        field_name, field_type, vmin, vmax
                    )
                else:
                    predicate = FieldEntrySelector._FieldEntryValuePredicate(
                        field_name, field_type, field_value_or_range
                    )
                predicates.append(predicate)
            elif eq_idx == 0:
                self._parse_error(f'"{subspec}", field name is empty!')
            else:
                self._parse_error(f'"{subspec}", should have format ' "<field>=<value_or_range>!")
        return predicates

    def _parse_field_name_type(self, field_name_with_type: str) -> Tuple[str, Optional[str]]:
        """
        Split "<name>[:<type>]" into the field name and the optional type
        name (None if no type is given).
        """
        type_delim_idx = field_name_with_type.find(self._TYPE_DELIM)
        if type_delim_idx > 0:
            field_name = field_name_with_type[:type_delim_idx]
            field_type = field_name_with_type[type_delim_idx + 1 :]
        elif type_delim_idx == 0:
            # always raises
            self._parse_error(f'"{field_name_with_type}", field name is empty!')
        else:
            field_name = field_name_with_type
            field_type = None
        return field_name, field_type

    def _is_range_spec(self, field_value_or_range):
        # A leading delimiter (index 0) does not make a range, so a value
        # like "-5" is treated as a single value.
        delim_idx = field_value_or_range.find(self._RANGE_DELIM)
        return delim_idx > 0

    def _get_range_spec(self, field_value_or_range):
        """
        Split "<vmin>-<vmax>" at the first range delimiter.
        Raises ValueError if the string is not a range.
        """
        if self._is_range_spec(field_value_or_range):
            delim_idx = field_value_or_range.find(self._RANGE_DELIM)
            vmin = field_value_or_range[:delim_idx]
            vmax = field_value_or_range[delim_idx + 1 :]
            return vmin, vmax
        else:
            # report the offending value, not the name of the variable
            self._parse_error(f'"{field_value_or_range}", range of values expected!')

    def _parse_error(self, msg):
        raise ValueError(f"{self._ERROR_PREFIX}: {msg}")

View File

@@ -0,0 +1,13 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
def verbosity_to_level(verbosity):
    """
    Map a verbosity count to a `logging` level.

    1 gives INFO, 2 and above give DEBUG; None, 0 and any other value give
    WARNING.
    """
    if verbosity is None:
        return logging.WARNING
    if verbosity == 1:
        return logging.INFO
    if verbosity >= 2:
        return logging.DEBUG
    return logging.WARNING

View File

@@ -0,0 +1,16 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from fvcore.common.file_io import PathManager
from detectron2.data import MetadataCatalog
from densepose import DensePoseTransformData
def load_for_dataset(dataset_name):
    """
    Load the DensePose transform data referenced by the
    `densepose_transform_src` metadata entry of the given dataset.
    """
    metadata = MetadataCatalog.get(dataset_name)
    local_fpath = PathManager.get_local_path(metadata.densepose_transform_src)
    return DensePoseTransformData.load(local_fpath)
def load_from_cfg(cfg):
    """
    Load the DensePose transform data for the first dataset listed in
    `cfg.DATASETS.TEST`.
    """
    first_test_dataset = cfg.DATASETS.TEST[0]
    return load_for_dataset(first_test_dataset)

View File

@@ -0,0 +1,191 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import numpy as np
import cv2
import torch
Image = np.ndarray
Boxes = torch.Tensor
class MatrixVisualizer(object):
    """
    Base visualizer for matrix data
    """

    def __init__(
        self,
        inplace=True,
        cmap=cv2.COLORMAP_PARULA,
        val_scale=1.0,
        alpha=0.7,
        interp_method_matrix=cv2.INTER_LINEAR,
        interp_method_mask=cv2.INTER_NEAREST,
    ):
        """
        Args:
            inplace: draw on the input image (True) or on a zeroed copy
            cmap: OpenCV colormap applied to the scaled matrix values
            val_scale: factor the matrix values are multiplied by before
                they are clipped to [0, 255]
            alpha: blending weight of the color-mapped matrix
            interp_method_matrix: interpolation used to resize the matrix
            interp_method_mask: interpolation used to resize the mask
        """
        self.inplace = inplace
        self.cmap = cmap
        self.val_scale = val_scale
        self.alpha = alpha
        self.interp_method_matrix = interp_method_matrix
        self.interp_method_mask = interp_method_mask

    def visualize(self, image_bgr, mask, matrix, bbox_xywh):
        """
        Blend the color-mapped `matrix` into the image inside `bbox_xywh`;
        pixels where `mask` is 0 keep the image content.

        Returns:
            The resulting uint8 image; the unmodified input image if the
            box has non-positive width or height.
        """
        self._check_image(image_bgr)
        self._check_mask_matrix(mask, matrix)
        if self.inplace:
            image_target_bgr = image_bgr
        else:
            image_target_bgr = image_bgr * 0
        x, y, w, h = [int(v) for v in bbox_xywh]
        if w <= 0 or h <= 0:
            return image_bgr
        mask, matrix = self._resize(mask, matrix, w, h)
        # background mask replicated over the 3 color channels
        mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3])
        matrix_scaled = matrix.astype(np.float32) * self.val_scale
        _EPSILON = 1e-6
        if np.any(matrix_scaled > 255 + _EPSILON):
            logger = logging.getLogger(__name__)
            logger.warning(
                f"Matrix has values > {255 + _EPSILON} after " f"scaling, clipping to [0..255]"
            )
        matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8)
        matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap)
        # keep the image content on the background
        matrix_vis[mask_bg] = image_target_bgr[y : y + h, x : x + w, :][mask_bg]
        image_target_bgr[y : y + h, x : x + w, :] = (
            image_target_bgr[y : y + h, x : x + w, :] * (1.0 - self.alpha) + matrix_vis * self.alpha
        )
        return image_target_bgr.astype(np.uint8)

    def _resize(self, mask, matrix, w, h):
        """
        Resize mask and matrix to (w, h) if their size differs.
        """
        # The interpolation flag must be passed by keyword: the third
        # positional parameter of `cv2.resize` is `dst`, so a positional
        # flag would not select the interpolation method.
        if (w != mask.shape[1]) or (h != mask.shape[0]):
            mask = cv2.resize(mask, (w, h), interpolation=self.interp_method_mask)
        if (w != matrix.shape[1]) or (h != matrix.shape[0]):
            matrix = cv2.resize(matrix, (w, h), interpolation=self.interp_method_matrix)
        return mask, matrix

    def _check_image(self, image_rgb):
        # expects a 3-channel uint8 image
        assert len(image_rgb.shape) == 3
        assert image_rgb.shape[2] == 3
        assert image_rgb.dtype == np.uint8

    def _check_mask_matrix(self, mask, matrix):
        # expects 2D inputs and a uint8 mask
        assert len(matrix.shape) == 2
        assert len(mask.shape) == 2
        assert mask.dtype == np.uint8
class RectangleVisualizer(object):
    """
    Draws a rectangle outline for a bounding box given in XYWH format.
    """

    _COLOR_GREEN = (18, 127, 15)

    def __init__(self, color=_COLOR_GREEN, thickness=1):
        self.color = color
        self.thickness = thickness

    def visualize(self, image_bgr, bbox_xywh, color=None, thickness=None):
        """
        Draw the box on `image_bgr` and return the image. `color` and
        `thickness` fall back to the instance defaults when not given.
        """
        left, top, width, height = bbox_xywh
        line_color = color or self.color
        line_thickness = thickness or self.thickness
        top_left = (int(left), int(top))
        bottom_right = (int(left + width), int(top + height))
        cv2.rectangle(image_bgr, top_left, bottom_right, line_color, line_thickness)
        return image_bgr
class PointsVisualizer(object):
    """
    Draws filled circles at the given points.
    """

    _COLOR_GREEN = (18, 127, 15)

    def __init__(self, color_bgr=_COLOR_GREEN, r=5):
        self.color_bgr = color_bgr
        self.r = r

    def visualize(self, image_bgr, pts_xy, colors_bgr=None, rs=None):
        """
        Args:
            image_bgr: image to draw on (modified in place)
            pts_xy: iterable of (x, y) points
            colors_bgr: optional per-point colors, defaults to `self.color_bgr`
            rs: optional per-point radii, defaults to `self.r`
        Returns:
            The image with the points drawn.
        """
        for j, pt_xy in enumerate(pts_xy):
            x, y = pt_xy
            color_bgr = colors_bgr[j] if colors_bgr is not None else self.color_bgr
            r = rs[j] if rs is not None else self.r
            # OpenCV requires integer pixel coordinates and radius; cast
            # explicitly, as RectangleVisualizer does for box corners
            cv2.circle(image_bgr, (int(x), int(y)), int(r), color_bgr, -1)
        return image_bgr
class TextVisualizer(object):
    """
    Draws a text label with an optional blended frame and fill behind it.
    """

    _COLOR_GRAY = (218, 227, 218)
    _COLOR_WHITE = (255, 255, 255)

    def __init__(
        self,
        font_face=cv2.FONT_HERSHEY_SIMPLEX,
        font_color_bgr=_COLOR_GRAY,
        font_scale=0.35,
        font_line_type=cv2.LINE_AA,
        font_line_thickness=1,
        fill_color_bgr=_COLOR_WHITE,
        fill_color_transparency=1.0,
        frame_color_bgr=_COLOR_WHITE,
        frame_color_transparency=1.0,
        frame_thickness=1,
    ):
        """
        The `*_transparency` values weight the existing image content: at
        1.0 (default) the corresponding fill / frame is not drawn at all.
        """
        self.font_face = font_face
        self.font_color_bgr = font_color_bgr
        self.font_scale = font_scale
        self.font_line_type = font_line_type
        self.font_line_thickness = font_line_thickness
        self.fill_color_bgr = fill_color_bgr
        self.fill_color_transparency = fill_color_transparency
        self.frame_color_bgr = frame_color_bgr
        self.frame_color_transparency = frame_color_transparency
        self.frame_thickness = frame_thickness

    def visualize(self, image_bgr, txt, topleft_xy):
        """
        Draw `txt` on `image_bgr` at `topleft_xy` (modifies the image in
        place) and return the image.
        """
        txt_w, txt_h = self.get_text_size_wh(txt)
        topleft_xy = tuple(map(int, topleft_xy))
        x, y = topleft_xy
        # `np.float` was an alias of the builtin `float` and is no longer
        # available in recent NumPy versions; use `float` directly.
        if self.frame_color_transparency < 1.0:
            t = self.frame_thickness
            image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :] = (
                image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :]
                * self.frame_color_transparency
                + np.array(self.frame_color_bgr) * (1.0 - self.frame_color_transparency)
            ).astype(float)
        if self.fill_color_transparency < 1.0:
            image_bgr[y : y + txt_h, x : x + txt_w, :] = (
                image_bgr[y : y + txt_h, x : x + txt_w, :] * self.fill_color_transparency
                + np.array(self.fill_color_bgr) * (1.0 - self.fill_color_transparency)
            ).astype(float)
        cv2.putText(
            image_bgr,
            txt,
            topleft_xy,
            self.font_face,
            self.font_scale,
            self.font_color_bgr,
            self.font_line_thickness,
            self.font_line_type,
        )
        return image_bgr

    def get_text_size_wh(self, txt):
        """
        Return (width, height) of `txt` rendered with the configured font.
        """
        ((txt_w, txt_h), _) = cv2.getTextSize(
            txt, self.font_face, self.font_scale, self.font_line_thickness
        )
        return txt_w, txt_h
class CompoundVisualizer(object):
    """Applies several visualizers in sequence, each with its own data item."""

    def __init__(self, visualizers):
        self.visualizers = visualizers

    def visualize(self, image_bgr, data):
        """
        Feed `data[k]` to the k-th visualizer, chaining the returned images.

        `data` must contain exactly one entry per wrapped visualizer.
        """
        n_data = len(data)
        n_visualizers = len(self.visualizers)
        assert (
            n_data == n_visualizers
        ), "The number of datas {} should match the number of visualizers {}".format(
            len(data), len(self.visualizers)
        )
        canvas = image_bgr
        for visualizer, visualizer_data in zip(self.visualizers, data):
            canvas = visualizer.visualize(canvas, visualizer_data)
        return canvas

    def __str__(self):
        names = [str(visualizer) for visualizer in self.visualizers]
        return "Compound Visualizer [{}]".format(", ".join(names))

View File

@@ -0,0 +1,37 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .base import RectangleVisualizer, TextVisualizer
class BoundingBoxVisualizer(object):
    """Draws every box of a collection using a default RectangleVisualizer."""

    def __init__(self):
        self.rectangle_visualizer = RectangleVisualizer()

    def visualize(self, image_bgr, boxes_xywh):
        """Draw each (x, y, w, h) box in turn and return the resulting image."""
        canvas = image_bgr
        for box in boxes_xywh:
            canvas = self.rectangle_visualizer.visualize(canvas, box)
        return canvas
class ScoredBoundingBoxVisualizer(object):
    """Draws bounding boxes together with their scores rendered as text."""

    def __init__(self, bbox_visualizer_params=None, score_visualizer_params=None):
        """
        Args:
            bbox_visualizer_params: keyword arguments for RectangleVisualizer
                (None means "use its defaults").
            score_visualizer_params: keyword arguments for TextVisualizer
                (None means "use its defaults").
        """
        bbox_kwargs = {} if bbox_visualizer_params is None else bbox_visualizer_params
        score_kwargs = {} if score_visualizer_params is None else score_visualizer_params
        self.visualizer_bbox = RectangleVisualizer(**bbox_kwargs)
        self.visualizer_score = TextVisualizer(**score_kwargs)

    def visualize(self, image_bgr, scored_bboxes):
        """
        Draw each box and its score label; `scored_bboxes` is a
        (boxes_xywh, box_scores) pair of equal length.
        """
        boxes_xywh, box_scores = scored_bboxes
        n_boxes = len(boxes_xywh)
        n_scores = len(box_scores)
        assert (
            n_boxes == n_scores
        ), "Number of bounding boxes {} should be equal to the number of scores {}".format(
            len(boxes_xywh), len(box_scores)
        )
        canvas = image_bgr
        for box_xywh, score in zip(boxes_xywh, box_scores):
            canvas = self.visualizer_bbox.visualize(canvas, box_xywh)
            label = "{0:6.4f}".format(score)
            # The label is anchored at the box's (x, y) corner.
            anchor_xy = box_xywh[0], box_xywh[1]
            canvas = self.visualizer_score.visualize(canvas, label, anchor_xy)
        return canvas

View File

@@ -0,0 +1,593 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import numpy as np
from typing import Iterable, Optional, Tuple
import cv2
from ..data.structures import DensePoseDataRelative, DensePoseOutput, DensePoseResult
from .base import Boxes, Image, MatrixVisualizer, PointsVisualizer
class DensePoseResultsVisualizer(object):
    """
    Template for DensePose result visualizers.

    Subclasses supply `create_visualization_context`, `visualize_iuv_arr`
    and `context_to_image_bgr`; this class only drives the loop.
    """

    def visualize(self, image_bgr: Image, densepose_result: Optional[DensePoseResult]) -> Image:
        """Render every encoded result onto the image; a None result is a no-op."""
        if densepose_result is None:
            return image_bgr
        context = self.create_visualization_context(image_bgr)
        for idx, encoded_with_shape in enumerate(densepose_result.results):
            decoded_iuv = DensePoseResult.decode_png_data(*encoded_with_shape)
            box = densepose_result.boxes_xywh[idx]
            self.visualize_iuv_arr(context, decoded_iuv, box)
        return self.context_to_image_bgr(context)
class DensePoseMaskedColormapResultsVisualizer(DensePoseResultsVisualizer):
    """
    Colour-maps a value matrix extracted from an IUV array, restricted to the
    pixels where the extracted segmentation is positive.
    """

    def __init__(
        self,
        data_extractor,
        segm_extractor,
        inplace=True,
        cmap=cv2.COLORMAP_PARULA,
        alpha=0.7,
        val_scale=1.0,
    ):
        """
        Args:
            data_extractor: callable mapping an IUV array to the value matrix.
            segm_extractor: callable mapping an IUV array to a segmentation
                whose positive entries form the drawing mask.
            inplace, cmap, alpha, val_scale: forwarded to MatrixVisualizer.
        """
        self.data_extractor = data_extractor
        self.segm_extractor = segm_extractor
        self.mask_visualizer = MatrixVisualizer(
            inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha
        )

    # For this visualizer the "context" is simply the image itself.
    def create_visualization_context(self, image_bgr: Image):
        return image_bgr

    def context_to_image_bgr(self, context):
        return context

    def get_image_bgr_from_context(self, context):
        return context

    def visualize_iuv_arr(self, context, iuv_arr, bbox_xywh):
        """Overlay the colour-mapped values of one detection onto the image."""
        canvas = self.get_image_bgr_from_context(context)
        values = self.data_extractor(iuv_arr)
        segmentation = self.segm_extractor(iuv_arr)
        foreground = np.zeros(values.shape, dtype=np.uint8)
        foreground[segmentation > 0] = 1
        return self.mask_visualizer.visualize(canvas, foreground, values, bbox_xywh)
def _extract_i_from_iuvarr(iuv_arr):
    """Return plane 0 (the I channel) of a 3-D IUV array."""
    i_plane = iuv_arr[0, :, :]
    return i_plane
def _extract_u_from_iuvarr(iuv_arr):
    """Return plane 1 (the U channel) of a 3-D IUV array."""
    u_plane = iuv_arr[1, :, :]
    return u_plane
def _extract_v_from_iuvarr(iuv_arr):
    """Return plane 2 (the V channel) of a 3-D IUV array."""
    v_plane = iuv_arr[2, :, :]
    return v_plane
class DensePoseResultsMplContourVisualizer(DensePoseResultsVisualizer):
    """Draws U and V iso-contours over the image using matplotlib."""

    def __init__(self, levels=10, **kwargs):
        """
        Args:
            levels: passed to `plt.contour` as its `levels` argument.
            **kwargs: extra keyword arguments forwarded to `plt.contour`.
        """
        self.levels = levels
        self.plot_args = kwargs

    def create_visualization_context(self, image_bgr: Image):
        """
        Create a borderless figure sized to the image (at 100 dpi), show the
        image in it and return a dict holding the image, figure and canvas.
        """
        import matplotlib.pyplot as plt
        from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas

        context = {}
        context["image_bgr"] = image_bgr
        dpi = 100
        height_inches = float(image_bgr.shape[0]) / dpi
        width_inches = float(image_bgr.shape[1]) / dpi
        fig = plt.figure(figsize=(width_inches, height_inches), dpi=dpi)
        plt.axes([0, 0, 1, 1])
        plt.axis("off")
        context["fig"] = fig
        canvas = FigureCanvas(fig)
        context["canvas"] = canvas
        # Extent puts y = 0 at the top so axes coordinates match pixel coordinates.
        extent = (0, image_bgr.shape[1], image_bgr.shape[0], 0)
        # Reverse the channel order (BGR -> RGB) for matplotlib.
        plt.imshow(image_bgr[:, :, ::-1], extent=extent)
        return context

    def context_to_image_bgr(self, context):
        """
        Rasterize the figure and return it as a new BGR uint8 image.

        The figure is closed afterwards so that repeated calls to `visualize`
        do not accumulate open pyplot figures.
        """
        import matplotlib.pyplot as plt

        fig = context["fig"]
        canvas = context["canvas"]
        canvas.draw()
        if hasattr(canvas, "tostring_rgb"):
            w, h = map(int, fig.get_size_inches() * fig.get_dpi())
            # np.frombuffer replaces the deprecated binary mode of np.fromstring.
            image_1d = np.frombuffer(canvas.tostring_rgb(), dtype="uint8")
            image_rgb = image_1d.reshape(h, w, 3)
        else:
            # Newer Matplotlib no longer provides tostring_rgb; read the RGBA
            # buffer instead and drop the alpha channel.
            image_rgb = np.asarray(canvas.buffer_rgba())[:, :, :3]
        # .copy() detaches the result from the (read-only) canvas buffer.
        image_bgr = image_rgb[:, :, ::-1].copy()
        plt.close(fig)
        return image_bgr

    def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> Image:
        """
        Add U and V contour plots for one detection to the current figure.

        The contours are positioned through `extent` so that the IUV array
        spans the detection's bounding box. Nothing is returned; the drawing
        lives in the pyplot figure created by `create_visualization_context`.
        """
        import matplotlib.pyplot as plt

        # Rescale the channel values from [0, 255] to [0, 1].
        u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
        v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
        extent = (
            bbox_xywh[0],
            bbox_xywh[0] + bbox_xywh[2],
            bbox_xywh[1],
            bbox_xywh[1] + bbox_xywh[3],
        )
        plt.contour(u, self.levels, extent=extent, **self.plot_args)
        plt.contour(v, self.levels, extent=extent, **self.plot_args)
class DensePoseResultsCustomContourVisualizer(DensePoseResultsVisualizer):
    """
    Contour visualization using marching squares

    Draws iso-lines of the U and V channels straight onto the image with
    `cv2.line`, one body part at a time, without requiring matplotlib.
    """

    def __init__(self, levels=10, **kwargs):
        """
        Args:
            levels: an int (that many evenly spaced levels in [0, 1]) or an
                explicit sequence of level values.
            **kwargs: only "linewidths" (one width per level) is consulted
                here; all keyword arguments are also kept in `plot_args`.
        """
        # TODO: colormap is hardcoded
        cmap = cv2.COLORMAP_PARULA
        if isinstance(levels, int):
            self.levels = np.linspace(0, 1, levels)
        else:
            # Coerce to an array so that plain lists / tuples work too: the
            # levels are multiplied by 255 and cast below.
            self.levels = np.asarray(levels)
        if "linewidths" in kwargs:
            self.linewidths = kwargs["linewidths"]
        else:
            self.linewidths = [1] * len(self.levels)
        self.plot_args = kwargs
        # One colour per level, looked up in the colormap at level * 255.
        img_colors_bgr = cv2.applyColorMap((self.levels * 255).astype(np.uint8), cmap)
        self.level_colors_bgr = [
            [int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr
        ]

    # For this visualizer the "context" is simply the image itself.
    def create_visualization_context(self, image_bgr: Image):
        return image_bgr

    def context_to_image_bgr(self, context):
        return context

    def get_image_bgr_from_context(self, context):
        return context

    def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> Image:
        """Draw U and V contours of one detection onto the image (returns None)."""
        image_bgr = self.get_image_bgr_from_context(context)
        segm = _extract_i_from_iuvarr(iuv_arr)
        # Rescale the channel values from [0, 255] to [0, 1].
        u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
        v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
        self._contours(image_bgr, u, segm, bbox_xywh)
        self._contours(image_bgr, v, segm, bbox_xywh)

    def _contours(self, image_bgr, arr, segm, bbox_xywh):
        """Run marching squares on `arr` separately for every part label in `segm`."""
        for part_idx in range(1, DensePoseDataRelative.N_PART_LABELS + 1):
            mask = segm == part_idx
            if not np.any(mask):
                continue
            arr_min = np.amin(arr[mask])
            arr_max = np.amax(arr[mask])
            # Bounding rows / columns of the part (end indices are exclusive).
            I, J = np.nonzero(mask)
            i0 = np.amin(I)
            i1 = np.amax(I) + 1
            j0 = np.amin(J)
            j1 = np.amax(J) + 1
            # A part spanning a single row or column contains no 2x2 cell.
            if (j1 == j0 + 1) or (i1 == i0 + 1):
                continue
            Nw = arr.shape[1] - 1
            Nh = arr.shape[0] - 1
            for level_idx, level in enumerate(self.levels):
                # Levels outside the part's value range cannot produce a contour.
                if (level < arr_min) or (level > arr_max):
                    continue
                vp = arr[i0:i1, j0:j1] >= level
                # 4-bit code per 2x2 cell: bit 0 = (i, j), bit 1 = (i+1, j),
                # bit 2 = (i+1, j+1), bit 3 = (i, j+1); a bit is set when that
                # corner is at or above the level.
                bin_codes = vp[:-1, :-1] + vp[1:, :-1] * 2 + vp[1:, 1:] * 4 + vp[:-1, 1:] * 8
                mp = mask[i0:i1, j0:j1]
                # Same layout, but bits mark corners that belong to the part.
                bin_mask_codes = mp[:-1, :-1] + mp[1:, :-1] * 2 + mp[1:, 1:] * 4 + mp[:-1, 1:] * 8
                it = np.nditer(bin_codes, flags=["multi_index"])
                color_bgr = self.level_colors_bgr[level_idx]
                linewidth = self.linewidths[level_idx]
                while not it.finished:
                    # Codes 0 and 15 mean all corners are on the same side.
                    if (it[0] != 0) and (it[0] != 15):
                        i, j = it.multi_index
                        # Only draw in cells touching the part.
                        if bin_mask_codes[i, j] != 0:
                            self._draw_line(
                                image_bgr,
                                arr,
                                mask,
                                level,
                                color_bgr,
                                linewidth,
                                it[0],
                                it.multi_index,
                                bbox_xywh,
                                Nw,
                                Nh,
                                (i0, j0),
                            )
                    it.iternext()

    def _draw_line(
        self,
        image_bgr,
        arr,
        mask,
        v,
        color_bgr,
        linewidth,
        bin_code,
        multi_idx,
        bbox_xywh,
        Nw,
        Nh,
        offset,
    ):
        """
        Draw the contour segment(s) of one cell.

        Segment end points come back from `_bin_code_2_lines` as fractions of
        the array extent and are mapped linearly into the bounding box here.
        `mask` is accepted but not used.
        """
        lines = self._bin_code_2_lines(arr, v, bin_code, multi_idx, Nw, Nh, offset)
        x0, y0, w, h = bbox_xywh
        x1 = x0 + w
        y1 = y0 + h
        for line in lines:
            x0r, y0r = line[0]
            x1r, y1r = line[1]
            pt0 = (int(x0 + x0r * (x1 - x0)), int(y0 + y0r * (y1 - y0)))
            pt1 = (int(x0 + x1r * (x1 - x0)), int(y0 + y1r * (y1 - y0)))
            cv2.line(image_bgr, pt0, pt1, color_bgr, linewidth)

    def _bin_code_2_lines(self, arr, v, bin_code, multi_idx, Nw, Nh, offset):
        """
        Return the line segments of level `v` crossing one 2x2 cell.

        `multi_idx` is the cell index inside the cropped region and `offset`
        the crop origin; each returned segment is a pair of (x, y) points
        expressed as fractions of (Nw, Nh). Crossing positions on a cell edge
        are found by linear interpolation between the two corner values.
        Complementary codes (e.g. 1 and 14) share the same geometry; the two
        saddle codes 5 and 10 yield two segments each.
        """
        i0, j0 = offset
        i, j = multi_idx
        i += i0
        j += j0
        # Corner values in the same order as the bits of `bin_code`.
        v0, v1, v2, v3 = arr[i, j], arr[i + 1, j], arr[i + 1, j + 1], arr[i, j + 1]
        x0i = float(j) / Nw
        y0j = float(i) / Nh
        # Height / width of one cell in normalized coordinates.
        He = 1.0 / Nh
        We = 1.0 / Nw
        if (bin_code == 1) or (bin_code == 14):
            a = (v - v0) / (v1 - v0)
            b = (v - v0) / (v3 - v0)
            pt1 = (x0i, y0j + a * He)
            pt2 = (x0i + b * We, y0j)
            return [(pt1, pt2)]
        elif (bin_code == 2) or (bin_code == 13):
            a = (v - v0) / (v1 - v0)
            b = (v - v1) / (v2 - v1)
            pt1 = (x0i, y0j + a * He)
            pt2 = (x0i + b * We, y0j + He)
            return [(pt1, pt2)]
        elif (bin_code == 3) or (bin_code == 12):
            a = (v - v0) / (v3 - v0)
            b = (v - v1) / (v2 - v1)
            pt1 = (x0i + a * We, y0j)
            pt2 = (x0i + b * We, y0j + He)
            return [(pt1, pt2)]
        elif (bin_code == 4) or (bin_code == 11):
            a = (v - v1) / (v2 - v1)
            b = (v - v3) / (v2 - v3)
            pt1 = (x0i + a * We, y0j + He)
            pt2 = (x0i + We, y0j + b * He)
            return [(pt1, pt2)]
        elif (bin_code == 6) or (bin_code == 9):
            a = (v - v0) / (v1 - v0)
            b = (v - v3) / (v2 - v3)
            pt1 = (x0i, y0j + a * He)
            pt2 = (x0i + We, y0j + b * He)
            return [(pt1, pt2)]
        elif (bin_code == 7) or (bin_code == 8):
            a = (v - v0) / (v3 - v0)
            b = (v - v3) / (v2 - v3)
            pt1 = (x0i + a * We, y0j)
            pt2 = (x0i + We, y0j + b * He)
            return [(pt1, pt2)]
        elif bin_code == 5:
            a1 = (v - v0) / (v1 - v0)
            b1 = (v - v1) / (v2 - v1)
            pt11 = (x0i, y0j + a1 * He)
            pt12 = (x0i + b1 * We, y0j + He)
            a2 = (v - v0) / (v3 - v0)
            b2 = (v - v3) / (v2 - v3)
            pt21 = (x0i + a2 * We, y0j)
            pt22 = (x0i + We, y0j + b2 * He)
            return [(pt11, pt12), (pt21, pt22)]
        elif bin_code == 10:
            a1 = (v - v0) / (v3 - v0)
            b1 = (v - v0) / (v1 - v0)
            pt11 = (x0i + a1 * We, y0j)
            pt12 = (x0i, y0j + b1 * He)
            a2 = (v - v1) / (v2 - v1)
            b2 = (v - v3) / (v2 - v3)
            pt21 = (x0i + a2 * We, y0j + He)
            pt22 = (x0i + We, y0j + b2 * He)
            return [(pt11, pt12), (pt21, pt22)]
        return []
# Pick the contour visualizer implementation at import time: use the
# matplotlib-based one when matplotlib can be imported (switching it to the
# "Agg" backend), otherwise fall back to the pure OpenCV / NumPy
# marching-squares implementation and log a warning.
try:
    import matplotlib

    matplotlib.use("Agg")
    DensePoseResultsContourVisualizer = DensePoseResultsMplContourVisualizer
except ModuleNotFoundError:
    logger = logging.getLogger(__name__)
    logger.warning("Could not import matplotlib, using custom contour visualizer")
    DensePoseResultsContourVisualizer = DensePoseResultsCustomContourVisualizer
class DensePoseResultsFineSegmentationVisualizer(DensePoseMaskedColormapResultsVisualizer):
    """Colour-maps the I channel of DensePose results, masked by that same channel."""

    def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
        # Scale so that the largest part label maps to 255.
        part_scale = 255.0 / DensePoseDataRelative.N_PART_LABELS
        super().__init__(
            data_extractor=_extract_i_from_iuvarr,
            segm_extractor=_extract_i_from_iuvarr,
            inplace=inplace,
            cmap=cmap,
            alpha=alpha,
            val_scale=part_scale,
        )
class DensePoseResultsUVisualizer(DensePoseMaskedColormapResultsVisualizer):
    """Colour-maps the U channel of DensePose results, masked by the I channel."""

    def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
        super().__init__(
            data_extractor=_extract_u_from_iuvarr,
            segm_extractor=_extract_i_from_iuvarr,
            inplace=inplace,
            cmap=cmap,
            alpha=alpha,
            val_scale=1.0,
        )
class DensePoseResultsVVisualizer(DensePoseMaskedColormapResultsVisualizer):
    """Colour-maps the V channel of DensePose results, masked by the I channel."""

    def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
        super().__init__(
            data_extractor=_extract_v_from_iuvarr,
            segm_extractor=_extract_i_from_iuvarr,
            inplace=inplace,
            cmap=cmap,
            alpha=alpha,
            val_scale=1.0,
        )
class DensePoseOutputsFineSegmentationVisualizer(object):
    """Colour-maps the predicted part labels of raw DensePose outputs."""

    def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
        # Scale so that the largest part label maps to 255.
        part_scale = 255.0 / DensePoseDataRelative.N_PART_LABELS
        self.mask_visualizer = MatrixVisualizer(
            inplace=inplace, cmap=cmap, val_scale=part_scale, alpha=alpha
        )

    def visualize(
        self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
    ) -> Image:
        """
        Overlay the per-pixel argmax of I (zeroed where the argmax of S is 0)
        for every detection; a None input leaves the image untouched.
        """
        if dp_output_with_bboxes is None:
            return image_bgr
        densepose_output, bboxes_xywh = dp_output_with_bboxes
        S = densepose_output.S
        I = densepose_output.I  # noqa
        U = densepose_output.U
        V = densepose_output.V
        N = S.size(0)
        # All four outputs must describe the same number of detections.
        for name, tensor in (("I", I), ("U", U), ("V", V)):
            assert N == tensor.size(
                0
            ), "densepose outputs S {} and {} {} should have equal first dim size".format(
                S.size(), name, tensor.size()
            )
        assert N == len(
            bboxes_xywh
        ), "number of bounding boxes {} should be equal to first dim size of outputs {}".format(
            len(bboxes_xywh), N
        )
        for n in range(N):
            coarse_labels = S[n].argmax(dim=0)
            part_labels = I[n].argmax(dim=0) * (coarse_labels > 0).long()
            matrix = part_labels.cpu().numpy().astype(np.uint8)
            mask = (matrix > 0).astype(np.uint8)
            image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bboxes_xywh[n])
        return image_bgr
class DensePoseOutputsUVisualizer(object):
    """Colour-maps the predicted U coordinates of raw DensePose outputs."""

    def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
        self.mask_visualizer = MatrixVisualizer(
            inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha
        )

    def visualize(
        self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
    ) -> Image:
        """
        For every detection, pick at each pixel the U value of the predicted
        part (clipped to [0, 1], scaled to 255) and overlay it on the image.
        A None input leaves the image untouched.
        """
        if dp_output_with_bboxes is None:
            return image_bgr
        densepose_output, bboxes_xywh = dp_output_with_bboxes
        assert isinstance(
            densepose_output, DensePoseOutput
        ), "DensePoseOutput expected, {} encountered".format(type(densepose_output))
        S = densepose_output.S
        I = densepose_output.I  # noqa
        U = densepose_output.U
        V = densepose_output.V
        N = S.size(0)
        # All four outputs must describe the same number of detections.
        for name, tensor in (("I", I), ("U", U), ("V", V)):
            assert N == tensor.size(
                0
            ), "densepose outputs S {} and {} {} should have equal first dim size".format(
                S.size(), name, tensor.size()
            )
        assert N == len(
            bboxes_xywh
        ), "number of bounding boxes {} should be equal to first dim size of outputs {}".format(
            len(bboxes_xywh), N
        )
        for n in range(N):
            coarse_labels = S[n].argmax(dim=0)
            part_labels = I[n].argmax(dim=0) * (coarse_labels > 0).long()
            segmentation = part_labels.cpu().numpy().astype(np.uint8)
            mask = (segmentation > 0).astype(np.uint8)
            u_per_part = U[n].cpu().numpy().astype(np.float32)
            u_vis = np.zeros(segmentation.shape, dtype=np.float32)
            for part_id in range(u_per_part.shape[0]):
                part_pixels = segmentation == part_id
                u_vis[part_pixels] = u_per_part[part_id][part_pixels].clip(0, 1) * 255
            image_bgr = self.mask_visualizer.visualize(image_bgr, mask, u_vis, bboxes_xywh[n])
        return image_bgr
class DensePoseOutputsVVisualizer(object):
    """Colour-maps the predicted V coordinates of raw DensePose outputs."""

    def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
        self.mask_visualizer = MatrixVisualizer(
            inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha
        )

    def visualize(
        self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
    ) -> Image:
        """
        For every detection, pick at each pixel the V value of the predicted
        part (clipped to [0, 1], scaled to 255) and overlay it on the image.

        Args:
            image_bgr: image to draw on.
            dp_output_with_bboxes: (DensePoseOutput, boxes in XYWH) pair, or
                None, in which case the image is returned unchanged.
        """
        if dp_output_with_bboxes is None:
            return image_bgr
        densepose_output, bboxes_xywh = dp_output_with_bboxes
        assert isinstance(
            densepose_output, DensePoseOutput
        ), "DensePoseOutput expected, {} encountered".format(type(densepose_output))
        S = densepose_output.S
        I = densepose_output.I  # noqa
        U = densepose_output.U
        V = densepose_output.V
        N = S.size(0)
        assert N == I.size(
            0
        ), "densepose outputs S {} and I {}" " should have equal first dim size".format(
            S.size(), I.size()
        )
        assert N == U.size(
            0
        ), "densepose outputs S {} and U {}" " should have equal first dim size".format(
            S.size(), U.size()
        )
        assert N == V.size(
            0
        ), "densepose outputs S {} and V {}" " should have equal first dim size".format(
            S.size(), V.size()
        )
        assert N == len(
            bboxes_xywh
        ), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
            len(bboxes_xywh), N
        )
        for n in range(N):
            Sn = S[n].argmax(dim=0)
            # Part label per pixel, zeroed where the S argmax is 0.
            In = I[n].argmax(dim=0) * (Sn > 0).long()
            segmentation = In.cpu().numpy().astype(np.uint8)
            mask = np.zeros(segmentation.shape, dtype=np.uint8)
            mask[segmentation > 0] = 1
            Vn = V[n].cpu().numpy().astype(np.float32)
            Vvis = np.zeros(segmentation.shape, dtype=np.float32)
            # Vn is a NumPy array here, so its length is `shape[0]`;
            # `ndarray.size` is an int attribute and cannot be called.
            for partId in range(Vn.shape[0]):
                Vvis[segmentation == partId] = Vn[partId][segmentation == partId].clip(0, 1) * 255
            bbox_xywh = bboxes_xywh[n]
            image_bgr = self.mask_visualizer.visualize(image_bgr, mask, Vvis, bbox_xywh)
        return image_bgr
class DensePoseDataCoarseSegmentationVisualizer(object):
    """
    Visualizer for ground truth segmentation
    """

    def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
        # Scale so that the largest body-part label maps to 255.
        body_part_scale = 255.0 / DensePoseDataRelative.N_BODY_PARTS
        self.mask_visualizer = MatrixVisualizer(
            inplace=inplace, cmap=cmap, val_scale=body_part_scale, alpha=alpha
        )

    def visualize(
        self,
        image_bgr: Image,
        bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
    ) -> Image:
        """
        Overlay the `segm` annotation of every (box, data) pair on the image;
        a None input leaves the image untouched.
        """
        if bbox_densepose_datas is None:
            return image_bgr
        for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
            segm = densepose_data.segm.numpy()
            foreground = (segm > 0).astype(np.uint8)
            image_bgr = self.mask_visualizer.visualize(
                image_bgr, foreground, segm, bbox_xywh.numpy()
            )
        return image_bgr
class DensePoseDataPointsVisualizer(object):
    """Draws annotated DensePose points, optionally coloured by a per-point value."""

    def __init__(self, densepose_data_to_value_fn=None, cmap=cv2.COLORMAP_PARULA):
        """
        Args:
            densepose_data_to_value_fn: optional callable turning a data record
                into the array handed to `cv2.applyColorMap`; when None all
                points use the PointsVisualizer default colour.
            cmap: OpenCV colormap used with the values above.
        """
        self.points_visualizer = PointsVisualizer()
        self.densepose_data_to_value_fn = densepose_data_to_value_fn
        self.cmap = cmap

    def visualize(
        self,
        image_bgr: Image,
        bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
    ) -> Image:
        """Draw the points of every (box, data) pair; None leaves the image untouched."""
        if bbox_densepose_datas is None:
            return image_bgr
        for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
            left, top, width, height = bbox_xywh.numpy()
            # Point coordinates are stored relative to the box on a 0..255 scale.
            xs = densepose_data.x.numpy() * width / 255.0 + left
            ys = densepose_data.y.numpy() * height / 255.0 + top
            pts_xy = zip(xs, ys)
            colors_bgr = None
            if self.densepose_data_to_value_fn is not None:
                values = self.densepose_data_to_value_fn(densepose_data)
                mapped = cv2.applyColorMap(values, self.cmap)
                colors_bgr = [[int(channel) for channel in color.ravel()] for color in mapped]
            image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy, colors_bgr)
        return image_bgr
def _densepose_data_u_for_cmap(densepose_data):
    """Return the record's `u` values clipped to [0, 1] and scaled to uint8 0..255."""
    u_values = densepose_data.u.numpy()
    scaled = np.clip(u_values, 0, 1) * 255.0
    return scaled.astype(np.uint8)
def _densepose_data_v_for_cmap(densepose_data):
    """Return the record's `v` values clipped to [0, 1] and scaled to uint8 0..255."""
    v_values = densepose_data.v.numpy()
    scaled = np.clip(v_values, 0, 1) * 255.0
    return scaled.astype(np.uint8)
def _densepose_data_i_for_cmap(densepose_data):
    """
    Return the record's `i` values clipped to [0, N_PART_LABELS] and rescaled
    so that N_PART_LABELS maps to 255, as uint8.
    """
    n_labels = DensePoseDataRelative.N_PART_LABELS
    clipped = np.clip(densepose_data.i.numpy(), 0.0, n_labels)
    scaled = clipped * 255.0 / DensePoseDataRelative.N_PART_LABELS
    return scaled.astype(np.uint8)
class DensePoseDataPointsUVisualizer(DensePoseDataPointsVisualizer):
    """Points visualizer colouring each point by its `u` value."""

    def __init__(self):
        super().__init__(densepose_data_to_value_fn=_densepose_data_u_for_cmap)
class DensePoseDataPointsVVisualizer(DensePoseDataPointsVisualizer):
    """Points visualizer colouring each point by its `v` value."""

    def __init__(self):
        super().__init__(densepose_data_to_value_fn=_densepose_data_v_for_cmap)
class DensePoseDataPointsIVisualizer(DensePoseDataPointsVisualizer):
    """Points visualizer colouring each point by its `i` value."""

    def __init__(self):
        super().__init__(densepose_data_to_value_fn=_densepose_data_i_for_cmap)

View File

@@ -0,0 +1,152 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
from typing import Sequence
import torch
from detectron2.layers.nms import batched_nms
from detectron2.structures.instances import Instances
from densepose.vis.bounding_box import BoundingBoxVisualizer, ScoredBoundingBoxVisualizer
from densepose.vis.densepose import DensePoseResultsVisualizer
from .base import CompoundVisualizer
Scores = Sequence[float]
def extract_scores_from_instances(instances: Instances, select=None):
    """
    Return the `scores` field of `instances`, indexed by `select` when given,
    or None if the field is absent.
    """
    if not instances.has("scores"):
        return None
    scores = instances.scores
    if select is None:
        return scores
    return scores[select]
def extract_boxes_xywh_from_instances(instances: Instances, select=None):
    """
    Return a copy of `pred_boxes` with columns 2 and 3 turned into sizes
    (column 2 minus column 0, column 3 minus column 1), indexed by `select`
    when given; None if the field is absent.
    """
    if not instances.has("pred_boxes"):
        return None
    # Work on a clone so the instances' own boxes stay untouched.
    converted = instances.pred_boxes.tensor.clone()
    converted[:, 2] -= converted[:, 0]
    converted[:, 3] -= converted[:, 1]
    if select is None:
        return converted
    return converted[select]
def create_extractor(visualizer: object):
    """
    Create an extractor for the provided visualizer

    Compound visualizers get a compound extractor built recursively from
    their members; an unsupported visualizer is logged as an error and
    yields None.
    """
    if isinstance(visualizer, CompoundVisualizer):
        member_extractors = [create_extractor(member) for member in visualizer.visualizers]
        return CompoundExtractor(member_extractors)
    if isinstance(visualizer, DensePoseResultsVisualizer):
        return DensePoseResultExtractor()
    if isinstance(visualizer, ScoredBoundingBoxVisualizer):
        return CompoundExtractor([extract_boxes_xywh_from_instances, extract_scores_from_instances])
    if isinstance(visualizer, BoundingBoxVisualizer):
        return extract_boxes_xywh_from_instances
    logger = logging.getLogger(__name__)
    logger.error(f"Could not create extractor for {visualizer}")
    return None
class BoundingBoxExtractor(object):
    """
    Extracts bounding boxes from instances
    """

    def __call__(self, instances: Instances, select=None):
        """
        Return the predicted boxes in XYWH form, or None if there are none.

        Args:
            instances: detections to read `pred_boxes` from.
            select: optional index / mask applied to the boxes. Accepted so
                that this extractor can be called like the other extractors
                (e.g. from CompoundExtractor or the filtering wrappers).
        """
        boxes_xywh = extract_boxes_xywh_from_instances(instances, select)
        return boxes_xywh
class ScoredBoundingBoxExtractor(object):
    """
    Extracts bounding boxes (XYWH) together with their scores from instances
    """

    def __call__(self, instances: Instances, select=None):
        """
        Return a (boxes_xywh, scores) pair. `select` is applied to both only
        when both are available; otherwise the pair is returned as extracted
        (with None for whatever is missing).
        """
        scores = extract_scores_from_instances(instances)
        boxes_xywh = extract_boxes_xywh_from_instances(instances)
        both_present = (scores is not None) and (boxes_xywh is not None)
        if both_present and select is not None:
            scores = scores[select]
            boxes_xywh = boxes_xywh[select]
        return (boxes_xywh, scores)
class DensePoseResultExtractor(object):
    """
    Extracts DensePose result from instances
    """

    def __call__(self, instances: Instances, select=None):
        """
        Convert `pred_densepose` to a result using the XYWH boxes, applying
        `select` to both first when given. Returns None when either the
        DensePose predictions or the boxes are missing.
        """
        boxes_xywh = extract_boxes_xywh_from_instances(instances)
        if not (instances.has("pred_densepose") and (boxes_xywh is not None)):
            return None
        densepose_output = instances.pred_densepose
        if select is not None:
            densepose_output = densepose_output[select]
            boxes_xywh = boxes_xywh[select]
        return densepose_output.to_result(boxes_xywh)
class CompoundExtractor(object):
    """
    Extracts data for CompoundVisualizer
    """

    def __init__(self, extractors):
        self.extractors = extractors

    def __call__(self, instances: Instances, select=None):
        """Return the list of each wrapped extractor's output, in order."""
        return [extract(instances, select) for extract in self.extractors]
class NmsFilteredExtractor(object):
    """
    Extracts data in the format accepted by NmsFilteredVisualizer
    """

    def __init__(self, extractor, iou_threshold):
        """
        Args:
            extractor: wrapped extractor, called with the NMS survivors.
            iou_threshold: IoU threshold passed to `batched_nms`.
        """
        self.extractor = extractor
        self.iou_threshold = iou_threshold

    def __call__(self, instances: Instances, select=None):
        """
        Run NMS over all detections (treated as a single class) and call the
        wrapped extractor with the survivors, intersected with `select` when
        one is given. Returns None if boxes or scores are unavailable.
        """
        scores = extract_scores_from_instances(instances)
        boxes_xywh = extract_boxes_xywh_from_instances(instances)
        if (boxes_xywh is None) or (scores is None):
            return None
        # NMS works on corner coordinates (x1, y1, x2, y2), so convert the
        # XYWH boxes back before computing overlaps.
        boxes_xyxy = boxes_xywh.clone()
        boxes_xyxy[:, 2] += boxes_xyxy[:, 0]
        boxes_xyxy[:, 3] += boxes_xyxy[:, 1]
        select_local_idx = batched_nms(
            boxes_xyxy,
            scores,
            # One shared class id for every box, on the same device as the boxes.
            torch.zeros(len(scores), dtype=torch.int32, device=boxes_xywh.device),
            iou_threshold=self.iou_threshold,
        ).squeeze()
        # Turn the kept indices into a boolean mask so it can be combined
        # with an incoming `select` mask.
        select_local = torch.zeros(len(boxes_xywh), dtype=torch.bool, device=boxes_xywh.device)
        select_local[select_local_idx] = True
        select = select_local if select is None else (select & select_local)
        return self.extractor(instances, select=select)
class ScoreThresholdedExtractor(object):
    """
    Extracts data in the format accepted by ScoreThresholdedVisualizer
    """

    def __init__(self, extractor, min_score):
        self.extractor = extractor
        self.min_score = min_score

    def __call__(self, instances: Instances, select=None):
        """
        Call the wrapped extractor restricted to detections whose score is
        strictly above `min_score` (intersected with `select` when given).
        Returns None if the instances carry no scores.
        """
        scores = extract_scores_from_instances(instances)
        if scores is None:
            return None
        above_threshold = scores > self.min_score
        if select is None:
            select = above_threshold
        else:
            select = select & above_threshold
        return self.extractor(instances, select=select)

View File

@@ -0,0 +1,7 @@
## Some scripts for developers to use, including:
- `run_instant_tests.sh`: run training for a few iterations.
- `run_inference_tests.sh`: run inference on a small dataset.
- `../../dev/linter.sh`: lint the codebase before commit
- `../../dev/parse_results.sh`: parse results from log file.

View File

@@ -0,0 +1,33 @@
#!/bin/bash -e
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

# Runs train_net.py in --eval-only mode for every config passed on the command
# line (default: all *inference_acc_test.yaml quick schedules) and removes the
# output directory after each run.

# Expanded unquoted on purpose so that it splits into interpreter + script.
BIN="python train_net.py"
OUTPUT="inference_test_output"
NUM_GPUS=2
IMS_PER_GPU=2
IMS_PER_BATCH=$(( NUM_GPUS * IMS_PER_GPU ))

CFG_LIST=( "${@:1}" )
if [ ${#CFG_LIST[@]} -eq 0 ]; then
  CFG_LIST=( ./configs/quick_schedules/*inference_acc_test.yaml )
fi

echo "========================================================================"
echo "Configs to run:"
echo "${CFG_LIST[@]}"
echo "========================================================================"

for cfg in "${CFG_LIST[@]}"; do
  echo "========================================================================"
  echo "Running $cfg ..."
  echo "========================================================================"
  $BIN \
    --eval-only \
    --num-gpus "$NUM_GPUS" \
    --config-file "$cfg" \
    OUTPUT_DIR "$OUTPUT" \
    SOLVER.IMS_PER_BATCH "$IMS_PER_BATCH"
  # Quoted so the removal can never be word-split or glob-expanded.
  rm -rf "$OUTPUT"
done

View File

@@ -0,0 +1,28 @@
#!/bin/bash -e
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

# Launches train_net.py once per config given on the command line (default:
# every *instant_test.yaml quick schedule), deleting the output directory
# after each run.

BIN="python train_net.py"
OUTPUT="instant_test_output"
NUM_GPUS=2
SOLVER_IMS_PER_BATCH=$((NUM_GPUS * 2))
SEPARATOR="========================================================================"

CFG_LIST=( "$@" )
if [[ ${#CFG_LIST[@]} -eq 0 ]]; then
  CFG_LIST=( ./configs/quick_schedules/*instant_test.yaml )
fi

echo "$SEPARATOR"
echo "Configs to run:"
echo "${CFG_LIST[@]}"
echo "$SEPARATOR"

for cfg in "${CFG_LIST[@]}"; do
  echo "$SEPARATOR"
  echo "Running $cfg ..."
  echo "$SEPARATOR"
  # $BIN is left unquoted on purpose so it splits into interpreter + script.
  $BIN --num-gpus $NUM_GPUS --config-file "$cfg" \
    SOLVER.IMS_PER_BATCH $SOLVER_IMS_PER_BATCH \
    OUTPUT_DIR "$OUTPUT"
  rm -rf "$OUTPUT"
done

View File

@@ -0,0 +1,58 @@
# Getting Started with DensePose
## Inference with Pre-trained Models
1. Pick a model and its config file from [Model Zoo](MODEL_ZOO.md), for example [densepose_rcnn_R_50_FPN_s1x.yaml](../configs/densepose_rcnn_R_50_FPN_s1x.yaml)
2. Run the [Apply Net](TOOL_APPLY_NET.md) tool to visualize the results or save them to disk. For example, to use contour visualization for DensePose, one can run:
```bash
python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml densepose_rcnn_R_50_FPN_s1x.pkl image.jpg dp_contour,bbox --output image_densepose_contour.png
```
Please see [Apply Net](TOOL_APPLY_NET.md) for more details on the tool.
## Training
First, prepare the [dataset](http://densepose.org/#dataset) into the following structure under the directory you'll run training scripts:
<pre>
datasets/coco/
annotations/
densepose_{train,minival,valminusminival}2014.json
<a href="https://dl.fbaipublicfiles.com/detectron2/densepose/densepose_minival2014_100.json">densepose_minival2014_100.json </a> (optional, for testing only)
{train,val}2014/
# image files that are mentioned in the corresponding json
</pre>
To train a model one can use the [train_net.py](../train_net.py) script.
This script was used to train all DensePose models in [Model Zoo](MODEL_ZOO.md).
For example, to launch end-to-end DensePose-RCNN training with ResNet-50 FPN backbone
on 8 GPUs following the s1x schedule, one can run:
```bash
python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml --num-gpus 8
```
The configs are made for 8-GPU training. To train on 1 GPU, one can apply the
[linear learning rate scaling rule](https://arxiv.org/abs/1706.02677):
```bash
python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml \
SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025
```
## Evaluation
Model testing can be done in the same way as training, except for an additional flag `--eval-only` and
model location specification through `MODEL.WEIGHTS model.pth` in the command line:
```bash
python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml \
--eval-only MODEL.WEIGHTS model.pth
```
## Tools
We provide tools which allow one to:
- easily view DensePose annotated data in a dataset;
- perform DensePose inference on a set of images;
- visualize DensePose model results.
`query_db` is a tool to print or visualize DensePose data in a dataset.
Please refer to [Query DB](TOOL_QUERY_DB.md) for more details on this tool.
`apply_net` is a tool to print or visualize DensePose results.
Please refer to [Apply Net](TOOL_APPLY_NET.md) for more details on this tool.

View File

@@ -0,0 +1,277 @@
# Model Zoo and Baselines
# Introduction
We provide baselines trained with Detectron2 DensePose. The corresponding
configuration files can be found in the [configs](../configs) directory.
All models were trained on COCO `train2014` + `valminusminival2014` and
evaluated on COCO `minival2014`. For the details on common settings in which
baselines were trained, please check [Detectron 2 Model Zoo](../../../MODEL_ZOO.md).
## License
All models available for download through this document are licensed under the
[Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/)
## COCO DensePose Baselines with DensePose-RCNN
### Legacy Models
Baselines trained using schedules from [Güler et al, 2018](https://arxiv.org/pdf/1802.00434.pdf)
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">dp. AP<br/>GPS</th>
<th valign="bottom">dp. AP<br/>GPSm</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: densepose_rcnn_R_50_FPN_s1x_legacy -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml">R_50_FPN_s1x_legacy</a></td>
<td align="center">s1x</td>
<td align="center">0.307</td>
<td align="center">0.051</td>
<td align="center">3.2</td>
<td align="center">58.1</td>
<td align="center">52.1</td>
<td align="center">54.9</td>
<td align="center">164832157</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x_legacy/164832157/model_final_d366fa.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x_legacy/164832157/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_101_FPN_s1x_legacy -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml">R_101_FPN_s1x_legacy</a></td>
<td align="center">s1x</td>
<td align="center">0.390</td>
<td align="center">0.063</td>
<td align="center">4.3</td>
<td align="center">59.5</td>
<td align="center">53.2</td>
<td align="center">56.1</td>
<td align="center">164832182</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x_legacy/164832182/model_final_10af0e.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x_legacy/164832182/metrics.json">metrics</a></td>
</tr>
</tbody></table>
### Improved Baselines, Original Fully Convolutional Head
These models use an improved training schedule and Panoptic FPN head from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446).
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">dp. AP<br/>GPS</th>
<th valign="bottom">dp. AP<br/>GPSm</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: densepose_rcnn_R_50_FPN_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_s1x.yaml">R_50_FPN_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.359</td>
<td align="center">0.066</td>
<td align="center">4.5</td>
<td align="center">61.2</td>
<td align="center">63.7</td>
<td align="center">65.3</td>
<td align="center">165712039</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_101_FPN_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_s1x.yaml">R_101_FPN_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.428</td>
<td align="center">0.079</td>
<td align="center">5.8</td>
<td align="center">62.3</td>
<td align="center">64.5</td>
<td align="center">66.4</td>
<td align="center">165712084</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x/165712084/model_final_c6ab63.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x/165712084/metrics.json">metrics</a></td>
</tr>
</tbody></table>
### Improved Baselines, DeepLabV3 Head
These models use an improved training schedule, Panoptic FPN head from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446) and DeepLabV3 head from [Chen et al, 2017](https://arxiv.org/abs/1706.05587).
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">dp. AP<br/>GPS</th>
<th valign="bottom">dp. AP<br/>GPSm</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: densepose_rcnn_R_50_FPN_DL_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml">R_50_FPN_DL_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.392</td>
<td align="center">0.070</td>
<td align="center">6.7</td>
<td align="center">61.1</td>
<td align="center">65.6</td>
<td align="center">66.8</td>
<td align="center">165712097</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_s1x/165712097/model_final_0ed407.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_s1x/165712097/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_101_FPN_DL_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml">R_101_FPN_DL_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.478</td>
<td align="center">0.083</td>
<td align="center">7.0</td>
<td align="center">62.3</td>
<td align="center">66.3</td>
<td align="center">67.7</td>
<td align="center">165712116</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_s1x/165712116/model_final_844d15.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_s1x/165712116/metrics.json">metrics</a></td>
</tr>
</tbody></table>
### Baselines with Confidence Estimation
These models perform additional estimation of confidence in regressed UV coordinates, along the lines of [Neverova et al., 2019](https://papers.nips.cc/paper/8378-correlated-uncertainty-for-learning-dense-correspondences-from-noisy-labels).
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">dp. AP<br/>GPS</th>
<th valign="bottom">dp. AP<br/>GPSm</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: densepose_rcnn_R_50_FPN_WC1_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml">R_50_FPN_WC1_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.353</td>
<td align="center">0.064</td>
<td align="center">4.6</td>
<td align="center">60.5</td>
<td align="center">64.2</td>
<td align="center">65.6</td>
<td align="center">173862049</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_WC1_s1x/173862049/model_final_289019.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_WC1_s1x/173862049/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_50_FPN_WC2_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml">R_50_FPN_WC2_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.364</td>
<td align="center">0.066</td>
<td align="center">4.8</td>
<td align="center">60.7</td>
<td align="center">64.2</td>
<td align="center">65.7</td>
<td align="center">173861455</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_WC2_s1x/173861455/model_final_3abe14.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_WC2_s1x/173861455/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_50_FPN_DL_WC1_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml">R_50_FPN_DL_WC1_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.397</td>
<td align="center">0.068</td>
<td align="center">6.7</td>
<td align="center">61.1</td>
<td align="center">65.8</td>
<td align="center">67.1</td>
<td align="center">173067973</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_WC1_s1x/173067973/model_final_b1e525.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_WC1_s1x/173067973/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_50_FPN_DL_WC2_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml">R_50_FPN_DL_WC2_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.410</td>
<td align="center">0.070</td>
<td align="center">6.8</td>
<td align="center">60.8</td>
<td align="center">65.6</td>
<td align="center">66.7</td>
<td align="center">173859335</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_WC2_s1x/173859335/model_final_60fed4.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_WC2_s1x/173859335/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_101_FPN_WC1_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml">R_101_FPN_WC1_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.435</td>
<td align="center">0.076</td>
<td align="center">5.7</td>
<td align="center">62.5</td>
<td align="center">64.9</td>
<td align="center">66.5</td>
<td align="center">171402969</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_WC1_s1x/171402969/model_final_9e47f0.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_WC1_s1x/171402969/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_101_FPN_WC2_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml">R_101_FPN_WC2_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.450</td>
<td align="center">0.078</td>
<td align="center">5.7</td>
<td align="center">62.3</td>
<td align="center">64.8</td>
<td align="center">66.6</td>
<td align="center">173860702</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_WC2_s1x/173860702/model_final_5ea023.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_WC2_s1x/173860702/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_101_FPN_DL_WC1_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml">R_101_FPN_DL_WC1_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.479</td>
<td align="center">0.081</td>
<td align="center">7.9</td>
<td align="center">62.0</td>
<td align="center">66.2</td>
<td align="center">67.4</td>
<td align="center">173858525</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_WC1_s1x/173858525/model_final_f359f3.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_WC1_s1x/173858525/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_101_FPN_DL_WC2_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml">R_101_FPN_DL_WC2_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.491</td>
<td align="center">0.082</td>
<td align="center">7.6</td>
<td align="center">61.7</td>
<td align="center">65.9</td>
<td align="center">67.3</td>
<td align="center">173294801</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_WC2_s1x/173294801/model_final_6e1ed1.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_WC2_s1x/173294801/metrics.json">metrics</a></td>
</tr>
</tbody></table>
## Old Baselines
It is still possible to use some baselines from [DensePose 1](https://github.com/facebookresearch/DensePose).
Below are evaluation metrics for the baselines recomputed in the current framework:
| Model | bbox AP | AP | AP50 | AP75 | APm |APl |
|-----|-----|-----|--- |--- |--- |--- |
| [`ResNet50_FPN_s1x-e2e`](https://dl.fbaipublicfiles.com/densepose/DensePose_ResNet50_FPN_s1x-e2e.pkl) | 54.673 | 48.894 | 84.963 | 50.717 | 43.132 | 50.433 |
| [`ResNet101_FPN_s1x-e2e`](https://dl.fbaipublicfiles.com/densepose/DensePose_ResNet101_FPN_s1x-e2e.pkl) | 56.032 | 51.088 | 86.250 | 55.057 | 46.542 | 52.563 |
Note: these scores are close, but not strictly equal to the ones reported in the [DensePose 1 Model Zoo](https://github.com/facebookresearch/DensePose/blob/master/MODEL_ZOO.md),
which is due to small incompatibilities between the frameworks.

View File

@@ -0,0 +1,130 @@
# Apply Net
`apply_net` is a tool to print or visualize DensePose results on a set of images.
It has two modes: `dump` to save DensePose model results to a pickle file
and `show` to visualize them on images.
## Dump Mode
The general command form is:
```bash
python apply_net.py dump [-h] [-v] [--output <dump_file>] <config> <model> <input>
```
There are three mandatory arguments:
- `<config>`, configuration file for a given model;
- `<model>`, model file with trained parameters
- `<input>`, input image file name, pattern or folder
One can additionally provide `--output` argument to define the output file name,
which defaults to `output.pkl`.
Examples:
1. Dump results of a DensePose model with ResNet-50 FPN backbone for images
in a folder `images` to file `dump.pkl`:
```bash
python apply_net.py dump configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl images --output dump.pkl -v
```
2. Dump results of a DensePose model with ResNet-50 FPN backbone for images
with file name matching a pattern `image*.jpg` to file `results.pkl`:
```bash
python apply_net.py dump configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl "image*.jpg" --output results.pkl -v
```
If you want to load the pickle file generated by the above command:
```
import pickle
import sys
# make sure DensePose is in your PYTHONPATH, or use the following line to add it:
sys.path.append("/your_detectron2_path/detectron2_repo/projects/DensePose/")
with open('/your_result_path/results.pkl', 'rb') as f:
    data = pickle.load(f)
```
The file `results.pkl` contains a list of results, one per image; each result is a dictionary:
```
data: [{'file_name': '/your_path/image1.jpg',
'scores': tensor([0.9884]),
'pred_boxes_XYXY': tensor([[ 69.6114, 0.0000, 706.9797, 706.0000]]),
'pred_densepose': <densepose.structures.DensePoseResult object at 0x7f791b312470>},
{'file_name': '/your_path/image2.jpg',
'scores': tensor([0.9999, 0.5373, 0.3991]),
'pred_boxes_XYXY': tensor([[ 59.5734, 7.7535, 579.9311, 932.3619],
[612.9418, 686.1254, 612.9999, 704.6053],
[164.5081, 407.4034, 598.3944, 920.4266]]),
'pred_densepose': <densepose.structures.DensePoseResult object at 0x7f7071229be0>}]
```
We can use the following code to parse the outputs of the first
detected instance on the first image.
```
from densepose.structures import DensePoseResult
img_id, instance_id = 0, 0  # Look at the first image and the first detected instance
bbox_xyxy = data[img_id]['pred_boxes_XYXY'][instance_id]
result_encoded = data[img_id]['pred_densepose'].results[instance_id]
iuv_arr = DensePoseResult.decode_png_data(*result_encoded)
```
The array `bbox_xyxy` contains (x0, y0, x1, y1) of the bounding box.
The shape of `iuv_arr` is `[3, H, W]`, where (H, W) is the shape of the bounding box.
- `iuv_arr[0,:,:]`: The patch index of image points, indicating which of the 24 surface patches the point is on.
- `iuv_arr[1,:,:]`: The U-coordinate value of image points.
- `iuv_arr[2,:,:]`: The V-coordinate value of image points.
## Visualization Mode
The general command form is:
```bash
python apply_net.py show [-h] [-v] [--min_score <score>] [--nms_thresh <threshold>] [--output <image_file>] <config> <model> <input> <visualizations>
```
There are four mandatory arguments:
- `<config>`, configuration file for a given model;
- `<model>`, model file with trained parameters
- `<input>`, input image file name, pattern or folder
- `<visualizations>`, visualizations specifier; currently available visualizations are:
* `bbox` - bounding boxes of detected persons;
* `dp_segm` - segmentation masks for detected persons;
* `dp_u` - each body part is colored according to the estimated values of the
U coordinate in part parameterization;
* `dp_v` - each body part is colored according to the estimated values of the
V coordinate in part parameterization;
* `dp_contour` - plots contours with color-coded U and V coordinates
One can additionally provide the following optional arguments:
- `--min_score` to only show detections with scores not lower than the provided value
- `--nms_thresh` to additionally apply non-maximum suppression to detections at a given threshold
- `--output` to define visualization file name template, which defaults to `output.png`.
To distinguish output file names for different images, the tool appends a 1-based entry index,
e.g. output.0001.png, output.0002.png, etc.
The following examples show how to output results of a DensePose model
with ResNet-50 FPN backbone using different visualizations for image `image.jpg`:
1. Show bounding box and segmentation:
```bash
python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_segm -v
```
![Bounding Box + Segmentation Visualization](images/res_bbox_dp_segm.jpg)
2. Show bounding box and estimated U coordinates for body parts:
```bash
python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_u -v
```
![Bounding Box + U Coordinate Visualization](images/res_bbox_dp_u.jpg)
3. Show bounding box and estimated V coordinates for body parts:
```bash
python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_v -v
```
![Bounding Box + V Coordinate Visualization](images/res_bbox_dp_v.jpg)
4. Show bounding box and estimated U and V coordinates via contour plots:
```bash
python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg dp_contour,bbox -v
```
![Bounding Box + Contour Visualization](images/res_bbox_dp_contour.jpg)

View File

@@ -0,0 +1,105 @@
# Query Dataset
`query_db` is a tool to print or visualize DensePose data from a dataset.
It has two modes: `print` and `show` to output dataset entries to standard
output or to visualize them on images.
## Print Mode
The general command form is:
```bash
python query_db.py print [-h] [-v] [--max-entries N] <dataset> <selector>
```
There are two mandatory arguments:
- `<dataset>`, DensePose dataset specification, from which to select
the entries (e.g. `densepose_coco_2014_train`).
- `<selector>`, dataset entry selector which can be a single specification,
or a comma-separated list of specifications of the form
`field[:type]=value` for exact match with the value
or `field[:type]=min-max` for a range of values
One can additionally limit the maximum number of entries to output
by providing `--max-entries` argument.
Examples:
1. Output at most the first 10 entries from the `densepose_coco_2014_train` dataset:
```bash
python query_db.py print densepose_coco_2014_train \* --max-entries 10 -v
```
2. Output all entries with `file_name` equal to `COCO_train2014_000000000036.jpg`:
```bash
python query_db.py print densepose_coco_2014_train file_name=COCO_train2014_000000000036.jpg -v
```
3. Output all entries with `image_id` between 36 and 156:
```bash
python query_db.py print densepose_coco_2014_train image_id:int=36-156 -v
```
## Visualization Mode
The general command form is:
```bash
python query_db.py show [-h] [-v] [--max-entries N] [--output <image_file>] <dataset> <selector> <visualizations>
```
There are three mandatory arguments:
- `<dataset>`, DensePose dataset specification, from which to select
the entries (e.g. `densepose_coco_2014_train`).
- `<selector>`, dataset entry selector which can be a single specification,
or a comma-separated list of specifications of the form
`field[:type]=value` for exact match with the value
or `field[:type]=min-max` for a range of values
- `<visualizations>`, visualizations specifier; currently available visualizations are:
* `bbox` - bounding boxes of annotated persons;
* `dp_i` - annotated points colored according to the containing part;
* `dp_pts` - annotated points in green color;
* `dp_segm` - segmentation masks for annotated persons;
* `dp_u` - annotated points colored according to their U coordinate in part parameterization;
* `dp_v` - annotated points colored according to their V coordinate in part parameterization;
One can additionally provide one of the two optional arguments:
- `--max-entries` to limit the maximum number of entries to visualize
- `--output` to provide visualization file name template, which defaults
to `output.png`. To distinguish file names for different dataset
entries, the tool appends 1-based entry index to the output file name,
e.g. output.0001.png, output.0002.png, etc.
The following examples show how to output different visualizations for image with `id = 322`
from `densepose_coco_2014_train` dataset:
1. Show bounding box and segmentation:
```bash
python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_segm -v
```
![Bounding Box + Segmentation Visualization](images/vis_bbox_dp_segm.jpg)
2. Show bounding box and points colored according to the containing part:
```bash
python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_i -v
```
![Bounding Box + Point Label Visualization](images/vis_bbox_dp_i.jpg)
3. Show bounding box and annotated points in green color:
```bash
python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_pts -v
```
![Bounding Box + Point Visualization](images/vis_bbox_dp_pts.jpg)
4. Show bounding box and annotated points colored according to their U coordinate in part parameterization:
```bash
python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_u -v
```
![Bounding Box + Point U Visualization](images/vis_bbox_dp_u.jpg)
5. Show bounding box and annotated points colored according to their V coordinate in part parameterization:
```bash
python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_v -v
```
![Bounding Box + Point V Visualization](images/vis_bbox_dp_v.jpg)

View File

@@ -0,0 +1,250 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import argparse
import logging
import os
import sys
from timeit import default_timer as timer
from typing import Any, ClassVar, Dict, List
import torch
from fvcore.common.file_io import PathManager
from detectron2.data.catalog import DatasetCatalog
from detectron2.utils.logger import setup_logger
from densepose.data.structures import DensePoseDataRelative
from densepose.utils.dbhelper import EntrySelector
from densepose.utils.logger import verbosity_to_level
from densepose.vis.base import CompoundVisualizer
from densepose.vis.bounding_box import BoundingBoxVisualizer
from densepose.vis.densepose import (
DensePoseDataCoarseSegmentationVisualizer,
DensePoseDataPointsIVisualizer,
DensePoseDataPointsUVisualizer,
DensePoseDataPointsVisualizer,
DensePoseDataPointsVVisualizer,
)
# Description passed to the top-level argument parser of the tool
DOC = """Query DB - a tool to print / visualize data from a database
"""
# Name used both for the module-level logger and for the logger set up in `main`
LOGGER_NAME = "query_db"
logger = logging.getLogger(LOGGER_NAME)
# Maps an action's COMMAND string to the action class; filled in by `register_action`
_ACTION_REGISTRY: Dict[str, "Action"] = {}
class Action(object):
    """
    Base class for command line actions; holds the options shared by all of them
    """

    @classmethod
    def add_arguments(cls: type, parser: argparse.ArgumentParser):
        """
        Register the common `-v` / `--verbosity` counting option on the parser

        Args:
            parser: argument parser (or sub-parser) to extend
        """
        verbosity_kwargs = {
            "action": "count",
            "help": "Verbose mode. Multiple -v options increase the verbosity.",
        }
        parser.add_argument("-v", "--verbosity", **verbosity_kwargs)
def register_action(cls: type):
    """
    Class decorator that records an action class in the action registry

    The class is stored under its `COMMAND` attribute and returned unchanged,
    so the decorated name still refers to the original class.
    """
    command = cls.COMMAND
    # the registry is only mutated, never rebound, so no `global` statement is needed
    _ACTION_REGISTRY[command] = cls
    return cls
class EntrywiseAction(Action):
    """
    Base class for actions that walk through dataset entries and run
    `execute_on_entry` (provided by subclasses) on every entry accepted
    by the selector
    """

    @classmethod
    def add_arguments(cls: type, parser: argparse.ArgumentParser):
        """
        Add the `dataset` and `selector` positionals and the `--max-entries`
        option on top of the common arguments
        """
        super(EntrywiseAction, cls).add_arguments(parser)
        selector_help = (
            "Dataset entry selector in the form field1[:type]=value1[,"
            "field2[:type]=value_min-value_max...] which selects all "
            "entries from the dataset that satisfy the constraints"
        )
        parser.add_argument(
            "dataset", metavar="<dataset>", help="Dataset name (e.g. densepose_coco_2014_train)"
        )
        parser.add_argument("selector", metavar="<selector>", help=selector_help)
        parser.add_argument(
            "--max-entries", metavar="N", help="Maximum number of entries to process", type=int
        )

    @classmethod
    def execute(cls: type, args: argparse.Namespace):
        """
        Load the dataset, build the selector and the context, then run
        `execute_on_entry` on each entry the selector accepts
        """
        dataset = setup_dataset(args.dataset)
        entry_selector = EntrySelector.from_string(args.selector)
        context = cls.create_context(args)
        if args.max_entries is None:
            candidates = dataset
        else:
            # Pairing with a range stops after at most `max_entries` dataset entries.
            # The cap is applied before the selector, so fewer entries may be executed.
            candidates = (entry for _, entry in zip(range(args.max_entries), dataset))
        for entry in candidates:
            if entry_selector(entry):
                cls.execute_on_entry(entry, context)

    @classmethod
    def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]:
        """
        Create the dictionary handed to every `execute_on_entry` call;
        empty here, subclasses may override
        """
        return {}
@register_action
class PrintAction(EntrywiseAction):
    """
    Action registered under the `print` command: pretty-prints selected entries to stdout
    """

    COMMAND: ClassVar[str] = "print"

    @classmethod
    def add_parser(cls: type, subparsers: argparse._SubParsersAction):
        """
        Create the sub-parser for this command and make `execute` its handler
        """
        print_parser = subparsers.add_parser(
            cls.COMMAND, help="Output selected entries to stdout. "
        )
        cls.add_arguments(print_parser)
        print_parser.set_defaults(func=cls.execute)

    @classmethod
    def add_arguments(cls: type, parser: argparse.ArgumentParser):
        """
        Add the arguments of the parent class; this action defines none of its own
        """
        super(PrintAction, cls).add_arguments(parser)

    @classmethod
    def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]):
        """
        Pretty-print a single dataset entry; `context` is not used
        """
        from pprint import pprint

        pprint(entry, indent=2, width=200, compact=True)
@register_action
class ShowAction(EntrywiseAction):
    """
    Show action that visualizes selected entries on an image
    """

    COMMAND: ClassVar[str] = "show"
    # Maps visualization names accepted on the command line to visualizer instances
    VISUALIZERS: ClassVar[Dict[str, object]] = {
        "dp_segm": DensePoseDataCoarseSegmentationVisualizer(),
        "dp_i": DensePoseDataPointsIVisualizer(),
        "dp_u": DensePoseDataPointsUVisualizer(),
        "dp_v": DensePoseDataPointsVVisualizer(),
        "dp_pts": DensePoseDataPointsVisualizer(),
        "bbox": BoundingBoxVisualizer(),
    }

    @classmethod
    def add_parser(cls: type, subparsers: argparse._SubParsersAction):
        """
        Create the sub-parser for this command and make `execute` its handler
        """
        parser = subparsers.add_parser(cls.COMMAND, help="Visualize selected entries")
        cls.add_arguments(parser)
        parser.set_defaults(func=cls.execute)

    @classmethod
    def add_arguments(cls: type, parser: argparse.ArgumentParser):
        """
        Add the `visualizations` positional and the `--output` option on top of
        the arguments of the parent class
        """
        super(ShowAction, cls).add_arguments(parser)
        parser.add_argument(
            "visualizations",
            metavar="<visualizations>",
            help="Comma separated list of visualizations, possible values: "
            "[{}]".format(",".join(sorted(cls.VISUALIZERS.keys()))),
        )
        parser.add_argument(
            "--output",
            metavar="<image_file>",
            default="output.png",
            help="File name to save output to",
        )

    @classmethod
    def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]):
        """
        Render the requested visualizations for one dataset entry and save the image

        Args:
            entry: dataset entry; its "file_name" and "annotations" keys are read
            context: dictionary produced by `create_context`; its "entry_idx"
                counter is incremented after the image is written

        Raises:
            OSError: if the image of the entry cannot be read
        """
        import cv2
        import numpy as np

        image_fpath = PathManager.get_local_path(entry["file_name"])
        image = cv2.imread(image_fpath, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports a missing / unreadable file by returning None instead
            # of raising; fail here with the path rather than on the indexing below
            raise OSError(f"Could not read image {image_fpath}")
        # replicate the single grayscale channel to obtain a 3-channel image
        image = np.tile(image[:, :, np.newaxis], [1, 1, 3])
        datas = cls._extract_data_for_visualizers_from_entry(context["vis_specs"], entry)
        visualizer = context["visualizer"]
        image_vis = visualizer.visualize(image, datas)
        # output file names use a 1-based index, the stored counter is 0-based
        entry_idx = context["entry_idx"] + 1
        out_fname = cls._get_out_fname(entry_idx, context["out_fname"])
        cv2.imwrite(out_fname, image_vis)
        logger.info(f"Output saved to {out_fname}")
        context["entry_idx"] += 1

    @classmethod
    def _get_out_fname(cls: type, entry_idx: int, fname_base: str):
        """
        Insert the entry index, zero-padded to 4 digits, before the file extension
        (e.g. `output.png` with index 1 gives `output.0001.png`)
        """
        base, ext = os.path.splitext(fname_base)
        return base + ".{0:04d}".format(entry_idx) + ext

    @classmethod
    def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]:
        """
        Build the context shared by all `execute_on_entry` calls

        The comma separated `args.visualizations` value is split into names and
        each name is looked up in `VISUALIZERS` (an unknown name raises KeyError).

        Returns:
            dictionary with the visualization names ("vis_specs"), the compound
            visualizer ("visualizer"), the output file name template ("out_fname")
            and the counter of processed entries ("entry_idx", starts at 0)
        """
        vis_specs = args.visualizations.split(",")
        visualizers = [cls.VISUALIZERS[vis_spec] for vis_spec in vis_specs]
        context = {
            "vis_specs": vis_specs,
            "visualizer": CompoundVisualizer(visualizers),
            "out_fname": args.output,
            "entry_idx": 0,
        }
        return context

    @classmethod
    def _extract_data_for_visualizers_from_entry(
        cls: type, vis_specs: List[str], entry: Dict[str, Any]
    ):
        """
        Collect the inputs for each requested visualization from a dataset entry

        Annotations rejected by `DensePoseDataRelative.validate_annotation` are
        skipped. For the remaining ones the bounding box tensors and the
        `DensePoseDataRelative` objects are gathered into two parallel lists.

        Returns:
            list with one item per visualization name, in the same order:
            the list of boxes for "bbox", the pair (boxes, DensePose data)
            for any other name
        """
        dp_list = []
        bbox_list = []
        for annotation in entry["annotations"]:
            is_valid, _ = DensePoseDataRelative.validate_annotation(annotation)
            if not is_valid:
                continue
            bbox = torch.as_tensor(annotation["bbox"])
            bbox_list.append(bbox)
            dp_data = DensePoseDataRelative(annotation)
            dp_list.append(dp_data)
        datas = []
        for vis_spec in vis_specs:
            datas.append(bbox_list if "bbox" == vis_spec else (bbox_list, dp_list))
        return datas
def setup_dataset(dataset_name):
    """
    Fetch the dataset registered under the given name from `DatasetCatalog`
    and log how long the lookup took
    """
    logger.info(f"Loading dataset {dataset_name}")
    started_at = timer()
    dataset = DatasetCatalog.get(dataset_name)
    elapsed = timer() - started_at
    logger.info(f"Loaded dataset {dataset_name} in {elapsed:.3f}s")
    return dataset
def create_argument_parser() -> argparse.ArgumentParser:
    """
    Build the command line parser with one sub-parser per registered action

    The default `func` prints the help to stdout; each action's sub-parser
    replaces it with its own handler.
    """

    def wide_help_formatter(prog):
        return argparse.HelpFormatter(prog, max_help_position=120)

    def print_help(_):
        return parser.print_help(sys.stdout)

    parser = argparse.ArgumentParser(description=DOC, formatter_class=wide_help_formatter)
    parser.set_defaults(func=print_help)
    subparsers = parser.add_subparsers(title="Actions")
    for action in _ACTION_REGISTRY.values():
        action.add_parser(subparsers)
    return parser
def main():
    """
    Entry point: parse the command line, configure the logger and run the
    handler selected by the parsed arguments
    """
    global logger
    args = create_argument_parser().parse_args()
    # `verbosity` is only defined by action sub-parsers, so it may be missing
    verbosity = getattr(args, "verbosity", None)
    logger = setup_logger(name=LOGGER_NAME)
    logger.setLevel(verbosity_to_level(verbosity))
    args.func(args)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,110 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import os
import torch
from detectron2.config import get_cfg
from detectron2.engine import default_setup
from detectron2.modeling import build_model
from densepose import add_dataset_category_config, add_densepose_config
# Name of the configuration directory, located next to the directory of this file
_BASE_CONFIG_DIR = "configs"
# Subdirectories of the base configuration directory
_EVOLUTION_CONFIG_SUB_DIR = "evolution"
_QUICK_SCHEDULES_CONFIG_SUB_DIR = "quick_schedules"
# Files whose names start with this prefix are skipped when collecting configs
_BASE_CONFIG_FILE_PREFIX = "Base-"
# Only files with this extension are collected as configs
_CONFIG_FILE_EXT = ".yaml"
def _get_base_config_dir():
    """
    Return the base directory for configurations: the `configs` directory
    one level above the directory that contains this file
    """
    this_dir = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(this_dir, "..", _BASE_CONFIG_DIR)
def _get_evolution_config_dir():
    """
    Return the directory with evolution configurations (a subdirectory of the
    base configuration directory)
    """
    base_dir = _get_base_config_dir()
    return os.path.join(base_dir, _EVOLUTION_CONFIG_SUB_DIR)
def _get_quick_schedules_config_dir():
    """
    Return the directory with quick schedules configurations (a subdirectory of
    the base configuration directory)
    """
    base_dir = _get_base_config_dir()
    return os.path.join(base_dir, _QUICK_SCHEDULES_CONFIG_SUB_DIR)
def _collect_config_files(config_dir):
    """
    Collect the configuration files located directly in the specified directory

    An entry is kept when it is a regular file, its extension equals
    `_CONFIG_FILE_EXT` and its name does not start with `_BASE_CONFIG_FILE_PREFIX`.
    Sub-directories are not descended into. The returned paths are relative to
    the base configuration directory, in the order produced by `os.listdir`.
    """
    base_dir = _get_base_config_dir()

    def _is_config_file(name, full_path):
        if not os.path.isfile(full_path):
            return False
        if os.path.splitext(name)[1] != _CONFIG_FILE_EXT:
            return False
        return not name.startswith(_BASE_CONFIG_FILE_PREFIX)

    candidates = ((name, os.path.join(config_dir, name)) for name in os.listdir(config_dir))
    return [
        os.path.relpath(full_path, base_dir)
        for name, full_path in candidates
        if _is_config_file(name, full_path)
    ]
def get_config_files():
    """
    List the configuration files found directly in the base configuration
    directory (paths are relative to that directory)
    """
    config_dir = _get_base_config_dir()
    return _collect_config_files(config_dir)
def get_evolution_config_files():
    """
    List the configuration files found directly in the evolution configuration
    directory (paths are relative to the base configuration directory)
    """
    config_dir = _get_evolution_config_dir()
    return _collect_config_files(config_dir)
def get_quick_schedules_config_files():
    """
    List the configuration files found directly in the quick schedules configuration
    directory (paths are relative to the base configuration directory)
    """
    config_dir = _get_quick_schedules_config_dir()
    return _collect_config_files(config_dir)
def _get_model_config(config_file):
    """
    Load and return the configuration from the specified file (relative to the base
    configuration directory)

    The default detectron2 config is extended with the dataset category and
    DensePose options before the file is merged in. When CUDA is not available,
    the model device is overridden to "cpu".

    Args:
        config_file: path of the configuration file, relative to the base
            configuration directory
    Returns:
        the resulting (not frozen) config node
    """
    cfg = get_cfg()
    add_dataset_category_config(cfg)
    add_densepose_config(cfg)
    path = os.path.join(_get_base_config_dir(), config_file)
    cfg.merge_from_file(path)
    if not torch.cuda.is_available():
        # The device option is nested under the MODEL node. Assigning to
        # `cfg.MODEL_DEVICE` would only add an unrelated top-level key and the
        # model would still be built for the device named in the config.
        cfg.MODEL.DEVICE = "cpu"
    return cfg
def get_model(config_file):
    """
    Build a model from the configuration stored in the specified file
    (relative to the base configuration directory)
    """
    model_cfg = _get_model_config(config_file)
    model = build_model(model_cfg)
    return model
def setup(config_file):
    """
    Load the configuration from the specified file (relative to the base configuration
    directory), freeze it and run detectron2's `default_setup` on it
    """
    frozen_cfg = _get_model_config(config_file)
    frozen_cfg.freeze()
    # There are no command line arguments in tests, so an empty dict is passed instead
    no_args = {}
    default_setup(frozen_cfg, no_args)

View File

@@ -0,0 +1,43 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import unittest
import torch
from detectron2.structures import BitMasks, Boxes, Instances
from .common import get_model
# TODO(plabatut): Modularize detectron2 tests and re-use
def make_model_inputs(image, instances=None):
    """
    Pack an image and, optionally, its instances into a single model input dict

    The "instances" key is present only when `instances` is not None.
    """
    inputs = {"image": image}
    if instances is not None:
        inputs["instances"] = instances
    return inputs
def make_empty_instances(h, w):
    """
    Create an `Instances` object for an image of size (h, w) whose ground truth
    boxes, classes and masks all contain zero elements
    """
    empty = Instances((h, w))
    box_tensor = torch.rand(0, 4)
    empty.gt_boxes = Boxes(box_tensor)
    class_tensor = torch.tensor([])
    empty.gt_classes = class_tensor.to(dtype=torch.int64)
    mask_tensor = torch.rand(0, h, w)
    empty.gt_masks = BitMasks(mask_tensor)
    return empty
class ModelE2ETest(unittest.TestCase):
    """
    Base class for end-to-end model tests; subclasses set CONFIG_PATH to the
    configuration file (relative to the base configuration directory) to build
    the model from
    """

    CONFIG_PATH = ""

    def setUp(self):
        # A fresh model is built before every test method
        self.model = get_model(self.CONFIG_PATH)

    def _test_eval(self, sizes):
        """
        Run the model in eval mode on one random 3-channel image per entry of
        `sizes`, where each entry provides (height, width) as its first two items
        """
        batch = []
        for size in sizes:
            image = torch.rand(3, size[0], size[1])
            batch.append(make_model_inputs(image))
        self.model.eval()
        self.model(batch)
class DensePoseRCNNE2ETest(ModelE2ETest):
    """
    End-to-end test for the model described by densepose_rcnn_R_101_FPN_s1x.yaml
    """

    CONFIG_PATH = "densepose_rcnn_R_101_FPN_s1x.yaml"

    def test_empty_data(self):
        # Two random images of slightly different widths, no instances attached
        image_sizes = [(200, 250), (200, 249)]
        self._test_eval(image_sizes)

View File

@@ -0,0 +1,30 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import unittest
from .common import (
get_config_files,
get_evolution_config_files,
get_quick_schedules_config_files,
setup,
)
class TestSetup(unittest.TestCase):
    """
    Check that `setup` runs for every configuration file of each configuration group
    """

    def _test_setup(self, config_file):
        setup(config_file)

    def test_setup_configs(self):
        # Configurations located directly in the base configuration directory
        for config_file in get_config_files():
            self._test_setup(config_file)

    def test_setup_evolution_configs(self):
        # Evolution configurations
        for config_file in get_evolution_config_files():
            self._test_setup(config_file)

    def test_setup_quick_schedules_configs(self):
        # Quick schedules configurations
        for config_file in get_quick_schedules_config_files():
            self._test_setup(config_file)

View File

@@ -0,0 +1,25 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import unittest
from densepose.data.structures import normalized_coords_transform
class TestStructures(unittest.TestCase):
    """
    Tests for helpers from densepose.data.structures
    """

    def test_normalized_coords_transform(self):
        """
        The transform built for a box given as (x0, y0, w, h) is expected to map
        the box corners onto the corners of the [-1, 1] x [-1, 1] square
        """
        bbox = (32, 24, 288, 216)
        left, top, width, height = bbox
        right, bottom = left + width, top + height
        transform = normalized_coords_transform(*bbox)
        corner_cases = [
            ((-1, -1), (left, top)),  # Top-left
            ((1, -1), (right, top)),  # Top-right
            ((-1, 1), (left, bottom)),  # Bottom-left
            ((1, 1), (right, bottom)),  # Bottom-right
        ]
        for expected_p, corner in corner_cases:
            actual_p = transform(corner)
            self.assertEqual(expected_p, actual_p)

View File

@@ -0,0 +1,122 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
DensePose Training Script.
This script is similar to the training script in detectron2/tools.
It is an example of how a user might use detectron2 for a new project.
"""
import logging
import os
from collections import OrderedDict
from fvcore.common.file_io import PathManager
import detectron2.utils.comm as comm
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import CfgNode, get_cfg
from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch
from detectron2.evaluation import COCOEvaluator, DatasetEvaluators, verify_results
from detectron2.modeling import DatasetMapperTTA
from detectron2.utils.logger import setup_logger
from densepose import (
DensePoseCOCOEvaluator,
DensePoseGeneralizedRCNNWithTTA,
add_dataset_category_config,
add_densepose_config,
load_from_cfg,
)
from densepose.data import DatasetMapper, build_detection_test_loader, build_detection_train_loader
class Trainer(DefaultTrainer):
    """
    DefaultTrainer that uses the DensePose dataset mapper, data loaders and evaluators
    """

    @classmethod
    def build_evaluator(cls, cfg: CfgNode, dataset_name, output_folder=None):
        """
        Create the evaluators for a dataset: a COCO evaluator, plus a DensePose
        COCO evaluator when `cfg.MODEL.DENSEPOSE_ON` is set

        Args:
            cfg: configuration
            dataset_name: name of the dataset to evaluate on
            output_folder: where evaluators store their output; defaults to the
                "inference" sub-directory of `cfg.OUTPUT_DIR`
        """
        eval_dir = output_folder
        if eval_dir is None:
            eval_dir = os.path.join(cfg.OUTPUT_DIR, "inference")
        coco_evaluator = COCOEvaluator(dataset_name, cfg, True, eval_dir)
        if not cfg.MODEL.DENSEPOSE_ON:
            return DatasetEvaluators([coco_evaluator])
        densepose_evaluator = DensePoseCOCOEvaluator(dataset_name, True, eval_dir)
        return DatasetEvaluators([coco_evaluator, densepose_evaluator])

    @classmethod
    def build_test_loader(cls, cfg: CfgNode, dataset_name):
        """
        Create the test data loader for a dataset, using the DensePose mapper
        constructed with its second argument set to False
        """
        test_mapper = DatasetMapper(cfg, False)
        return build_detection_test_loader(cfg, dataset_name, mapper=test_mapper)

    @classmethod
    def build_train_loader(cls, cfg: CfgNode):
        """
        Create the training data loader, using the DensePose mapper constructed
        with its second argument set to True
        """
        train_mapper = DatasetMapper(cfg, True)
        return build_detection_train_loader(cfg, mapper=train_mapper)

    @classmethod
    def test_with_TTA(cls, cfg: CfgNode, model):
        """
        Evaluate the model wrapped for test-time augmentation on every dataset
        from `cfg.DATASETS.TEST`

        Returns:
            OrderedDict with the results of `cls.test`, every key suffixed with "_TTA"
        """
        log = logging.getLogger("detectron2.trainer")
        # In the end of training, run an evaluation with TTA
        # Only support some R-CNN models.
        log.info("Running inference with test-time augmentation ...")
        transform_data = load_from_cfg(cfg)
        tta_model = DensePoseGeneralizedRCNNWithTTA(
            cfg, model, transform_data, DatasetMapperTTA(cfg)
        )
        evaluators = []
        for name in cfg.DATASETS.TEST:
            tta_dir = os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
            evaluators.append(cls.build_evaluator(cfg, name, output_folder=tta_dir))
        results = cls.test(cfg, tta_model, evaluators)
        return OrderedDict((key + "_TTA", value) for key, value in results.items())
def setup(args):
    """
    Create the frozen configuration for a run and perform the basic setup

    The default config is extended with the dataset category and DensePose
    options, then overridden from `args.config_file` and from `args.opts`.
    """
    cfg = get_cfg()
    for add_options in (add_dataset_category_config, add_densepose_config):
        add_options(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    default_setup(cfg, args)
    # Setup logger for "densepose" module
    rank = comm.get_rank()
    setup_logger(output=cfg.OUTPUT_DIR, distributed_rank=rank, name="densepose")
    return cfg
def main(args):
    """
    Entry point of a single process: evaluate only when `args.eval_only` is set,
    otherwise train

    Returns:
        the evaluation results in eval-only mode, otherwise the value returned
        by `trainer.train()`
    """
    cfg = setup(args)
    # disable strict kwargs checking: allow one to specify path handle
    # hints through kwargs, like timeout in DP evaluation
    PathManager.set_strict_kwargs_checking(False)

    if args.eval_only:
        return _run_eval_only(cfg, args)

    trainer = Trainer(cfg)
    trainer.resume_or_load(resume=args.resume)
    if cfg.TEST.AUG.ENABLED:
        tta_hook = hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))
        trainer.register_hooks([tta_hook])
    return trainer.train()


def _run_eval_only(cfg, args):
    # Build the model, load its weights and evaluate it without training
    model = Trainer.build_model(cfg)
    checkpointer = DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=args.resume)
    results = Trainer.test(cfg, model)
    if cfg.TEST.AUG.ENABLED:
        results.update(Trainer.test_with_TTA(cfg, model))
    if comm.is_main_process():
        verify_results(cfg, results)
    return results
if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    print("Command Line Args:", args)
    # Hand `main` over to the detectron2 launcher together with the
    # GPU / machine settings taken from the command line
    launch(
        main, args.num_gpus,
        num_machines=args.num_machines, machine_rank=args.machine_rank,
        dist_url=args.dist_url, args=(args,),
    )