Add at new repo again
This commit is contained in:
@@ -0,0 +1,54 @@
# DensePose in Detectron2

**Dense Human Pose Estimation In The Wild**

_Rıza Alp Güler, Natalia Neverova, Iasonas Kokkinos_

[[`densepose.org`](https://densepose.org)] [[`arXiv`](https://arxiv.org/abs/1802.00434)] [[`BibTeX`](#CitingDensePose)]

Dense human pose estimation aims at mapping all human pixels of an RGB image to the 3D surface of the human body.

<div align="center">
  <img src="https://drive.google.com/uc?export=view&id=1qfSOkpueo1kVZbXOuQJJhyagKjMgepsz" width="700px" />
</div>

In this repository, we provide the code to train and evaluate DensePose-RCNN. We also provide tools to visualize
DensePose annotation and results.

# Quick Start

See [Getting Started](doc/GETTING_STARTED.md)

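If you prefer to run inference from Python directly, below is a minimal sketch that mirrors what `apply_net.py` in this repository does; the config and weights paths are placeholders to adapt to your setup:

```
import torch

from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.engine.defaults import DefaultPredictor

from densepose import add_densepose_config

# Build a DensePose-aware config; the paths below are placeholders.
cfg = get_cfg()
add_densepose_config(cfg)
cfg.merge_from_file("configs/densepose_rcnn_R_50_FPN_s1x.yaml")
cfg.MODEL.WEIGHTS = "model_final.pkl"
cfg.freeze()

# Run the model on a single image; the predictor expects a BGR image.
predictor = DefaultPredictor(cfg)
img = read_image("image.jpg", format="BGR")
with torch.no_grad():
    instances = predictor(img)["instances"]
```

The `apply_net.py` tool included in this repository wraps the same flow and adds `dump` and `show` subcommands for saving or visualizing DensePose results.
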
# Model Zoo and Baselines

We provide a number of baseline results and trained models available for download. See [Model Zoo](doc/MODEL_ZOO.md) for details.

# License

Detectron2 is released under the [Apache 2.0 license](../../LICENSE).

## <a name="CitingDensePose"></a>Citing DensePose

If you use DensePose, please cite it using the following BibTeX entries:

For DensePose with estimated confidences:

```
@InProceedings{Neverova2019DensePoseConfidences,
    title = {Correlated Uncertainty for Learning Dense Correspondences from Noisy Labels},
    author = {Neverova, Natalia and Novotny, David and Vedaldi, Andrea},
    booktitle = {Advances in Neural Information Processing Systems},
    year = {2019},
}
```

For the original DensePose:

```
@InProceedings{Guler2018DensePose,
    title = {DensePose: Dense Human Pose Estimation In The Wild},
    author = {R{\i}za Alp G\"uler and Natalia Neverova and Iasonas Kokkinos},
    booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    year = {2018}
}
```

@@ -0,0 +1,318 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
import sys
|
||||
from typing import Any, ClassVar, Dict, List
|
||||
import torch
|
||||
|
||||
from detectron2.config import get_cfg
|
||||
from detectron2.data.detection_utils import read_image
|
||||
from detectron2.engine.defaults import DefaultPredictor
|
||||
from detectron2.structures.boxes import BoxMode
|
||||
from detectron2.structures.instances import Instances
|
||||
from detectron2.utils.logger import setup_logger
|
||||
|
||||
from densepose import add_densepose_config
|
||||
from densepose.utils.logger import verbosity_to_level
|
||||
from densepose.vis.base import CompoundVisualizer
|
||||
from densepose.vis.bounding_box import ScoredBoundingBoxVisualizer
|
||||
from densepose.vis.densepose import (
|
||||
DensePoseResultsContourVisualizer,
|
||||
DensePoseResultsFineSegmentationVisualizer,
|
||||
DensePoseResultsUVisualizer,
|
||||
DensePoseResultsVVisualizer,
|
||||
)
|
||||
from densepose.vis.extractor import CompoundExtractor, create_extractor
|
||||
|
||||
DOC = """Apply Net - a tool to print / visualize DensePose results
|
||||
"""
|
||||
|
||||
LOGGER_NAME = "apply_net"
|
||||
logger = logging.getLogger(LOGGER_NAME)
|
||||
|
||||
_ACTION_REGISTRY: Dict[str, "Action"] = {}
|
||||
|
||||
|
||||
class Action(object):
|
||||
@classmethod
|
||||
def add_arguments(cls: type, parser: argparse.ArgumentParser):
|
||||
parser.add_argument(
|
||||
"-v",
|
||||
"--verbosity",
|
||||
action="count",
|
||||
help="Verbose mode. Multiple -v options increase the verbosity.",
|
||||
)
|
||||
|
||||
|
||||
def register_action(cls: type):
|
||||
"""
|
||||
Decorator for action classes to automate action registration
|
||||
"""
|
||||
global _ACTION_REGISTRY
|
||||
_ACTION_REGISTRY[cls.COMMAND] = cls
|
||||
return cls
|
||||
|
||||
|
||||
class InferenceAction(Action):
|
||||
@classmethod
|
||||
def add_arguments(cls: type, parser: argparse.ArgumentParser):
|
||||
super(InferenceAction, cls).add_arguments(parser)
|
||||
parser.add_argument("cfg", metavar="<config>", help="Config file")
|
||||
parser.add_argument("model", metavar="<model>", help="Model file")
|
||||
parser.add_argument("input", metavar="<input>", help="Input data")
|
||||
parser.add_argument(
|
||||
"--opts",
|
||||
help="Modify config options using the command-line 'KEY VALUE' pairs",
|
||||
default=[],
|
||||
nargs=argparse.REMAINDER,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def execute(cls: type, args: argparse.Namespace):
|
||||
logger.info(f"Loading config from {args.cfg}")
|
||||
opts = []
|
||||
cfg = cls.setup_config(args.cfg, args.model, args, opts)
|
||||
logger.info(f"Loading model from {args.model}")
|
||||
predictor = DefaultPredictor(cfg)
|
||||
logger.info(f"Loading data from {args.input}")
|
||||
file_list = cls._get_input_file_list(args.input)
|
||||
if len(file_list) == 0:
|
||||
logger.warning(f"No input images for {args.input}")
|
||||
return
|
||||
context = cls.create_context(args)
|
||||
for file_name in file_list:
|
||||
img = read_image(file_name, format="BGR") # predictor expects BGR image.
|
||||
with torch.no_grad():
|
||||
outputs = predictor(img)["instances"]
|
||||
cls.execute_on_outputs(context, {"file_name": file_name, "image": img}, outputs)
|
||||
cls.postexecute(context)
|
||||
|
||||
@classmethod
|
||||
def setup_config(
|
||||
cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str]
|
||||
):
|
||||
cfg = get_cfg()
|
||||
add_densepose_config(cfg)
|
||||
cfg.merge_from_file(config_fpath)
|
||||
cfg.merge_from_list(args.opts)
|
||||
if opts:
|
||||
cfg.merge_from_list(opts)
|
||||
cfg.MODEL.WEIGHTS = model_fpath
|
||||
cfg.freeze()
|
||||
return cfg
|
||||
|
||||
@classmethod
|
||||
def _get_input_file_list(cls: type, input_spec: str):
|
||||
if os.path.isdir(input_spec):
|
||||
file_list = [
|
||||
os.path.join(input_spec, fname)
|
||||
for fname in os.listdir(input_spec)
|
||||
if os.path.isfile(os.path.join(input_spec, fname))
|
||||
]
|
||||
elif os.path.isfile(input_spec):
|
||||
file_list = [input_spec]
|
||||
else:
|
||||
file_list = glob.glob(input_spec)
|
||||
return file_list
|
||||
|
||||
|
||||
@register_action
|
||||
class DumpAction(InferenceAction):
|
||||
"""
|
||||
Dump action that outputs results to a pickle file
|
||||
"""
|
||||
|
||||
COMMAND: ClassVar[str] = "dump"
|
||||
|
||||
@classmethod
|
||||
def add_parser(cls: type, subparsers: argparse._SubParsersAction):
|
||||
parser = subparsers.add_parser(cls.COMMAND, help="Dump model outputs to a file.")
|
||||
cls.add_arguments(parser)
|
||||
parser.set_defaults(func=cls.execute)
|
||||
|
||||
@classmethod
|
||||
def add_arguments(cls: type, parser: argparse.ArgumentParser):
|
||||
super(DumpAction, cls).add_arguments(parser)
|
||||
parser.add_argument(
|
||||
"--output",
|
||||
metavar="<dump_file>",
|
||||
default="results.pkl",
|
||||
help="File name to save dump to",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def execute_on_outputs(
|
||||
cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances
|
||||
):
|
||||
image_fpath = entry["file_name"]
|
||||
logger.info(f"Processing {image_fpath}")
|
||||
result = {"file_name": image_fpath}
|
||||
if outputs.has("scores"):
|
||||
result["scores"] = outputs.get("scores").cpu()
|
||||
if outputs.has("pred_boxes"):
|
||||
result["pred_boxes_XYXY"] = outputs.get("pred_boxes").tensor.cpu()
|
||||
if outputs.has("pred_densepose"):
|
||||
boxes_XYWH = BoxMode.convert(
|
||||
result["pred_boxes_XYXY"], BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
|
||||
)
|
||||
result["pred_densepose"] = outputs.get("pred_densepose").to_result(boxes_XYWH)
|
||||
context["results"].append(result)
|
||||
|
||||
@classmethod
|
||||
def create_context(cls: type, args: argparse.Namespace):
|
||||
context = {"results": [], "out_fname": args.output}
|
||||
return context
|
||||
|
||||
@classmethod
|
||||
def postexecute(cls: type, context: Dict[str, Any]):
|
||||
out_fname = context["out_fname"]
|
||||
out_dir = os.path.dirname(out_fname)
|
||||
if len(out_dir) > 0 and not os.path.exists(out_dir):
|
||||
os.makedirs(out_dir)
|
||||
with open(out_fname, "wb") as hFile:
|
||||
pickle.dump(context["results"], hFile)
|
||||
logger.info(f"Output saved to {out_fname}")
|
||||
|
||||
|
||||
@register_action
|
||||
class ShowAction(InferenceAction):
|
||||
"""
|
||||
Show action that visualizes selected entries on an image
|
||||
"""
|
||||
|
||||
COMMAND: ClassVar[str] = "show"
|
||||
VISUALIZERS: ClassVar[Dict[str, object]] = {
|
||||
"dp_contour": DensePoseResultsContourVisualizer,
|
||||
"dp_segm": DensePoseResultsFineSegmentationVisualizer,
|
||||
"dp_u": DensePoseResultsUVisualizer,
|
||||
"dp_v": DensePoseResultsVVisualizer,
|
||||
"bbox": ScoredBoundingBoxVisualizer,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def add_parser(cls: type, subparsers: argparse._SubParsersAction):
|
||||
parser = subparsers.add_parser(cls.COMMAND, help="Visualize selected entries")
|
||||
cls.add_arguments(parser)
|
||||
parser.set_defaults(func=cls.execute)
|
||||
|
||||
@classmethod
|
||||
def add_arguments(cls: type, parser: argparse.ArgumentParser):
|
||||
super(ShowAction, cls).add_arguments(parser)
|
||||
parser.add_argument(
|
||||
"visualizations",
|
||||
metavar="<visualizations>",
|
||||
help="Comma separated list of visualizations, possible values: "
|
||||
"[{}]".format(",".join(sorted(cls.VISUALIZERS.keys()))),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--min_score",
|
||||
metavar="<score>",
|
||||
default=0.8,
|
||||
type=float,
|
||||
help="Minimum detection score to visualize",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--nms_thresh", metavar="<threshold>", default=None, type=float, help="NMS threshold"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output",
|
||||
metavar="<image_file>",
|
||||
default="outputres.png",
|
||||
help="File name to save output to",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def setup_config(
|
||||
cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str]
|
||||
):
|
||||
opts.append("MODEL.ROI_HEADS.SCORE_THRESH_TEST")
|
||||
opts.append(str(args.min_score))
|
||||
if args.nms_thresh is not None:
|
||||
opts.append("MODEL.ROI_HEADS.NMS_THRESH_TEST")
|
||||
opts.append(str(args.nms_thresh))
|
||||
cfg = super(ShowAction, cls).setup_config(config_fpath, model_fpath, args, opts)
|
||||
return cfg
|
||||
|
||||
@classmethod
|
||||
def execute_on_outputs(
|
||||
cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances
|
||||
):
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
visualizer = context["visualizer"]
|
||||
extractor = context["extractor"]
|
||||
image_fpath = entry["file_name"]
|
||||
logger.info(f"Processing {image_fpath}")
|
||||
image = cv2.cvtColor(entry["image"], cv2.COLOR_BGR2GRAY)
|
||||
image = np.tile(image[:, :, np.newaxis], [1, 1, 3])
|
||||
data = extractor(outputs)
|
||||
image_vis = visualizer.visualize(image, data)
|
||||
entry_idx = context["entry_idx"] + 1
|
||||
out_fname = cls._get_out_fname(entry_idx, context["out_fname"])
|
||||
out_dir = os.path.dirname(out_fname)
|
||||
if len(out_dir) > 0 and not os.path.exists(out_dir):
|
||||
os.makedirs(out_dir)
|
||||
cv2.imwrite(out_fname, image_vis)
|
||||
logger.info(f"Output saved to {out_fname}")
|
||||
context["entry_idx"] += 1
|
||||
|
||||
@classmethod
|
||||
def postexecute(cls: type, context: Dict[str, Any]):
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def _get_out_fname(cls: type, entry_idx: int, fname_base: str):
|
||||
base, ext = os.path.splitext(fname_base)
|
||||
return base + ".{0:04d}".format(entry_idx) + ext
|
||||
|
||||
@classmethod
|
||||
def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]:
|
||||
vis_specs = args.visualizations.split(",")
|
||||
visualizers = []
|
||||
extractors = []
|
||||
for vis_spec in vis_specs:
|
||||
vis = cls.VISUALIZERS[vis_spec]()
|
||||
visualizers.append(vis)
|
||||
extractor = create_extractor(vis)
|
||||
extractors.append(extractor)
|
||||
visualizer = CompoundVisualizer(visualizers)
|
||||
extractor = CompoundExtractor(extractors)
|
||||
context = {
|
||||
"extractor": extractor,
|
||||
"visualizer": visualizer,
|
||||
"out_fname": args.output,
|
||||
"entry_idx": 0,
|
||||
}
|
||||
return context
|
||||
|
||||
|
||||
def create_argument_parser() -> argparse.ArgumentParser:
|
||||
parser = argparse.ArgumentParser(
|
||||
description=DOC,
|
||||
formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=120),
|
||||
)
|
||||
parser.set_defaults(func=lambda _: parser.print_help(sys.stdout))
|
||||
subparsers = parser.add_subparsers(title="Actions")
|
||||
for _, action in _ACTION_REGISTRY.items():
|
||||
action.add_parser(subparsers)
|
||||
return parser
|
||||
|
||||
|
||||
def main():
|
||||
parser = create_argument_parser()
|
||||
args = parser.parse_args()
|
||||
verbosity = args.verbosity if hasattr(args, "verbosity") else None
|
||||
global logger
|
||||
logger = setup_logger(name=LOGGER_NAME)
|
||||
logger.setLevel(verbosity_to_level(verbosity))
|
||||
args.func(args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -0,0 +1,47 @@
|
||||
MODEL:
|
||||
META_ARCHITECTURE: "GeneralizedRCNN"
|
||||
BACKBONE:
|
||||
NAME: "build_resnet_fpn_backbone"
|
||||
RESNETS:
|
||||
OUT_FEATURES: ["res2", "res3", "res4", "res5"]
|
||||
FPN:
|
||||
IN_FEATURES: ["res2", "res3", "res4", "res5"]
|
||||
ANCHOR_GENERATOR:
|
||||
SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
|
||||
ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
|
||||
RPN:
|
||||
IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
|
||||
PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
|
||||
PRE_NMS_TOPK_TEST: 1000 # Per FPN level
|
||||
# Detectron1 uses 2000 proposals per-batch,
|
||||
# (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
|
||||
# which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
|
||||
POST_NMS_TOPK_TRAIN: 1000
|
||||
POST_NMS_TOPK_TEST: 1000
|
||||
|
||||
DENSEPOSE_ON: True
|
||||
ROI_HEADS:
|
||||
NAME: "DensePoseROIHeads"
|
||||
IN_FEATURES: ["p2", "p3", "p4", "p5"]
|
||||
NUM_CLASSES: 1
|
||||
ROI_BOX_HEAD:
|
||||
NAME: "FastRCNNConvFCHead"
|
||||
NUM_FC: 2
|
||||
POOLER_RESOLUTION: 7
|
||||
POOLER_SAMPLING_RATIO: 2
|
||||
POOLER_TYPE: "ROIAlign"
|
||||
ROI_DENSEPOSE_HEAD:
|
||||
NAME: "DensePoseV1ConvXHead"
|
||||
POOLER_TYPE: "ROIAlign"
|
||||
NUM_COARSE_SEGM_CHANNELS: 2
|
||||
DATASETS:
|
||||
TRAIN: ("densepose_coco_2014_train", "densepose_coco_2014_valminusminival")
|
||||
TEST: ("densepose_coco_2014_minival",)
|
||||
SOLVER:
|
||||
IMS_PER_BATCH: 16
|
||||
BASE_LR: 0.01
|
||||
STEPS: (60000, 80000)
|
||||
MAX_ITER: 90000
|
||||
WARMUP_FACTOR: 0.1
|
||||
INPUT:
|
||||
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
|
@@ -0,0 +1,16 @@
|
||||
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
|
||||
RESNETS:
|
||||
DEPTH: 101
|
||||
ROI_DENSEPOSE_HEAD:
|
||||
NAME: "DensePoseDeepLabHead"
|
||||
UV_CONFIDENCE:
|
||||
ENABLED: True
|
||||
TYPE: "iid_iso"
|
||||
POINT_REGRESSION_WEIGHTS: 0.0005
|
||||
SOLVER:
|
||||
CLIP_GRADIENTS:
|
||||
ENABLED: True
|
||||
MAX_ITER: 130000
|
||||
STEPS: (100000, 120000)
|
@@ -0,0 +1,16 @@
|
||||
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
|
||||
RESNETS:
|
||||
DEPTH: 101
|
||||
ROI_DENSEPOSE_HEAD:
|
||||
NAME: "DensePoseDeepLabHead"
|
||||
UV_CONFIDENCE:
|
||||
ENABLED: True
|
||||
TYPE: "indep_aniso"
|
||||
POINT_REGRESSION_WEIGHTS: 0.0005
|
||||
SOLVER:
|
||||
CLIP_GRADIENTS:
|
||||
ENABLED: True
|
||||
MAX_ITER: 130000
|
||||
STEPS: (100000, 120000)
|
@@ -0,0 +1,10 @@
|
||||
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
|
||||
RESNETS:
|
||||
DEPTH: 101
|
||||
ROI_DENSEPOSE_HEAD:
|
||||
NAME: "DensePoseDeepLabHead"
|
||||
SOLVER:
|
||||
MAX_ITER: 130000
|
||||
STEPS: (100000, 120000)
|
@@ -0,0 +1,16 @@
|
||||
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
|
||||
RESNETS:
|
||||
DEPTH: 101
|
||||
ROI_DENSEPOSE_HEAD:
|
||||
UV_CONFIDENCE:
|
||||
ENABLED: True
|
||||
TYPE: "iid_iso"
|
||||
POINT_REGRESSION_WEIGHTS: 0.0005
|
||||
SOLVER:
|
||||
CLIP_GRADIENTS:
|
||||
ENABLED: True
|
||||
MAX_ITER: 130000
|
||||
STEPS: (100000, 120000)
|
||||
WARMUP_FACTOR: 0.025
|
@@ -0,0 +1,16 @@
|
||||
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
|
||||
RESNETS:
|
||||
DEPTH: 101
|
||||
ROI_DENSEPOSE_HEAD:
|
||||
UV_CONFIDENCE:
|
||||
ENABLED: True
|
||||
TYPE: "indep_aniso"
|
||||
POINT_REGRESSION_WEIGHTS: 0.0005
|
||||
SOLVER:
|
||||
CLIP_GRADIENTS:
|
||||
ENABLED: True
|
||||
MAX_ITER: 130000
|
||||
STEPS: (100000, 120000)
|
||||
WARMUP_FACTOR: 0.025
|
@@ -0,0 +1,8 @@
|
||||
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
|
||||
RESNETS:
|
||||
DEPTH: 101
|
||||
SOLVER:
|
||||
MAX_ITER: 130000
|
||||
STEPS: (100000, 120000)
|
@@ -0,0 +1,17 @@
|
||||
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
|
||||
RESNETS:
|
||||
DEPTH: 101
|
||||
ROI_DENSEPOSE_HEAD:
|
||||
NUM_COARSE_SEGM_CHANNELS: 15
|
||||
POOLER_RESOLUTION: 14
|
||||
HEATMAP_SIZE: 56
|
||||
INDEX_WEIGHTS: 2.0
|
||||
PART_WEIGHTS: 0.3
|
||||
POINT_REGRESSION_WEIGHTS: 0.1
|
||||
DECODER_ON: False
|
||||
SOLVER:
|
||||
BASE_LR: 0.002
|
||||
MAX_ITER: 130000
|
||||
STEPS: (100000, 120000)
|
@@ -0,0 +1,16 @@
|
||||
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
|
||||
RESNETS:
|
||||
DEPTH: 50
|
||||
ROI_DENSEPOSE_HEAD:
|
||||
NAME: "DensePoseDeepLabHead"
|
||||
UV_CONFIDENCE:
|
||||
ENABLED: True
|
||||
TYPE: "iid_iso"
|
||||
POINT_REGRESSION_WEIGHTS: 0.0005
|
||||
SOLVER:
|
||||
CLIP_GRADIENTS:
|
||||
ENABLED: True
|
||||
MAX_ITER: 130000
|
||||
STEPS: (100000, 120000)
|
@@ -0,0 +1,16 @@
|
||||
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
|
||||
RESNETS:
|
||||
DEPTH: 50
|
||||
ROI_DENSEPOSE_HEAD:
|
||||
NAME: "DensePoseDeepLabHead"
|
||||
UV_CONFIDENCE:
|
||||
ENABLED: True
|
||||
TYPE: "indep_aniso"
|
||||
POINT_REGRESSION_WEIGHTS: 0.0005
|
||||
SOLVER:
|
||||
CLIP_GRADIENTS:
|
||||
ENABLED: True
|
||||
MAX_ITER: 130000
|
||||
STEPS: (100000, 120000)
|
@@ -0,0 +1,10 @@
|
||||
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
|
||||
RESNETS:
|
||||
DEPTH: 50
|
||||
ROI_DENSEPOSE_HEAD:
|
||||
NAME: "DensePoseDeepLabHead"
|
||||
SOLVER:
|
||||
MAX_ITER: 130000
|
||||
STEPS: (100000, 120000)
|
@@ -0,0 +1,16 @@
|
||||
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
|
||||
RESNETS:
|
||||
DEPTH: 50
|
||||
ROI_DENSEPOSE_HEAD:
|
||||
UV_CONFIDENCE:
|
||||
ENABLED: True
|
||||
TYPE: "iid_iso"
|
||||
POINT_REGRESSION_WEIGHTS: 0.0005
|
||||
SOLVER:
|
||||
CLIP_GRADIENTS:
|
||||
ENABLED: True
|
||||
MAX_ITER: 130000
|
||||
STEPS: (100000, 120000)
|
||||
WARMUP_FACTOR: 0.025
|
@@ -0,0 +1,16 @@
|
||||
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
|
||||
RESNETS:
|
||||
DEPTH: 50
|
||||
ROI_DENSEPOSE_HEAD:
|
||||
UV_CONFIDENCE:
|
||||
ENABLED: True
|
||||
TYPE: "indep_aniso"
|
||||
POINT_REGRESSION_WEIGHTS: 0.0005
|
||||
SOLVER:
|
||||
CLIP_GRADIENTS:
|
||||
ENABLED: True
|
||||
MAX_ITER: 130000
|
||||
STEPS: (100000, 120000)
|
||||
WARMUP_FACTOR: 0.025
|
@@ -0,0 +1,8 @@
|
||||
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
|
||||
RESNETS:
|
||||
DEPTH: 50
|
||||
SOLVER:
|
||||
MAX_ITER: 130000
|
||||
STEPS: (100000, 120000)
|
@@ -0,0 +1,17 @@
|
||||
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
|
||||
RESNETS:
|
||||
DEPTH: 50
|
||||
ROI_DENSEPOSE_HEAD:
|
||||
NUM_COARSE_SEGM_CHANNELS: 15
|
||||
POOLER_RESOLUTION: 14
|
||||
HEATMAP_SIZE: 56
|
||||
INDEX_WEIGHTS: 2.0
|
||||
PART_WEIGHTS: 0.3
|
||||
POINT_REGRESSION_WEIGHTS: 0.1
|
||||
DECODER_ON: False
|
||||
SOLVER:
|
||||
BASE_LR: 0.002
|
||||
MAX_ITER: 130000
|
||||
STEPS: (100000, 120000)
|
@@ -0,0 +1,91 @@
|
||||
MODEL:
|
||||
META_ARCHITECTURE: "GeneralizedRCNN"
|
||||
BACKBONE:
|
||||
NAME: "build_resnet_fpn_backbone"
|
||||
RESNETS:
|
||||
OUT_FEATURES: ["res2", "res3", "res4", "res5"]
|
||||
FPN:
|
||||
IN_FEATURES: ["res2", "res3", "res4", "res5"]
|
||||
ANCHOR_GENERATOR:
|
||||
SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
|
||||
ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
|
||||
RPN:
|
||||
IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
|
||||
PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
|
||||
PRE_NMS_TOPK_TEST: 1000 # Per FPN level
|
||||
# Detectron1 uses 2000 proposals per-batch,
|
||||
# (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
|
||||
# which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
|
||||
POST_NMS_TOPK_TRAIN: 1000
|
||||
POST_NMS_TOPK_TEST: 1000
|
||||
ROI_HEADS:
|
||||
NAME: "StandardROIHeads"
|
||||
IN_FEATURES: ["p2", "p3", "p4", "p5"]
|
||||
NUM_CLASSES: 1
|
||||
ROI_BOX_HEAD:
|
||||
NAME: "FastRCNNConvFCHead"
|
||||
NUM_FC: 2
|
||||
POOLER_RESOLUTION: 7
|
||||
ROI_MASK_HEAD:
|
||||
NAME: "MaskRCNNConvUpsampleHead"
|
||||
NUM_CONV: 4
|
||||
POOLER_RESOLUTION: 14
|
||||
DATASETS:
|
||||
TRAIN: ("base_coco_2017_train",)
|
||||
TEST: ("base_coco_2017_val", "densepose_chimps")
|
||||
CATEGORY_MAPS:
|
||||
"base_coco_2017_train":
|
||||
"16": 1 # bird -> person
|
||||
"17": 1 # cat -> person
|
||||
"18": 1 # dog -> person
|
||||
"19": 1 # horse -> person
|
||||
"20": 1 # sheep -> person
|
||||
"21": 1 # cow -> person
|
||||
"22": 1 # elephant -> person
|
||||
"23": 1 # bear -> person
|
||||
"24": 1 # zebra -> person
|
||||
"25": 1 # girafe -> person
|
||||
"base_coco_2017_val":
|
||||
"16": 1 # bird -> person
|
||||
"17": 1 # cat -> person
|
||||
"18": 1 # dog -> person
|
||||
"19": 1 # horse -> person
|
||||
"20": 1 # sheep -> person
|
||||
"21": 1 # cow -> person
|
||||
"22": 1 # elephant -> person
|
||||
"23": 1 # bear -> person
|
||||
"24": 1 # zebra -> person
|
||||
"25": 1 # girafe -> person
|
||||
WHITELISTED_CATEGORIES:
|
||||
"base_coco_2017_train":
|
||||
- 1 # person
|
||||
- 16 # bird
|
||||
- 17 # cat
|
||||
- 18 # dog
|
||||
- 19 # horse
|
||||
- 20 # sheep
|
||||
- 21 # cow
|
||||
- 22 # elephant
|
||||
- 23 # bear
|
||||
- 24 # zebra
|
||||
- 25 # giraffe
|
||||
"base_coco_2017_val":
|
||||
- 1 # person
|
||||
- 16 # bird
|
||||
- 17 # cat
|
||||
- 18 # dog
|
||||
- 19 # horse
|
||||
- 20 # sheep
|
||||
- 21 # cow
|
||||
- 22 # elephant
|
||||
- 23 # bear
|
||||
- 24 # zebra
|
||||
- 25 # giraffe
|
||||
SOLVER:
|
||||
IMS_PER_BATCH: 16
|
||||
BASE_LR: 0.02
|
||||
STEPS: (60000, 80000)
|
||||
MAX_ITER: 90000
|
||||
INPUT:
|
||||
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
|
||||
VERSION: 2
|
@@ -0,0 +1,7 @@
|
||||
_BASE_: "Base-RCNN-FPN-MC.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
|
||||
MASK_ON: False
|
||||
DENSEPOSE_ON: False
|
||||
RESNETS:
|
||||
DEPTH: 50
|
@@ -0,0 +1,11 @@
|
||||
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
|
||||
ROI_DENSEPOSE_HEAD:
|
||||
NAME: "DensePoseDeepLabHead"
|
||||
DATASETS:
|
||||
TRAIN: ("densepose_coco_2014_minival_100",)
|
||||
TEST: ("densepose_coco_2014_minival_100",)
|
||||
SOLVER:
|
||||
MAX_ITER: 40
|
||||
STEPS: (30,)
|
@@ -0,0 +1,13 @@
|
||||
_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl"
|
||||
DATASETS:
|
||||
TRAIN: ()
|
||||
TEST: ("densepose_coco_2014_minival_100",)
|
||||
TEST:
|
||||
AUG:
|
||||
ENABLED: True
|
||||
MIN_SIZES: (400, 500, 600, 700, 800, 900, 1000, 1100, 1200)
|
||||
MAX_SIZE: 4000
|
||||
FLIP: True
|
||||
EXPECTED_RESULTS: [["bbox_TTA", "AP", 61.74, 0.03], ["densepose_gps_TTA", "AP", 60.22, 0.03], ["densepose_gpsm_TTA", "AP", 63.85, 0.03]]
|
@@ -0,0 +1,19 @@
|
||||
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
|
||||
RESNETS:
|
||||
DEPTH: 50
|
||||
ROI_DENSEPOSE_HEAD:
|
||||
UV_CONFIDENCE:
|
||||
ENABLED: True
|
||||
TYPE: "iid_iso"
|
||||
POINT_REGRESSION_WEIGHTS: 0.0005
|
||||
DATASETS:
|
||||
TRAIN: ("densepose_coco_2014_minival_100",)
|
||||
TEST: ("densepose_coco_2014_minival_100",)
|
||||
SOLVER:
|
||||
CLIP_GRADIENTS:
|
||||
ENABLED: True
|
||||
MAX_ITER: 40
|
||||
STEPS: (30,)
|
||||
WARMUP_FACTOR: 0.025
|
@@ -0,0 +1,19 @@
|
||||
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
|
||||
RESNETS:
|
||||
DEPTH: 50
|
||||
ROI_DENSEPOSE_HEAD:
|
||||
UV_CONFIDENCE:
|
||||
ENABLED: True
|
||||
TYPE: "indep_aniso"
|
||||
POINT_REGRESSION_WEIGHTS: 0.0005
|
||||
DATASETS:
|
||||
TRAIN: ("densepose_coco_2014_minival_100",)
|
||||
TEST: ("densepose_coco_2014_minival_100",)
|
||||
SOLVER:
|
||||
CLIP_GRADIENTS:
|
||||
ENABLED: True
|
||||
MAX_ITER: 40
|
||||
STEPS: (30,)
|
||||
WARMUP_FACTOR: 0.025
|
@@ -0,0 +1,8 @@
|
||||
_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl"
|
||||
DATASETS:
|
||||
TRAIN: ()
|
||||
TEST: ("densepose_coco_2014_minival_100",)
|
||||
TEST:
|
||||
EXPECTED_RESULTS: [["bbox", "AP", 59.27, 0.025], ["densepose_gps", "AP", 60.11, 0.02], ["densepose_gpsm", "AP", 64.20, 0.02]]
|
@@ -0,0 +1,9 @@
|
||||
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
|
||||
DATASETS:
|
||||
TRAIN: ("densepose_coco_2014_minival_100",)
|
||||
TEST: ("densepose_coco_2014_minival_100",)
|
||||
SOLVER:
|
||||
MAX_ITER: 40
|
||||
STEPS: (30,)
|
@@ -0,0 +1,14 @@
|
||||
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
|
||||
MODEL:
|
||||
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
|
||||
ROI_HEADS:
|
||||
NUM_CLASSES: 1
|
||||
DATASETS:
|
||||
TRAIN: ("densepose_coco_2014_minival",)
|
||||
TEST: ("densepose_coco_2014_minival",)
|
||||
SOLVER:
|
||||
MAX_ITER: 6000
|
||||
STEPS: (5500, 5800)
|
||||
TEST:
|
||||
EXPECTED_RESULTS: [["bbox", "AP", 58.27, 1.0], ["densepose_gps", "AP", 42.47, 1.5], ["densepose_gpsm", "AP", 49.20, 1.5]]
|
||||
|
@@ -0,0 +1,9 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
from .data.datasets import builtin # just to register data
|
||||
from .config import add_densepose_config, add_dataset_category_config
|
||||
from .densepose_head import ROI_DENSEPOSE_HEAD_REGISTRY
|
||||
from .evaluator import DensePoseCOCOEvaluator
|
||||
from .roi_head import DensePoseROIHeads
|
||||
from .data.structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
|
||||
from .modeling.test_time_augmentation import DensePoseGeneralizedRCNNWithTTA
|
||||
from .utils.transform import load_from_cfg
|
@@ -0,0 +1,68 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
from detectron2.config import CfgNode as CN
|
||||
|
||||
|
||||
def add_dataset_category_config(cfg: CN):
|
||||
"""
|
||||
Add config for additional category-related dataset options
|
||||
- category whitelisting
|
||||
- category mapping
|
||||
"""
|
||||
_C = cfg
|
||||
_C.DATASETS.CATEGORY_MAPS = CN(new_allowed=True)
|
||||
_C.DATASETS.WHITELISTED_CATEGORIES = CN(new_allowed=True)
|
||||
|
||||
|
||||
def add_densepose_config(cfg: CN):
|
||||
"""
|
||||
Add config for densepose head.
|
||||
"""
|
||||
_C = cfg
|
||||
|
||||
_C.MODEL.DENSEPOSE_ON = True
|
||||
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD = CN()
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.NAME = ""
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8
|
||||
# Number of parts used for point labels
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE = 112
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE = "ROIAlignV2"
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 28
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO = 2
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 2 # 15 or 2
|
||||
# Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD = 0.7
|
||||
# Loss weights for annotation masks (14 parts)
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS = 5.0
|
||||
# Loss weights for surface parts (24 parts)
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS = 1.0
|
||||
# Loss weights for UV regression.
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS = 0.01
|
||||
# For Decoder
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES = 256
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS = 256
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM = ""
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE = 4
|
||||
# For DeepLab head
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB = CN()
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM = "GN"
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON = 0
|
||||
# Confidences
|
||||
# Enable learning confidences (variances) along with the actual values
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE = CN({"ENABLED": False})
|
||||
# UV confidence lower bound
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON = 0.01
|
||||
# Statistical model type for confidence learning, possible values:
|
||||
# - "iid_iso": statistically independent identically distributed residuals
|
||||
# with isotropic covariance
|
||||
# - "indep_aniso": statistically independent residuals with anisotropic
|
||||
# covariances
|
||||
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE = "iid_iso"
|
@@ -0,0 +1,9 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
from .build import build_detection_test_loader, build_detection_train_loader
|
||||
from .dataset_mapper import DatasetMapper
|
||||
|
||||
# ensure the builtin data are registered
|
||||
from . import datasets
|
||||
|
||||
__all__ = [k for k in globals().keys() if not k.startswith("_")]
|
@@ -0,0 +1,405 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
import itertools
|
||||
import logging
|
||||
import numpy as np
|
||||
import operator
|
||||
from typing import Any, Callable, Collection, Dict, Iterable, List, Optional
|
||||
import torch
|
||||
|
||||
from detectron2.config import CfgNode
|
||||
from detectron2.data import samplers
|
||||
from detectron2.data.build import (
|
||||
load_proposals_into_dataset,
|
||||
print_instances_class_histogram,
|
||||
trivial_batch_collator,
|
||||
worker_init_reset_seed,
|
||||
)
|
||||
from detectron2.data.catalog import DatasetCatalog, MetadataCatalog
|
||||
from detectron2.data.common import AspectRatioGroupedDataset, DatasetFromList, MapDataset
|
||||
from detectron2.utils.comm import get_world_size
|
||||
|
||||
from .dataset_mapper import DatasetMapper
|
||||
from .datasets.coco import DENSEPOSE_KEYS_WITHOUT_MASK as DENSEPOSE_COCO_KEYS_WITHOUT_MASK
|
||||
from .datasets.coco import DENSEPOSE_MASK_KEY as DENSEPOSE_COCO_MASK_KEY
|
||||
|
||||
__all__ = ["build_detection_train_loader", "build_detection_test_loader"]
|
||||
|
||||
|
||||
Instance = Dict[str, Any]
|
||||
InstancePredicate = Callable[[Instance], bool]
|
||||
|
||||
|
||||
def _compute_num_images_per_worker(cfg: CfgNode):
|
||||
num_workers = get_world_size()
|
||||
images_per_batch = cfg.SOLVER.IMS_PER_BATCH
|
||||
assert (
|
||||
images_per_batch % num_workers == 0
|
||||
), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
|
||||
images_per_batch, num_workers
|
||||
)
|
||||
assert (
|
||||
images_per_batch >= num_workers
|
||||
), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
|
||||
images_per_batch, num_workers
|
||||
)
|
||||
images_per_worker = images_per_batch // num_workers
|
||||
return images_per_worker
|
||||
|
||||
|
||||
def _map_category_id_to_contiguous_id(dataset_name: str, dataset_dicts: Iterable[Instance]):
|
||||
meta = MetadataCatalog.get(dataset_name)
|
||||
for dataset_dict in dataset_dicts:
|
||||
for ann in dataset_dict["annotations"]:
|
||||
ann["category_id"] = meta.thing_dataset_id_to_contiguous_id[ann["category_id"]]
|
||||
|
||||
|
||||
def _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names: Iterable[str]):
|
||||
# merge categories for all data
|
||||
merged_categories = {}
|
||||
for dataset_name in dataset_names:
|
||||
meta = MetadataCatalog.get(dataset_name)
|
||||
for cat_id, cat_name in meta.categories.items():
|
||||
if cat_id not in merged_categories:
|
||||
merged_categories[cat_id] = (cat_name, dataset_name)
|
||||
continue
|
||||
cat_name_other, dataset_name_other = merged_categories[cat_id]
|
||||
if cat_name_other != cat_name:
|
||||
raise ValueError(
|
||||
f"Incompatible categories for category ID {cat_id}: "
|
||||
f'dataset {dataset_name} value "{cat_name}", '
|
||||
f'dataset {dataset_name_other} value "{cat_name_other}"'
|
||||
)
|
||||
|
||||
merged_cat_id_to_cont_id = {}
|
||||
for i, cat_id in enumerate(sorted(merged_categories.keys())):
|
||||
merged_cat_id_to_cont_id[cat_id] = i
|
||||
|
||||
# add category maps to metadata
|
||||
for dataset_name in dataset_names:
|
||||
meta = MetadataCatalog.get(dataset_name)
|
||||
categories = meta.get("categories")
|
||||
meta.thing_classes = [categories[cat_id] for cat_id in sorted(categories.keys())]
|
||||
meta.thing_dataset_id_to_contiguous_id = {
|
||||
cat_id: merged_cat_id_to_cont_id[cat_id] for cat_id in sorted(categories.keys())
|
||||
}
|
||||
meta.thing_contiguous_id_to_dataset_id = {
|
||||
merged_cat_id_to_cont_id[cat_id]: cat_id for cat_id in sorted(categories.keys())
|
||||
}
|
||||
|
||||
|
||||
def _maybe_create_general_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
|
||||
def has_annotations(instance: Instance) -> bool:
|
||||
return "annotations" in instance
|
||||
|
||||
def has_only_crowd_annotations(instance: Instance) -> bool:
|
||||
for ann in instance["annotations"]:
|
||||
if ann.get("is_crowd", 0) == 0:
|
||||
return False
|
||||
return True
|
||||
|
||||
def general_keep_instance_predicate(instance: Instance) -> bool:
|
||||
return has_annotations(instance) and not has_only_crowd_annotations(instance)
|
||||
|
||||
if not cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS:
|
||||
return None
|
||||
return general_keep_instance_predicate
|
||||
|
||||
|
||||
def _maybe_create_keypoints_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
|
||||
|
||||
min_num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
|
||||
|
||||
def has_sufficient_num_keypoints(instance: Instance) -> bool:
|
||||
num_kpts = sum(
|
||||
(np.array(ann["keypoints"][2::3]) > 0).sum()
|
||||
for ann in instance["annotations"]
|
||||
if "keypoints" in ann
|
||||
)
|
||||
return num_kpts >= min_num_keypoints
|
||||
|
||||
if cfg.MODEL.KEYPOINT_ON and (min_num_keypoints > 0):
|
||||
return has_sufficient_num_keypoints
|
||||
return None
|
||||
|
||||
|
||||
def _maybe_create_mask_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
|
||||
if not cfg.MODEL.MASK_ON:
|
||||
return None
|
||||
|
||||
def has_mask_annotations(instance: Instance) -> bool:
|
||||
return any("segmentation" in ann for ann in instance["annotations"])
|
||||
|
||||
return has_mask_annotations
|
||||
|
||||
|
||||
def _maybe_create_densepose_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
|
||||
if not cfg.MODEL.DENSEPOSE_ON:
|
||||
return None
|
||||
|
||||
def has_densepose_annotations(instance: Instance) -> bool:
|
||||
for ann in instance["annotations"]:
|
||||
if all(key in ann for key in DENSEPOSE_COCO_KEYS_WITHOUT_MASK) and (
|
||||
(DENSEPOSE_COCO_MASK_KEY in ann) or ("segmentation" in ann)
|
||||
):
|
||||
return True
|
||||
return False
|
||||
|
||||
return has_densepose_annotations
|
||||
|
||||
|
||||
def _maybe_create_specific_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
|
||||
specific_predicate_creators = [
|
||||
_maybe_create_keypoints_keep_instance_predicate,
|
||||
_maybe_create_mask_keep_instance_predicate,
|
||||
_maybe_create_densepose_keep_instance_predicate,
|
||||
]
|
||||
predicates = [creator(cfg) for creator in specific_predicate_creators]
|
||||
predicates = [p for p in predicates if p is not None]
|
||||
if not predicates:
|
||||
return None
|
||||
|
||||
def combined_predicate(instance: Instance) -> bool:
|
||||
return any(p(instance) for p in predicates)
|
||||
|
||||
return combined_predicate
|
||||
|
||||
|
||||
def _get_train_keep_instance_predicate(cfg: CfgNode):
|
||||
general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg)
|
||||
combined_specific_keep_predicate = _maybe_create_specific_keep_instance_predicate(cfg)
|
||||
|
||||
def combined_general_specific_keep_predicate(instance: Instance) -> bool:
|
||||
return general_keep_predicate(instance) and combined_specific_keep_predicate(instance)
|
||||
|
||||
if (general_keep_predicate is None) and (combined_specific_keep_predicate is None):
|
||||
return None
|
||||
if general_keep_predicate is None:
|
||||
return combined_specific_keep_predicate
|
||||
if combined_specific_keep_predicate is None:
|
||||
return general_keep_predicate
|
||||
return combined_general_specific_keep_predicate
|
||||
|
||||
|
||||
def _get_test_keep_instance_predicate(cfg: CfgNode):
|
||||
general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg)
|
||||
return general_keep_predicate
|
||||
|
||||
|
||||
def _maybe_filter_and_map_categories(
|
||||
dataset_name: str, dataset_dicts: List[Instance]
|
||||
) -> List[Instance]:
|
||||
meta = MetadataCatalog.get(dataset_name)
|
||||
whitelisted_categories = meta.get("whitelisted_categories")
|
||||
category_map = meta.get("category_map", {})
|
||||
if whitelisted_categories is None and not category_map:
|
||||
return dataset_dicts
|
||||
filtered_dataset_dicts = []
|
||||
for dataset_dict in dataset_dicts:
|
||||
anns = []
|
||||
for ann in dataset_dict["annotations"]:
|
||||
cat_id = ann["category_id"]
|
||||
if whitelisted_categories is not None and cat_id not in whitelisted_categories:
|
||||
continue
|
||||
ann["category_id"] = category_map.get(cat_id, cat_id)
|
||||
anns.append(ann)
|
||||
dataset_dict["annotations"] = anns
|
||||
filtered_dataset_dicts.append(dataset_dict)
|
||||
return filtered_dataset_dicts
|
||||
|
||||
|
||||
def _add_category_whitelists_to_metadata(cfg: CfgNode):
|
||||
for dataset_name, whitelisted_cat_ids in cfg.DATASETS.WHITELISTED_CATEGORIES.items():
|
||||
meta = MetadataCatalog.get(dataset_name)
|
||||
meta.whitelisted_categories = whitelisted_cat_ids
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.info(
|
||||
"Whitelisted categories for dataset {}: {}".format(
|
||||
dataset_name, meta.whitelisted_categories
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def _add_category_maps_to_metadata(cfg: CfgNode):
|
||||
for dataset_name, category_map in cfg.DATASETS.CATEGORY_MAPS.items():
|
||||
category_map = {
|
||||
int(cat_id_src): int(cat_id_dst) for cat_id_src, cat_id_dst in category_map.items()
|
||||
}
|
||||
meta = MetadataCatalog.get(dataset_name)
|
||||
meta.category_map = category_map
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.info("Category maps for dataset {}: {}".format(dataset_name, meta.category_map))
|
||||
|
||||
|
||||
def combine_detection_dataset_dicts(
|
||||
dataset_names: Collection[str],
|
||||
keep_instance_predicate: Optional[InstancePredicate] = None,
|
||||
proposal_files: Optional[Collection[str]] = None,
|
||||
) -> List[Instance]:
|
||||
"""
|
||||
Load and prepare dataset dicts for training / testing
|
||||
|
||||
Args:
|
||||
dataset_names (Collection[str]): a list of dataset names
|
||||
keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate
|
||||
applied to instance dicts which defines whether to keep the instance
|
||||
proposal_files (Collection[str]): if given, a list of object proposal files
|
||||
that match each dataset in `dataset_names`.
|
||||
"""
|
||||
assert len(dataset_names)
|
||||
if proposal_files is None:
|
||||
proposal_files = [None] * len(dataset_names)
|
||||
assert len(dataset_names) == len(proposal_files)
|
||||
# load annotations and dataset metadata
|
||||
dataset_map = {}
|
||||
for dataset_name in dataset_names:
|
||||
dataset_dicts = DatasetCatalog.get(dataset_name)
|
||||
dataset_map[dataset_name] = dataset_dicts
|
||||
# initialize category maps
|
||||
_add_category_id_to_contiguous_id_maps_to_metadata(dataset_names)
|
||||
# apply category maps
|
||||
all_datasets_dicts = []
|
||||
for dataset_name, proposal_file in zip(dataset_names, proposal_files):
|
||||
dataset_dicts = dataset_map[dataset_name]
|
||||
assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!"
|
||||
if proposal_file is not None:
|
||||
dataset_dicts = load_proposals_into_dataset(dataset_dicts, proposal_file)
|
||||
dataset_dicts = _maybe_filter_and_map_categories(dataset_name, dataset_dicts)
|
||||
_map_category_id_to_contiguous_id(dataset_name, dataset_dicts)
|
||||
print_instances_class_histogram(
|
||||
dataset_dicts, MetadataCatalog.get(dataset_name).thing_classes
|
||||
)
|
||||
all_datasets_dicts.append(dataset_dicts)
|
||||
|
||||
if keep_instance_predicate is not None:
|
||||
all_datasets_dicts_plain = [
|
||||
d
|
||||
for d in itertools.chain.from_iterable(all_datasets_dicts)
|
||||
if keep_instance_predicate(d)
|
||||
]
|
||||
else:
|
||||
all_datasets_dicts_plain = list(itertools.chain.from_iterable(all_datasets_dicts))
|
||||
return all_datasets_dicts_plain
|
||||
|
||||
|
||||
def build_detection_train_loader(cfg: CfgNode, mapper=None):
|
||||
"""
|
||||
A data loader is created in a way similar to that of Detectron2.
|
||||
The main differences are:
|
||||
- it allows combining data with different but compatible object category sets
|
||||
|
||||
The data loader is created by the following steps:
|
||||
1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts.
|
||||
2. Start workers to work on the dicts. Each worker will:
|
||||
* Map each metadata dict into another format to be consumed by the model.
|
||||
* Batch them by simply putting dicts into a list.
|
||||
The batched ``list[mapped_dict]`` is what this dataloader will return.
|
||||
|
||||
Args:
|
||||
cfg (CfgNode): the config
|
||||
mapper (callable): a callable which takes a sample (dict) from dataset and
|
||||
returns the format to be consumed by the model.
|
||||
By default it will be `DatasetMapper(cfg, True)`.
|
||||
|
||||
Returns:
|
||||
an infinite iterator of training data
|
||||
"""
|
||||
images_per_worker = _compute_num_images_per_worker(cfg)
|
||||
|
||||
_add_category_whitelists_to_metadata(cfg)
|
||||
_add_category_maps_to_metadata(cfg)
|
||||
dataset_dicts = combine_detection_dataset_dicts(
|
||||
cfg.DATASETS.TRAIN,
|
||||
keep_instance_predicate=_get_train_keep_instance_predicate(cfg),
|
||||
proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
|
||||
)
|
||||
dataset = DatasetFromList(dataset_dicts, copy=False)
|
||||
|
||||
if mapper is None:
|
||||
mapper = DatasetMapper(cfg, True)
|
||||
dataset = MapDataset(dataset, mapper)
|
||||
|
||||
sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.info("Using training sampler {}".format(sampler_name))
|
||||
if sampler_name == "TrainingSampler":
|
||||
sampler = samplers.TrainingSampler(len(dataset))
|
||||
elif sampler_name == "RepeatFactorTrainingSampler":
|
||||
sampler = samplers.RepeatFactorTrainingSampler(
|
||||
dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
|
||||
)
|
||||
else:
|
||||
raise ValueError("Unknown training sampler: {}".format(sampler_name))
|
||||
|
||||
if cfg.DATALOADER.ASPECT_RATIO_GROUPING:
|
||||
data_loader = torch.utils.data.DataLoader(
|
||||
dataset,
|
||||
sampler=sampler,
|
||||
num_workers=cfg.DATALOADER.NUM_WORKERS,
|
||||
batch_sampler=None,
|
||||
collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements
|
||||
worker_init_fn=worker_init_reset_seed,
|
||||
) # yield individual mapped dict
|
||||
data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker)
|
||||
else:
|
||||
batch_sampler = torch.utils.data.sampler.BatchSampler(
|
||||
sampler, images_per_worker, drop_last=True
|
||||
)
|
||||
# drop_last so the batch always have the same size
|
||||
data_loader = torch.utils.data.DataLoader(
|
||||
dataset,
|
||||
num_workers=cfg.DATALOADER.NUM_WORKERS,
|
||||
batch_sampler=batch_sampler,
|
||||
collate_fn=trivial_batch_collator,
|
||||
worker_init_fn=worker_init_reset_seed,
|
||||
)
|
||||
|
||||
return data_loader
|
||||
|
||||
|
||||
def build_detection_test_loader(cfg, dataset_name, mapper=None):
|
||||
"""
|
||||
Similar to `build_detection_train_loader`.
|
||||
But this function uses the given `dataset_name` argument (instead of the names in cfg),
|
||||
and uses batch size 1.
|
||||
|
||||
Args:
|
||||
cfg: a detectron2 CfgNode
|
||||
dataset_name (str): a name of the dataset that's available in the DatasetCatalog
|
||||
mapper (callable): a callable which takes a sample (dict) from dataset
|
||||
and returns the format to be consumed by the model.
|
||||
By default it will be `DatasetMapper(cfg, False)`.
|
||||
|
||||
Returns:
|
||||
DataLoader: a torch DataLoader, that loads the given detection
|
||||
dataset, with test-time transformation and batching.
|
||||
"""
|
||||
_add_category_whitelists_to_metadata(cfg)
|
||||
_add_category_maps_to_metadata(cfg)
|
||||
dataset_dicts = combine_detection_dataset_dicts(
|
||||
[dataset_name],
|
||||
keep_instance_predicate=_get_test_keep_instance_predicate(cfg),
|
||||
proposal_files=[
|
||||
cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)]
|
||||
]
|
||||
if cfg.MODEL.LOAD_PROPOSALS
|
||||
else None,
|
||||
)
|
||||
|
||||
dataset = DatasetFromList(dataset_dicts)
|
||||
if mapper is None:
|
||||
mapper = DatasetMapper(cfg, False)
|
||||
dataset = MapDataset(dataset, mapper)
|
||||
|
||||
sampler = samplers.InferenceSampler(len(dataset))
|
||||
# Always use 1 image per worker during inference since this is the
|
||||
# standard when reporting inference time in papers.
|
||||
batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)
|
||||
|
||||
data_loader = torch.utils.data.DataLoader(
|
||||
dataset,
|
||||
num_workers=cfg.DATALOADER.NUM_WORKERS,
|
||||
batch_sampler=batch_sampler,
|
||||
collate_fn=trivial_batch_collator,
|
||||
)
|
||||
return data_loader
|
@@ -0,0 +1,118 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
import copy
|
||||
import torch
|
||||
from fvcore.common.file_io import PathManager
|
||||
|
||||
from detectron2.data import MetadataCatalog
|
||||
from detectron2.data import detection_utils as utils
|
||||
from detectron2.data import transforms as T
|
||||
|
||||
from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
|
||||
|
||||
|
||||
class DatasetMapper:
|
||||
"""
|
||||
A customized version of `detectron2.data.DatasetMapper`
|
||||
"""
|
||||
|
||||
def __init__(self, cfg, is_train=True):
|
||||
self.tfm_gens = utils.build_transform_gen(cfg, is_train)
|
||||
|
||||
# fmt: off
|
||||
self.img_format = cfg.INPUT.FORMAT
|
||||
self.mask_on = cfg.MODEL.MASK_ON
|
||||
self.keypoint_on = cfg.MODEL.KEYPOINT_ON
|
||||
self.densepose_on = cfg.MODEL.DENSEPOSE_ON
|
||||
assert not cfg.MODEL.LOAD_PROPOSALS, "not supported yet"
|
||||
# fmt: on
|
||||
if self.keypoint_on and is_train:
|
||||
# Flip only makes sense in training
|
||||
self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
|
||||
else:
|
||||
self.keypoint_hflip_indices = None
|
||||
|
||||
if self.densepose_on:
|
||||
densepose_transform_srcs = [
|
||||
MetadataCatalog.get(ds).densepose_transform_src
|
||||
for ds in cfg.DATASETS.TRAIN + cfg.DATASETS.TEST
|
||||
]
|
||||
assert len(densepose_transform_srcs) > 0
|
||||
# TODO: check that DensePose transformation data is the same for
|
||||
# all the data. Otherwise one would have to pass DB ID with
|
||||
# each entry to select proper transformation data. For now, since
|
||||
# all DensePose annotated data uses the same data semantics, we
|
||||
# omit this check.
|
||||
densepose_transform_data_fpath = PathManager.get_local_path(densepose_transform_srcs[0])
|
||||
self.densepose_transform_data = DensePoseTransformData.load(
|
||||
densepose_transform_data_fpath
|
||||
)
|
||||
|
||||
self.is_train = is_train
|
||||
|
||||
def __call__(self, dataset_dict):
|
||||
"""
|
||||
Args:
|
||||
dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
|
||||
|
||||
Returns:
|
||||
dict: a format that builtin models in detectron2 accept
|
||||
"""
|
||||
dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
|
||||
image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
|
||||
utils.check_image_size(dataset_dict, image)
|
||||
|
||||
image, transforms = T.apply_transform_gens(self.tfm_gens, image)
|
||||
image_shape = image.shape[:2] # h, w
|
||||
dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
|
||||
|
||||
if not self.is_train:
|
||||
dataset_dict.pop("annotations", None)
|
||||
return dataset_dict
|
||||
|
||||
for anno in dataset_dict["annotations"]:
|
||||
if not self.mask_on:
|
||||
anno.pop("segmentation", None)
|
||||
if not self.keypoint_on:
|
||||
anno.pop("keypoints", None)
|
||||
|
||||
# USER: Implement additional transformations if you have other types of data
|
||||
# USER: Don't call transpose_densepose if you don't need
|
||||
annos = [
|
||||
self._transform_densepose(
|
||||
utils.transform_instance_annotations(
|
||||
obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
|
||||
),
|
||||
transforms,
|
||||
)
|
||||
for obj in dataset_dict.pop("annotations")
|
||||
if obj.get("iscrowd", 0) == 0
|
||||
]
|
||||
instances = utils.annotations_to_instances(annos, image_shape)
|
||||
|
||||
if len(annos) and "densepose" in annos[0]:
|
||||
gt_densepose = [obj["densepose"] for obj in annos]
|
||||
instances.gt_densepose = DensePoseList(gt_densepose, instances.gt_boxes, image_shape)
|
||||
|
||||
dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]
|
||||
return dataset_dict
|
||||
|
||||
def _transform_densepose(self, annotation, transforms):
|
||||
if not self.densepose_on:
|
||||
return annotation
|
||||
|
||||
# Handle densepose annotations
|
||||
is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
|
||||
if is_valid:
|
||||
densepose_data = DensePoseDataRelative(annotation, cleanup=True)
|
||||
densepose_data.apply_transform(transforms, self.densepose_transform_data)
|
||||
annotation["densepose"] = densepose_data
|
||||
else:
|
||||
# logger = logging.getLogger(__name__)
|
||||
# logger.debug("Could not load DensePose annotation: {}".format(reason_not_valid))
|
||||
DensePoseDataRelative.cleanup_annotation(annotation)
|
||||
# NOTE: annotations for certain instances may be unavailable.
|
||||
# 'None' is accepted by the DensePoseList data structure.
|
||||
annotation["densepose"] = None
|
||||
return annotation
|
@@ -0,0 +1,5 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
from . import builtin # ensure the builtin data are registered
|
||||
|
||||
__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")]
|
@@ -0,0 +1,10 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
from .coco import BASE_DATASETS as BASE_COCO_DATASETS
|
||||
from .coco import DATASETS as COCO_DATASETS
|
||||
from .coco import register_datasets as register_coco_datasets
|
||||
|
||||
DEFAULT_DATASETS_ROOT = "data"
|
||||
|
||||
|
||||
register_coco_datasets(COCO_DATASETS, DEFAULT_DATASETS_ROOT)
|
||||
register_coco_datasets(BASE_COCO_DATASETS, DEFAULT_DATASETS_ROOT)
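# Once registered, the data can be queried by name, e.g. (a sketch):
#   from detectron2.data import DatasetCatalog
#   dicts = DatasetCatalog.get("densepose_coco_2014_minival_100")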
|
@@ -0,0 +1,314 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import contextlib
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, Iterable, List, Optional
|
||||
from fvcore.common.file_io import PathManager
|
||||
from fvcore.common.timer import Timer
|
||||
|
||||
from detectron2.data import DatasetCatalog, MetadataCatalog
|
||||
from detectron2.structures import BoxMode
|
||||
|
||||
DENSEPOSE_MASK_KEY = "dp_masks"
|
||||
DENSEPOSE_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V"]
|
||||
DENSEPOSE_KEYS = DENSEPOSE_KEYS_WITHOUT_MASK + [DENSEPOSE_MASK_KEY]
|
||||
DENSEPOSE_METADATA_URL_PREFIX = "https://dl.fbaipublicfiles.com/densepose/data/"
|
||||
|
||||
|
||||
@dataclass
|
||||
class CocoDatasetInfo:
|
||||
name: str
|
||||
images_root: str
|
||||
annotations_fpath: str
|
||||
|
||||
|
||||
DATASETS = [
|
||||
CocoDatasetInfo(
|
||||
name="densepose_coco_2014_train",
|
||||
images_root="coco/train2014",
|
||||
annotations_fpath="coco/annotations/densepose_train2014.json",
|
||||
),
|
||||
CocoDatasetInfo(
|
||||
name="densepose_coco_2014_minival",
|
||||
images_root="coco/val2014",
|
||||
annotations_fpath="coco/annotations/densepose_minival2014.json",
|
||||
),
|
||||
CocoDatasetInfo(
|
||||
name="densepose_coco_2014_minival_100",
|
||||
images_root="coco/val2014",
|
||||
annotations_fpath="coco/annotations/densepose_minival2014_100.json",
|
||||
),
|
||||
CocoDatasetInfo(
|
||||
name="densepose_coco_2014_valminusminival",
|
||||
images_root="coco/val2014",
|
||||
annotations_fpath="coco/annotations/densepose_valminusminival2014.json",
|
||||
),
|
||||
CocoDatasetInfo(
|
||||
name="densepose_chimps",
|
||||
images_root="densepose_evolution/densepose_chimps",
|
||||
annotations_fpath="densepose_evolution/annotations/densepose_chimps_densepose.json",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
BASE_DATASETS = [
|
||||
CocoDatasetInfo(
|
||||
name="base_coco_2017_train",
|
||||
images_root="coco/train2017",
|
||||
annotations_fpath="coco/annotations/instances_train2017.json",
|
||||
),
|
||||
CocoDatasetInfo(
|
||||
name="base_coco_2017_val",
|
||||
images_root="coco/val2017",
|
||||
annotations_fpath="coco/annotations/instances_val2017.json",
|
||||
),
|
||||
CocoDatasetInfo(
|
||||
name="base_coco_2017_val_100",
|
||||
images_root="coco/val2017",
|
||||
annotations_fpath="coco/annotations/instances_val2017_100.json",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def _is_relative_local_path(path: os.PathLike):
|
||||
path_str = os.fsdecode(path)
|
||||
return ("://" not in path_str) and not os.path.isabs(path)
|
||||
|
||||
|
||||
def _maybe_prepend_base_path(base_path: Optional[os.PathLike], path: os.PathLike):
|
||||
"""
|
||||
Prepends the provided path with a base path prefix if:
|
||||
1) base path is not None;
|
||||
2) path is a local path
|
||||
"""
|
||||
if base_path is None:
|
||||
return path
|
||||
if _is_relative_local_path(path):
|
||||
return os.path.join(base_path, path)
|
||||
return path
|
||||
|
||||
|
||||
def get_metadata(base_path: Optional[os.PathLike]) -> Dict[str, Any]:
|
||||
"""
|
||||
Returns metadata associated with COCO DensePose data
|
||||
|
||||
Args:
|
||||
base_path: Optional[os.PathLike]
|
||||
Base path used to load metadata from
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]
|
||||
Metadata in the form of a dictionary
|
||||
"""
|
||||
meta = {
|
||||
"densepose_transform_src": _maybe_prepend_base_path(
|
||||
base_path, "UV_symmetry_transforms.mat"
|
||||
),
|
||||
"densepose_smpl_subdiv": _maybe_prepend_base_path(base_path, "SMPL_subdiv.mat"),
|
||||
"densepose_smpl_subdiv_transform": _maybe_prepend_base_path(
|
||||
base_path, "SMPL_SUBDIV_TRANSFORM.mat"
|
||||
),
|
||||
}
|
||||
return meta
|
||||
|
||||
|
||||
def _load_coco_annotations(json_file: str):
|
||||
"""
|
||||
Load COCO annotations from a JSON file
|
||||
|
||||
Args:
|
||||
json_file: str
|
||||
Path to the file to load annotations from
|
||||
Returns:
|
||||
Instance of `pycocotools.coco.COCO` that provides access to annotations
|
||||
data
|
||||
"""
|
||||
from pycocotools.coco import COCO
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
timer = Timer()
|
||||
with contextlib.redirect_stdout(io.StringIO()):
|
||||
coco_api = COCO(json_file)
|
||||
if timer.seconds() > 1:
|
||||
logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
|
||||
return coco_api
|
||||
|
||||
|
||||
def _add_categories_metadata(dataset_name: str, categories: List[Dict[str, Any]]):
|
||||
meta = MetadataCatalog.get(dataset_name)
|
||||
meta.categories = {c["id"]: c["name"] for c in categories}
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.info("Dataset {} categories: {}".format(dataset_name, categories))
|
||||
|
||||
|
||||
def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]):
|
||||
if "minival" in json_file:
|
||||
# Skip validation on COCO2014 valminusminival and minival annotations
|
||||
# The ratio of buggy annotations there is tiny and does not affect accuracy
|
||||
# Therefore we explicitly white-list them
|
||||
return
|
||||
ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
|
||||
assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
|
||||
json_file
|
||||
)
|
||||
|
||||
|
||||
def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
|
||||
if "bbox" not in ann_dict:
|
||||
return
|
||||
obj["bbox"] = ann_dict["bbox"]
|
||||
obj["bbox_mode"] = BoxMode.XYWH_ABS
|
||||
|
||||
|
||||
def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
|
||||
if "segmentation" not in ann_dict:
|
||||
return
|
||||
segm = ann_dict["segmentation"]
|
||||
if not isinstance(segm, dict):
|
||||
# filter out invalid polygons (< 3 points)
|
||||
segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
|
||||
if len(segm) == 0:
|
||||
return
|
||||
obj["segmentation"] = segm
|
||||
|
||||
|
||||
def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
|
||||
if "keypoints" not in ann_dict:
|
||||
return
|
||||
keypts = ann_dict["keypoints"] # list[int]
|
||||
for idx, v in enumerate(keypts):
|
||||
if idx % 3 != 2:
|
||||
# COCO's segmentation coordinates are floating points in [0, H or W],
|
||||
# but keypoint coordinates are integers in [0, H-1 or W-1]
|
||||
# Therefore we assume the coordinates are "pixel indices" and
|
||||
# add 0.5 to convert to floating point coordinates.
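# (for example, an annotated x of 10 refers to pixel column 10 and becomes
# 10.5, the center of that pixel, in continuous coordinates)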
|
||||
keypts[idx] = v + 0.5
|
||||
obj["keypoints"] = keypts
|
||||
|
||||
|
||||
def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
|
||||
for key in DENSEPOSE_KEYS:
|
||||
if key in ann_dict:
|
||||
obj[key] = ann_dict[key]
|
||||
|
||||
|
||||
def _combine_images_with_annotations(
|
||||
dataset_name: str,
|
||||
image_root: str,
|
||||
img_datas: Iterable[Dict[str, Any]],
|
||||
ann_datas: Iterable[Iterable[Dict[str, Any]]],
|
||||
):
|
||||
|
||||
ann_keys = ["iscrowd", "category_id"]
|
||||
dataset_dicts = []
|
||||
|
||||
for img_dict, ann_dicts in zip(img_datas, ann_datas):
|
||||
record = {}
|
||||
record["file_name"] = os.path.join(image_root, img_dict["file_name"])
|
||||
record["height"] = img_dict["height"]
|
||||
record["width"] = img_dict["width"]
|
||||
record["image_id"] = img_dict["id"]
|
||||
record["dataset"] = dataset_name
|
||||
objs = []
|
||||
for ann_dict in ann_dicts:
|
||||
assert ann_dict["image_id"] == record["image_id"]
|
||||
assert ann_dict.get("ignore", 0) == 0
|
||||
obj = {key: ann_dict[key] for key in ann_keys if key in ann_dict}
|
||||
_maybe_add_bbox(obj, ann_dict)
|
||||
_maybe_add_segm(obj, ann_dict)
|
||||
_maybe_add_keypoints(obj, ann_dict)
|
||||
_maybe_add_densepose(obj, ann_dict)
|
||||
objs.append(obj)
|
||||
record["annotations"] = objs
|
||||
dataset_dicts.append(record)
|
||||
return dataset_dicts
|
||||
|
||||
|
||||
def load_coco_json(annotations_json_file: str, image_root: str, dataset_name: str):
|
||||
"""
|
||||
Loads a JSON file with annotations in COCO instances format.
|
||||
Replaces `detectron2.data.datasets.coco.load_coco_json` to handle metadata
in a more flexible way. Postpones category mapping to a later stage to be
able to combine several datasets with different (but coherent) sets of
categories.
|
||||
|
||||
Args:
|
||||
|
||||
annotations_json_file: str
|
||||
Path to the JSON file with annotations in COCO instances format.
|
||||
image_root: str
|
||||
directory that contains all the images
|
||||
dataset_name: str
|
||||
the name that identifies a dataset, e.g. "densepose_coco_2014_train"
|
||||
|
||||
"""
|
||||
coco_api = _load_coco_annotations(PathManager.get_local_path(annotations_json_file))
|
||||
_add_categories_metadata(dataset_name, coco_api.loadCats(coco_api.getCatIds()))
|
||||
# sort indices for reproducible results
|
||||
img_ids = sorted(coco_api.imgs.keys())
|
||||
# imgs is a list of dicts, each looks something like:
|
||||
# {'license': 4,
|
||||
# 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
|
||||
# 'file_name': 'COCO_val2014_000000001268.jpg',
|
||||
# 'height': 427,
|
||||
# 'width': 640,
|
||||
# 'date_captured': '2013-11-17 05:57:24',
|
||||
# 'id': 1268}
|
||||
imgs = coco_api.loadImgs(img_ids)
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.info("Loaded {} images in COCO format from {}".format(len(imgs), annotations_json_file))
|
||||
# anns is a list[list[dict]], where each dict is an annotation
|
||||
# record for an object. The inner list enumerates the objects in an image
|
||||
# and the outer list enumerates over images.
|
||||
anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
|
||||
_verify_annotations_have_unique_ids(annotations_json_file, anns)
|
||||
dataset_records = _combine_images_with_annotations(dataset_name, image_root, imgs, anns)
|
||||
return dataset_records
|
||||
|
||||
|
||||
def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[os.PathLike] = None):
|
||||
"""
|
||||
Registers provided COCO DensePose dataset
|
||||
|
||||
Args:
|
||||
dataset_data: CocoDatasetInfo
|
||||
Dataset data
|
||||
datasets_root: Optional[os.PathLike]
|
||||
Datasets root folder (default: None)
|
||||
"""
|
||||
annotations_fpath = _maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath)
|
||||
images_root = _maybe_prepend_base_path(datasets_root, dataset_data.images_root)
|
||||
|
||||
def load_annotations():
|
||||
return load_coco_json(
|
||||
annotations_json_file=annotations_fpath,
|
||||
image_root=images_root,
|
||||
dataset_name=dataset_data.name,
|
||||
)
|
||||
|
||||
DatasetCatalog.register(dataset_data.name, load_annotations)
|
||||
MetadataCatalog.get(dataset_data.name).set(
|
||||
json_file=annotations_fpath,
|
||||
image_root=images_root,
|
||||
**get_metadata(DENSEPOSE_METADATA_URL_PREFIX)
|
||||
)
|
||||
|
||||
|
||||
def register_datasets(
|
||||
datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[os.PathLike] = None
|
||||
):
|
||||
"""
|
||||
Registers provided COCO DensePose data
|
||||
|
||||
Args:
|
||||
datasets_data: Iterable[CocoDatasetInfo]
|
||||
An iterable of dataset descriptors
|
||||
datasets_root: Optional[os.PathLike]
|
||||
Datasets root folder (default: None)
|
||||
"""
|
||||
for dataset_data in datasets_data:
|
||||
register_dataset(dataset_data, datasets_root)
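# Illustrative usage sketch (not executed as part of this module): the dataset
# descriptors defined above can be registered against a local data root, e.g.
#
#   register_datasets(DATASETS, datasets_root="data")
#   register_datasets(BASE_DATASETS, datasets_root="data")
#
# after which DatasetCatalog.get("densepose_coco_2014_train") returns the list
# of per-image records produced by load_coco_json. The "data" root matches the
# DEFAULT_DATASETS_ROOT used by the registration module earlier in this diff;
# adjust it to wherever the COCO images and DensePose annotations actually live.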
|
@@ -0,0 +1,579 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import base64
|
||||
import numpy as np
|
||||
from io import BytesIO
|
||||
import torch
|
||||
from PIL import Image
|
||||
from torch.nn import functional as F
|
||||
|
||||
|
||||
class DensePoseTransformData(object):
|
||||
|
||||
# Horizontal symmetry label transforms used for horizontal flip
|
||||
MASK_LABEL_SYMMETRIES = [0, 1, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14]
|
||||
# fmt: off
|
||||
POINT_LABEL_SYMMETRIES = [ 0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24, 23] # noqa
|
||||
# fmt: on
|
||||
|
||||
def __init__(self, uv_symmetries):
|
||||
self.mask_label_symmetries = DensePoseTransformData.MASK_LABEL_SYMMETRIES
|
||||
self.point_label_symmetries = DensePoseTransformData.POINT_LABEL_SYMMETRIES
|
||||
self.uv_symmetries = uv_symmetries
|
||||
|
||||
@staticmethod
|
||||
def load(fpath):
|
||||
import scipy.io
|
||||
|
||||
uv_symmetry_map = scipy.io.loadmat(fpath)
|
||||
uv_symmetry_map_torch = {}
|
||||
for key in ["U_transforms", "V_transforms"]:
|
||||
uv_symmetry_map_torch[key] = []
|
||||
map_src = uv_symmetry_map[key]
|
||||
map_dst = uv_symmetry_map_torch[key]
|
||||
for i in range(map_src.shape[1]):
|
||||
map_dst.append(torch.from_numpy(map_src[0, i]).to(dtype=torch.float))
|
||||
uv_symmetry_map_torch[key] = torch.stack(map_dst, dim=0).to(
|
||||
device=torch.cuda.current_device()
|
||||
)
|
||||
transform_data = DensePoseTransformData(uv_symmetry_map_torch)
|
||||
return transform_data
|
||||
|
||||
|
||||
class DensePoseDataRelative(object):
|
||||
"""
|
||||
Dense pose relative annotations that can be applied to any bounding box:
|
||||
x - normalized X coordinates [0, 255] of annotated points
|
||||
y - normalized Y coordinates [0, 255] of annotated points
|
||||
i - body part labels 0,...,24 for annotated points
|
||||
u - body part U coordinates [0, 1] for annotated points
|
||||
v - body part V coordinates [0, 1] for annotated points
|
||||
segm - 256x256 segmentation mask with values 0,...,14
|
||||
To obtain absolute x and y data wrt some bounding box one needs to first
|
||||
divide the data by 256, multiply by the respective bounding box size
|
||||
and add bounding box offset:
|
||||
x_img = x0 + x_norm * w / 256.0
|
||||
y_img = y0 + y_norm * h / 256.0
|
||||
Segmentation masks are typically sampled to get image-based masks.
|
||||
"""
|
||||
|
||||
# Key for normalized X coordinates in annotation dict
|
||||
X_KEY = "dp_x"
|
||||
# Key for normalized Y coordinates in annotation dict
|
||||
Y_KEY = "dp_y"
|
||||
# Key for U part coordinates in annotation dict
|
||||
U_KEY = "dp_U"
|
||||
# Key for V part coordinates in annotation dict
|
||||
V_KEY = "dp_V"
|
||||
# Key for I point labels in annotation dict
|
||||
I_KEY = "dp_I"
|
||||
# Key for segmentation mask in annotation dict
|
||||
S_KEY = "dp_masks"
|
||||
# Number of body parts in segmentation masks
|
||||
N_BODY_PARTS = 14
|
||||
# Number of parts in point labels
|
||||
N_PART_LABELS = 24
|
||||
MASK_SIZE = 256
|
||||
|
||||
def __init__(self, annotation, cleanup=False):
|
||||
is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
|
||||
assert is_valid, "Invalid DensePose annotations: {}".format(reason_not_valid)
|
||||
self.x = torch.as_tensor(annotation[DensePoseDataRelative.X_KEY])
|
||||
self.y = torch.as_tensor(annotation[DensePoseDataRelative.Y_KEY])
|
||||
self.i = torch.as_tensor(annotation[DensePoseDataRelative.I_KEY])
|
||||
self.u = torch.as_tensor(annotation[DensePoseDataRelative.U_KEY])
|
||||
self.v = torch.as_tensor(annotation[DensePoseDataRelative.V_KEY])
|
||||
self.segm = DensePoseDataRelative.extract_segmentation_mask(annotation)
|
||||
self.device = torch.device("cpu")
|
||||
if cleanup:
|
||||
DensePoseDataRelative.cleanup_annotation(annotation)
|
||||
|
||||
def to(self, device):
|
||||
if self.device == device:
|
||||
return self
|
||||
new_data = DensePoseDataRelative.__new__(DensePoseDataRelative)
|
||||
new_data.x = self.x.to(device)
|
||||
new_data.y = self.y.to(device)
|
||||
new_data.i = self.i.to(device)
|
||||
new_data.u = self.u.to(device)
|
||||
new_data.v = self.v.to(device)
|
||||
new_data.segm = self.segm.to(device)
|
||||
new_data.device = device
|
||||
return new_data
|
||||
|
||||
@staticmethod
|
||||
def extract_segmentation_mask(annotation):
|
||||
import pycocotools.mask as mask_utils
|
||||
|
||||
poly_specs = annotation[DensePoseDataRelative.S_KEY]
|
||||
segm = torch.zeros((DensePoseDataRelative.MASK_SIZE,) * 2, dtype=torch.float32)
|
||||
for i in range(DensePoseDataRelative.N_BODY_PARTS):
|
||||
poly_i = poly_specs[i]
|
||||
if poly_i:
|
||||
mask_i = mask_utils.decode(poly_i)
|
||||
segm[mask_i > 0] = i + 1
|
||||
return segm
|
||||
|
||||
@staticmethod
|
||||
def validate_annotation(annotation):
|
||||
for key in [
|
||||
DensePoseDataRelative.X_KEY,
|
||||
DensePoseDataRelative.Y_KEY,
|
||||
DensePoseDataRelative.I_KEY,
|
||||
DensePoseDataRelative.U_KEY,
|
||||
DensePoseDataRelative.V_KEY,
|
||||
DensePoseDataRelative.S_KEY,
|
||||
]:
|
||||
if key not in annotation:
|
||||
return False, "no {key} data in the annotation".format(key=key)
|
||||
return True, None
|
||||
|
||||
@staticmethod
|
||||
def cleanup_annotation(annotation):
|
||||
for key in [
|
||||
DensePoseDataRelative.X_KEY,
|
||||
DensePoseDataRelative.Y_KEY,
|
||||
DensePoseDataRelative.I_KEY,
|
||||
DensePoseDataRelative.U_KEY,
|
||||
DensePoseDataRelative.V_KEY,
|
||||
DensePoseDataRelative.S_KEY,
|
||||
]:
|
||||
if key in annotation:
|
||||
del annotation[key]
|
||||
|
||||
def apply_transform(self, transforms, densepose_transform_data):
|
||||
self._transform_pts(transforms, densepose_transform_data)
|
||||
self._transform_segm(transforms, densepose_transform_data)
|
||||
|
||||
def _transform_pts(self, transforms, dp_transform_data):
|
||||
import detectron2.data.transforms as T
|
||||
|
||||
# NOTE: This assumes that HFlipTransform is the only transform that performs a flip
|
||||
do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
|
||||
if do_hflip:
|
||||
self.x = self.segm.size(1) - self.x
|
||||
self._flip_iuv_semantics(dp_transform_data)
|
||||
|
||||
def _flip_iuv_semantics(self, dp_transform_data: DensePoseTransformData) -> None:
|
||||
i_old = self.i.clone()
|
||||
uv_symmetries = dp_transform_data.uv_symmetries
|
||||
pt_label_symmetries = dp_transform_data.point_label_symmetries
|
||||
for i in range(self.N_PART_LABELS):
|
||||
if i + 1 in i_old:
|
||||
annot_indices_i = i_old == i + 1
|
||||
if pt_label_symmetries[i + 1] != i + 1:
|
||||
self.i[annot_indices_i] = pt_label_symmetries[i + 1]
|
||||
u_loc = (self.u[annot_indices_i] * 255).long()
|
||||
v_loc = (self.v[annot_indices_i] * 255).long()
|
||||
self.u[annot_indices_i] = uv_symmetries["U_transforms"][i][v_loc, u_loc].to(
|
||||
device=self.u.device
|
||||
)
|
||||
self.v[annot_indices_i] = uv_symmetries["V_transforms"][i][v_loc, u_loc].to(
|
||||
device=self.v.device
|
||||
)
|
||||
|
||||
def _transform_segm(self, transforms, dp_transform_data):
|
||||
import detectron2.data.transforms as T
|
||||
|
||||
# NOTE: This assumes that HFlipTransform is the only transform that performs a flip
|
||||
do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
|
||||
if do_hflip:
|
||||
self.segm = torch.flip(self.segm, [1])
|
||||
self._flip_segm_semantics(dp_transform_data)
|
||||
|
||||
def _flip_segm_semantics(self, dp_transform_data):
|
||||
old_segm = self.segm.clone()
|
||||
mask_label_symmetries = dp_transform_data.mask_label_symmetries
|
||||
for i in range(self.N_BODY_PARTS):
|
||||
if mask_label_symmetries[i + 1] != i + 1:
|
||||
self.segm[old_segm == i + 1] = mask_label_symmetries[i + 1]
|
||||
|
||||
|
||||
def normalized_coords_transform(x0, y0, w, h):
|
||||
"""
|
||||
Coordinates transform that maps top left corner to (-1, -1) and bottom
|
||||
right corner to (1, 1). Used for torch.grid_sample to initialize the
|
||||
grid
|
||||
"""
|
||||
|
||||
def f(p):
|
||||
return (2 * (p[0] - x0) / w - 1, 2 * (p[1] - y0) / h - 1)
|
||||
|
||||
return f
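# Illustrative check (a sketch, not executed here): for a box with
# x0=10, y0=20, w=100, h=50 the transform maps box coordinates onto the
# [-1, 1] range expected by torch.nn.functional.grid_sample:
#
#   f = normalized_coords_transform(10, 20, 100, 50)
#   f((10, 20))   # -> (-1.0, -1.0), top left corner
#   f((110, 70))  # -> ( 1.0,  1.0), bottom right corner
#   f((60, 45))   # -> ( 0.0,  0.0), box center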
|
||||
|
||||
|
||||
class DensePoseOutput(object):
|
||||
def __init__(self, S, I, U, V, confidences):
|
||||
"""
|
||||
Args:
|
||||
S (`torch.Tensor`): coarse segmentation tensor of size (N, A, H, W)
|
||||
I (`torch.Tensor`): fine segmentation tensor of size (N, C, H, W)
|
||||
U (`torch.Tensor`): U coordinates for each fine segmentation label of size (N, C, H, W)
|
||||
V (`torch.Tensor`): V coordinates for each fine segmentation label of size (N, C, H, W)
|
||||
confidences (dict of str -> `torch.Tensor`) estimated confidence model parameters
|
||||
"""
|
||||
self.S = S
|
||||
self.I = I # noqa: E741
|
||||
self.U = U
|
||||
self.V = V
|
||||
self.confidences = confidences
|
||||
self._check_output_dims(S, I, U, V)
|
||||
|
||||
def _check_output_dims(self, S, I, U, V):
|
||||
assert (
|
||||
len(S.size()) == 4
|
||||
), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format(
|
||||
S.size()
|
||||
)
|
||||
assert (
len(I.size()) == 4
), "Part index output should have 4 " "dimensions (NCHW), but has size {}".format(
I.size()
)
assert (
len(U.size()) == 4
), "U coordinates output should have 4 " "dimensions (NCHW), but has size {}".format(
U.size()
)
assert (
len(V.size()) == 4
), "V coordinates output should have 4 " "dimensions (NCHW), but has size {}".format(
V.size()
)
|
||||
assert len(S) == len(I), (
|
||||
"Number of output segmentation planes {} "
|
||||
"should be equal to the number of output part index "
|
||||
"planes {}".format(len(S), len(I))
|
||||
)
|
||||
assert S.size()[2:] == I.size()[2:], (
|
||||
"Output segmentation plane size {} "
|
||||
"should be equal to the output part index "
|
||||
"plane size {}".format(S.size()[2:], I.size()[2:])
|
||||
)
|
||||
assert I.size() == U.size(), (
|
||||
"Part index output shape {} "
|
||||
"should be the same as U coordinates output shape {}".format(I.size(), U.size())
|
||||
)
|
||||
assert I.size() == V.size(), (
|
||||
"Part index output shape {} "
|
||||
"should be the same as V coordinates output shape {}".format(I.size(), V.size())
|
||||
)
|
||||
|
||||
def resize(self, image_size_hw):
|
||||
# do nothing - outputs are invariant to resize
|
||||
pass
|
||||
|
||||
def _crop(self, S, I, U, V, bbox_old_xywh, bbox_new_xywh):
|
||||
"""
|
||||
Resample S, I, U, V from bbox_old to the cropped bbox_new
|
||||
"""
|
||||
x0old, y0old, wold, hold = bbox_old_xywh
|
||||
x0new, y0new, wnew, hnew = bbox_new_xywh
|
||||
tr_coords = normalized_coords_transform(x0old, y0old, wold, hold)
|
||||
topleft = (x0new, y0new)
|
||||
bottomright = (x0new + wnew, y0new + hnew)
|
||||
topleft_norm = tr_coords(topleft)
|
||||
bottomright_norm = tr_coords(bottomright)
|
||||
hsize = S.size(1)
|
||||
wsize = S.size(2)
|
||||
grid = torch.meshgrid(
|
||||
torch.arange(
|
||||
topleft_norm[1],
|
||||
bottomright_norm[1],
|
||||
(bottomright_norm[1] - topleft_norm[1]) / hsize,
|
||||
)[:hsize],
|
||||
torch.arange(
|
||||
topleft_norm[0],
|
||||
bottomright_norm[0],
|
||||
(bottomright_norm[0] - topleft_norm[0]) / wsize,
|
||||
)[:wsize],
|
||||
)
|
||||
grid = torch.stack(grid, dim=2).to(S.device)
|
||||
assert (
|
||||
grid.size(0) == hsize
|
||||
), "Resampled grid expected " "height={}, actual height={}".format(hsize, grid.size(0))
|
||||
assert grid.size(1) == wsize, "Resampled grid expected " "width={}, actual width={}".format(
|
||||
wsize, grid.size(1)
|
||||
)
|
||||
S_new = F.grid_sample(
|
||||
S.unsqueeze(0),
|
||||
torch.unsqueeze(grid, 0),
|
||||
mode="bilinear",
|
||||
padding_mode="border",
|
||||
align_corners=True,
|
||||
).squeeze(0)
|
||||
I_new = F.grid_sample(
|
||||
I.unsqueeze(0),
|
||||
torch.unsqueeze(grid, 0),
|
||||
mode="bilinear",
|
||||
padding_mode="border",
|
||||
align_corners=True,
|
||||
).squeeze(0)
|
||||
U_new = F.grid_sample(
|
||||
U.unsqueeze(0),
|
||||
torch.unsqueeze(grid, 0),
|
||||
mode="bilinear",
|
||||
padding_mode="border",
|
||||
align_corners=True,
|
||||
).squeeze(0)
|
||||
V_new = F.grid_sample(
|
||||
V.unsqueeze(0),
|
||||
torch.unsqueeze(grid, 0),
|
||||
mode="bilinear",
|
||||
padding_mode="border",
|
||||
align_corners=True,
|
||||
).squeeze(0)
|
||||
return S_new, I_new, U_new, V_new
|
||||
|
||||
def crop(self, indices_cropped, bboxes_old, bboxes_new):
|
||||
"""
|
||||
Crop outputs for selected bounding boxes to the new bounding boxes.
|
||||
"""
|
||||
# VK: cropping is ignored for now
|
||||
# for i, ic in enumerate(indices_cropped):
|
||||
# self.S[ic], self.I[ic], self.U[ic], self.V[ic] = \
|
||||
# self._crop(self.S[ic], self.I[ic], self.U[ic], self.V[ic],
|
||||
# bboxes_old[i], bboxes_new[i])
|
||||
pass
|
||||
|
||||
def hflip(self, transform_data: DensePoseTransformData) -> None:
|
||||
"""
|
||||
Change S, I, U and V to take into account a Horizontal flip.
|
||||
"""
|
||||
if self.I.shape[0] > 0:
|
||||
for el in "SIUV":
|
||||
self.__dict__[el] = torch.flip(self.__dict__[el], [3])
|
||||
self._flip_iuv_semantics_tensor(transform_data)
|
||||
self._flip_segm_semantics_tensor(transform_data)
|
||||
|
||||
def _flip_iuv_semantics_tensor(self, dp_transform_data: DensePoseTransformData) -> None:
|
||||
point_label_symmetries = dp_transform_data.point_label_symmetries
|
||||
uv_symmetries = dp_transform_data.uv_symmetries
|
||||
|
||||
N, C, H, W = self.U.shape
|
||||
u_loc = (self.U[:, 1:, :, :].clamp(0, 1) * 255).long()
|
||||
v_loc = (self.V[:, 1:, :, :].clamp(0, 1) * 255).long()
|
||||
Iindex = torch.arange(C - 1, device=self.U.device)[None, :, None, None].expand(
|
||||
N, C - 1, H, W
|
||||
)
|
||||
self.U[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc].to(
|
||||
device=self.U.device
|
||||
)
|
||||
self.V[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc].to(
|
||||
device=self.V.device
|
||||
)
|
||||
|
||||
for el in "IUV":
|
||||
self.__dict__[el] = self.__dict__[el][:, point_label_symmetries, :, :]
|
||||
|
||||
def _flip_segm_semantics_tensor(self, dp_transform_data):
|
||||
if self.S.shape[1] == DensePoseDataRelative.N_BODY_PARTS + 1:
|
||||
self.S = self.S[:, dp_transform_data.mask_label_symmetries, :, :]
|
||||
|
||||
def to_result(self, boxes_xywh):
|
||||
"""
|
||||
Convert DensePose outputs to results format. Results are more compact,
|
||||
but cannot be resampled any more
|
||||
"""
|
||||
result = DensePoseResult(boxes_xywh, self.S, self.I, self.U, self.V)
|
||||
return result
|
||||
|
||||
def __getitem__(self, item):
|
||||
if isinstance(item, int):
|
||||
S_selected = self.S[item].unsqueeze(0)
|
||||
I_selected = self.I[item].unsqueeze(0)
|
||||
U_selected = self.U[item].unsqueeze(0)
|
||||
V_selected = self.V[item].unsqueeze(0)
|
||||
conf_selected = {}
|
||||
for key in self.confidences:
|
||||
conf_selected[key] = self.confidences[key][item].unsqueeze(0)
|
||||
else:
|
||||
S_selected = self.S[item]
|
||||
I_selected = self.I[item]
|
||||
U_selected = self.U[item]
|
||||
V_selected = self.V[item]
|
||||
conf_selected = {}
|
||||
for key in self.confidences:
|
||||
conf_selected[key] = self.confidences[key][item]
|
||||
return DensePoseOutput(S_selected, I_selected, U_selected, V_selected, conf_selected)
|
||||
|
||||
def __str__(self):
|
||||
s = "DensePoseOutput S {}, I {}, U {}, V {}".format(
|
||||
list(self.S.size()), list(self.I.size()), list(self.U.size()), list(self.V.size())
|
||||
)
|
||||
s_conf = "confidences: [{}]".format(
|
||||
", ".join([f"{key} {list(self.confidences[key].size())}" for key in self.confidences])
|
||||
)
|
||||
return ", ".join([s, s_conf])
|
||||
|
||||
def __len__(self):
|
||||
return self.S.size(0)
|
||||
|
||||
|
||||
class DensePoseResult(object):
|
||||
def __init__(self, boxes_xywh, S, I, U, V):
|
||||
self.results = []
|
||||
self.boxes_xywh = boxes_xywh.cpu().tolist()
|
||||
assert len(boxes_xywh.size()) == 2
|
||||
assert boxes_xywh.size(1) == 4
|
||||
for i, box_xywh in enumerate(boxes_xywh):
|
||||
result_i = self._output_to_result(box_xywh, S[[i]], I[[i]], U[[i]], V[[i]])
|
||||
result_numpy_i = result_i.cpu().numpy()
|
||||
result_encoded_i = DensePoseResult.encode_png_data(result_numpy_i)
|
||||
result_encoded_with_shape_i = (result_numpy_i.shape, result_encoded_i)
|
||||
self.results.append(result_encoded_with_shape_i)
|
||||
|
||||
def __str__(self):
|
||||
s = "DensePoseResult: N={} [{}]".format(
|
||||
len(self.results), ", ".join([str(list(r[0])) for r in self.results])
|
||||
)
|
||||
return s
|
||||
|
||||
def _output_to_result(self, box_xywh, S, I, U, V):
|
||||
x, y, w, h = box_xywh
|
||||
w = max(int(w), 1)
|
||||
h = max(int(h), 1)
|
||||
result = torch.zeros([3, h, w], dtype=torch.uint8, device=U.device)
|
||||
assert (
|
||||
len(S.size()) == 4
|
||||
), "AnnIndex tensor size should have {} " "dimensions but has {}".format(4, len(S.size()))
|
||||
s_bbox = F.interpolate(S, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
|
||||
assert (
|
||||
len(I.size()) == 4
|
||||
), "IndexUV tensor size should have {} " "dimensions but has {}".format(4, len(S.size()))
|
||||
i_bbox = (
|
||||
F.interpolate(I, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
|
||||
* (s_bbox > 0).long()
|
||||
).squeeze(0)
|
||||
assert len(U.size()) == 4, "U tensor size should have {} " "dimensions but has {}".format(
|
||||
4, len(U.size())
|
||||
)
|
||||
u_bbox = F.interpolate(U, (h, w), mode="bilinear", align_corners=False)
|
||||
assert len(V.size()) == 4, "V tensor size should have {} " "dimensions but has {}".format(
|
||||
4, len(V.size())
|
||||
)
|
||||
v_bbox = F.interpolate(V, (h, w), mode="bilinear", align_corners=False)
|
||||
result[0] = i_bbox
|
||||
for part_id in range(1, u_bbox.size(1)):
|
||||
result[1][i_bbox == part_id] = (
|
||||
(u_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8)
|
||||
)
|
||||
result[2][i_bbox == part_id] = (
|
||||
(v_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8)
|
||||
)
|
||||
assert (
|
||||
result.size(1) == h
|
||||
), "Results height {} should be equal" "to bounding box height {}".format(result.size(1), h)
|
||||
assert (
|
||||
result.size(2) == w
|
||||
), "Results width {} should be equal" "to bounding box width {}".format(result.size(2), w)
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def encode_png_data(arr):
|
||||
"""
|
||||
Encode array data as a PNG image using the highest compression rate
|
||||
@param arr [in] Data stored in an array of size (3, M, N) of type uint8
|
||||
@return Base64-encoded string containing PNG-compressed data
|
||||
"""
|
||||
assert len(arr.shape) == 3, "Expected a 3D array as an input," " got a {0}D array".format(
|
||||
len(arr.shape)
|
||||
)
|
||||
assert arr.shape[0] == 3, "Expected first array dimension of size 3," " got {0}".format(
|
||||
arr.shape[0]
|
||||
)
|
||||
assert arr.dtype == np.uint8, "Expected an array of type np.uint8, " " got {0}".format(
|
||||
arr.dtype
|
||||
)
|
||||
data = np.moveaxis(arr, 0, -1)
|
||||
im = Image.fromarray(data)
|
||||
fstream = BytesIO()
|
||||
im.save(fstream, format="png", optimize=True)
|
||||
s = base64.encodebytes(fstream.getvalue()).decode()
|
||||
return s
|
||||
|
||||
@staticmethod
|
||||
def decode_png_data(shape, s):
|
||||
"""
|
||||
Decode array data from a string that contains PNG-compressed data
|
||||
@param shape [in] Shape (3, M, N) of the array to reconstruct
@param s [in] Base64-encoded string containing PNG-compressed data
|
||||
@return Data stored in an array of size (3, M, N) of type uint8
|
||||
"""
|
||||
fstream = BytesIO(base64.decodebytes(s.encode()))
|
||||
im = Image.open(fstream)
|
||||
data = np.moveaxis(np.array(im.getdata(), dtype=np.uint8), -1, 0)
|
||||
return data.reshape(shape)
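# Illustrative round trip (a sketch, assuming numpy is imported as np):
#
#   arr = np.zeros((3, 4, 5), dtype=np.uint8)
#   s = DensePoseResult.encode_png_data(arr)             # base64 PNG string
#   arr2 = DensePoseResult.decode_png_data(arr.shape, s)
#   assert np.array_equal(arr, arr2)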
|
||||
|
||||
def __len__(self):
|
||||
return len(self.results)
|
||||
|
||||
def __getitem__(self, item):
|
||||
result_encoded = self.results[item]
|
||||
bbox_xywh = self.boxes_xywh[item]
|
||||
return result_encoded, bbox_xywh
|
||||
|
||||
|
||||
class DensePoseList(object):
|
||||
|
||||
_TORCH_DEVICE_CPU = torch.device("cpu")
|
||||
|
||||
def __init__(self, densepose_datas, boxes_xyxy_abs, image_size_hw, device=_TORCH_DEVICE_CPU):
|
||||
assert len(densepose_datas) == len(
|
||||
boxes_xyxy_abs
|
||||
), "Attempt to initialize DensePoseList with {} DensePose datas " "and {} boxes".format(
|
||||
len(densepose_datas), len(boxes_xyxy_abs)
|
||||
)
|
||||
self.densepose_datas = []
|
||||
for densepose_data in densepose_datas:
|
||||
assert isinstance(densepose_data, DensePoseDataRelative) or densepose_data is None, (
|
||||
"Attempt to initialize DensePoseList with DensePose datas "
|
||||
"of type {}, expected DensePoseDataRelative".format(type(densepose_data))
|
||||
)
|
||||
densepose_data_ondevice = (
|
||||
densepose_data.to(device) if densepose_data is not None else None
|
||||
)
|
||||
self.densepose_datas.append(densepose_data_ondevice)
|
||||
self.boxes_xyxy_abs = boxes_xyxy_abs.to(device)
|
||||
self.image_size_hw = image_size_hw
|
||||
self.device = device
|
||||
|
||||
def to(self, device):
|
||||
if self.device == device:
|
||||
return self
|
||||
return DensePoseList(self.densepose_datas, self.boxes_xyxy_abs, self.image_size_hw, device)
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self.densepose_datas)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.densepose_datas)
|
||||
|
||||
def __repr__(self):
|
||||
s = self.__class__.__name__ + "("
|
||||
s += "num_instances={}, ".format(len(self.densepose_datas))
|
||||
s += "image_width={}, ".format(self.image_size_hw[1])
|
||||
s += "image_height={})".format(self.image_size_hw[0])
|
||||
return s
|
||||
|
||||
def __getitem__(self, item):
|
||||
if isinstance(item, int):
|
||||
densepose_data_rel = self.densepose_datas[item]
|
||||
return densepose_data_rel
|
||||
elif isinstance(item, slice):
|
||||
densepose_datas_rel = self.densepose_datas[item]
|
||||
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
|
||||
return DensePoseList(
|
||||
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
|
||||
)
|
||||
elif isinstance(item, torch.Tensor) and (item.dtype == torch.bool):
|
||||
densepose_datas_rel = [self.densepose_datas[i] for i, x in enumerate(item) if x > 0]
|
||||
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
|
||||
return DensePoseList(
|
||||
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
|
||||
)
|
||||
else:
|
||||
densepose_datas_rel = [self.densepose_datas[i] for i in item]
|
||||
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
|
||||
return DensePoseList(
|
||||
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
|
||||
)
|
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,158 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
import contextlib
|
||||
import copy
|
||||
import io
|
||||
import itertools
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from collections import OrderedDict
|
||||
import torch
|
||||
from fvcore.common.file_io import PathManager
|
||||
from pycocotools.coco import COCO
|
||||
|
||||
from detectron2.data import MetadataCatalog
|
||||
from detectron2.evaluation import DatasetEvaluator
|
||||
from detectron2.structures import BoxMode
|
||||
from detectron2.utils.comm import all_gather, is_main_process, synchronize
|
||||
from detectron2.utils.logger import create_small_table
|
||||
|
||||
from .densepose_coco_evaluation import DensePoseCocoEval, DensePoseEvalMode
|
||||
|
||||
|
||||
class DensePoseCOCOEvaluator(DatasetEvaluator):
|
||||
def __init__(self, dataset_name, distributed, output_dir=None):
|
||||
self._distributed = distributed
|
||||
self._output_dir = output_dir
|
||||
|
||||
self._cpu_device = torch.device("cpu")
|
||||
self._logger = logging.getLogger(__name__)
|
||||
|
||||
self._metadata = MetadataCatalog.get(dataset_name)
|
||||
json_file = PathManager.get_local_path(self._metadata.json_file)
|
||||
with contextlib.redirect_stdout(io.StringIO()):
|
||||
self._coco_api = COCO(json_file)
|
||||
|
||||
def reset(self):
|
||||
self._predictions = []
|
||||
|
||||
def process(self, inputs, outputs):
|
||||
"""
|
||||
Args:
|
||||
inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
|
||||
It is a list of dict. Each dict corresponds to an image and
|
||||
contains keys like "height", "width", "file_name", "image_id".
|
||||
outputs: the outputs of a COCO model. It is a list of dicts with key
|
||||
"instances" that contains :class:`Instances`.
|
||||
The :class:`Instances` object needs to have `densepose` field.
|
||||
"""
|
||||
for input, output in zip(inputs, outputs):
|
||||
instances = output["instances"].to(self._cpu_device)
|
||||
|
||||
boxes = instances.pred_boxes.tensor.clone()
|
||||
boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
|
||||
instances.pred_densepose = instances.pred_densepose.to_result(boxes)
|
||||
|
||||
json_results = prediction_to_json(instances, input["image_id"])
|
||||
self._predictions.extend(json_results)
|
||||
|
||||
def evaluate(self):
|
||||
if self._distributed:
|
||||
synchronize()
|
||||
predictions = all_gather(self._predictions)
|
||||
predictions = list(itertools.chain(*predictions))
|
||||
if not is_main_process():
|
||||
return
|
||||
else:
|
||||
predictions = self._predictions
|
||||
|
||||
return copy.deepcopy(self._eval_predictions(predictions))
|
||||
|
||||
def _eval_predictions(self, predictions):
|
||||
"""
|
||||
Evaluate predictions on densepose.
|
||||
Return results with the metrics of the tasks.
|
||||
"""
|
||||
self._logger.info("Preparing results for COCO format ...")
|
||||
|
||||
if self._output_dir:
|
||||
file_path = os.path.join(self._output_dir, "coco_densepose_results.json")
|
||||
with open(file_path, "w") as f:
|
||||
json.dump(predictions, f)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
|
||||
self._logger.info("Evaluating predictions ...")
|
||||
res = OrderedDict()
|
||||
results_gps, results_gpsm = _evaluate_predictions_on_coco(self._coco_api, predictions)
|
||||
res["densepose_gps"] = results_gps
|
||||
res["densepose_gpsm"] = results_gpsm
|
||||
return res
|
||||
|
||||
|
||||
def prediction_to_json(instances, img_id):
|
||||
"""
|
||||
Args:
|
||||
instances (Instances): the output of the model
|
||||
img_id (str): the image id in COCO
|
||||
|
||||
Returns:
|
||||
list[dict]: the results in densepose evaluation format
|
||||
"""
|
||||
scores = instances.scores.tolist()
|
||||
|
||||
results = []
|
||||
for k in range(len(instances)):
|
||||
densepose = instances.pred_densepose[k]
|
||||
result = {
|
||||
"image_id": img_id,
|
||||
"category_id": 1, # densepose only has one class
|
||||
"bbox": densepose[1],
|
||||
"score": scores[k],
|
||||
"densepose": densepose,
|
||||
}
|
||||
results.append(result)
|
||||
return results
|
||||
|
||||
|
||||
def _evaluate_predictions_on_coco(coco_gt, coco_results):
|
||||
metrics = ["AP", "AP50", "AP75", "APm", "APl"]
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
if len(coco_results) == 0: # cocoapi does not handle empty results very well
|
||||
logger.warn("No predictions from the model! Set scores to -1")
|
||||
results_gps = {metric: -1 for metric in metrics}
|
||||
results_gpsm = {metric: -1 for metric in metrics}
|
||||
return results_gps, results_gpsm
|
||||
|
||||
coco_dt = coco_gt.loadRes(coco_results)
|
||||
results_gps = _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics)
|
||||
logger.info(
|
||||
"Evaluation results for densepose, GPS metric: \n" + create_small_table(results_gps)
|
||||
)
|
||||
results_gpsm = _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics)
|
||||
logger.info(
|
||||
"Evaluation results for densepose, GPSm metric: \n" + create_small_table(results_gpsm)
|
||||
)
|
||||
return results_gps, results_gpsm
|
||||
|
||||
|
||||
def _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics):
|
||||
coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPS)
|
||||
coco_eval.evaluate()
|
||||
coco_eval.accumulate()
|
||||
coco_eval.summarize()
|
||||
results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
|
||||
return results
|
||||
|
||||
|
||||
def _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics):
|
||||
coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPSM)
|
||||
coco_eval.evaluate()
|
||||
coco_eval.accumulate()
|
||||
coco_eval.summarize()
|
||||
results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
|
||||
return results
|
@@ -0,0 +1,75 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
from detectron2.modeling.test_time_augmentation import GeneralizedRCNNWithTTA
|
||||
|
||||
|
||||
class DensePoseGeneralizedRCNNWithTTA(GeneralizedRCNNWithTTA):
|
||||
def __init__(self, cfg, model, transform_data, tta_mapper=None, batch_size=1):
|
||||
"""
|
||||
Args:
|
||||
cfg (CfgNode):
|
||||
model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on.
|
||||
transform_data (DensePoseTransformData): contains symmetry label
|
||||
transforms used for horizontal flip
|
||||
tta_mapper (callable): takes a dataset dict and returns a list of
|
||||
augmented versions of the dataset dict. Defaults to
|
||||
`DatasetMapperTTA(cfg)`.
|
||||
batch_size (int): batch the augmented images into this batch size for inference.
|
||||
"""
|
||||
self._transform_data = transform_data
|
||||
super().__init__(cfg=cfg, model=model, tta_mapper=tta_mapper, batch_size=batch_size)
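# Illustrative construction sketch (assumed setup, not part of the class):
# given a trained GeneralizedRCNN `model`, its config `cfg` and symmetry
# transform data loaded from the test dataset metadata via the
# `load_from_cfg` helper defined later in this diff:
#
#   transform_data = load_from_cfg(cfg)
#   model_tta = DensePoseGeneralizedRCNNWithTTA(cfg, model, transform_data)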
|
||||
|
||||
# the implementation follows closely the one from detectron2/modeling
|
||||
def _inference_one_image(self, input):
|
||||
"""
|
||||
Args:
|
||||
input (dict): one dataset dict
|
||||
|
||||
Returns:
|
||||
dict: one output dict
|
||||
"""
|
||||
|
||||
augmented_inputs, aug_vars = self._get_augmented_inputs(input)
|
||||
# Detect boxes from all augmented versions
|
||||
with self._turn_off_roi_heads(["mask_on", "keypoint_on", "densepose_on"]):
|
||||
# temporarily disable roi heads
|
||||
all_boxes, all_scores, all_classes = self._get_augmented_boxes(
|
||||
augmented_inputs, aug_vars
|
||||
)
|
||||
merged_instances = self._merge_detections(
|
||||
all_boxes, all_scores, all_classes, (aug_vars["height"], aug_vars["width"])
|
||||
)
|
||||
|
||||
if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON:
|
||||
# Use the detected boxes to obtain new fields
|
||||
augmented_instances = self._rescale_detected_boxes(
|
||||
augmented_inputs, merged_instances, aug_vars
|
||||
)
|
||||
# run forward on the detected boxes
|
||||
outputs = self._batch_inference(
|
||||
augmented_inputs, augmented_instances, do_postprocess=False
|
||||
)
|
||||
# Delete now useless variables to avoid being out of memory
|
||||
del augmented_inputs, augmented_instances, merged_instances
|
||||
# average the predictions
|
||||
if self.cfg.MODEL.MASK_ON:
|
||||
outputs[0].pred_masks = self._reduce_pred_masks(outputs, aug_vars)
|
||||
if self.cfg.MODEL.DENSEPOSE_ON:
|
||||
outputs[0].pred_densepose = self._reduce_pred_densepose(outputs, aug_vars)
|
||||
# postprocess
|
||||
output = self._detector_postprocess(outputs[0], aug_vars)
|
||||
return {"instances": output}
|
||||
else:
|
||||
return {"instances": merged_instances}
|
||||
|
||||
def _reduce_pred_densepose(self, outputs, aug_vars):
|
||||
for idx, output in enumerate(outputs):
|
||||
if aug_vars["do_hflip"][idx]:
|
||||
output.pred_densepose.hflip(self._transform_data)
|
||||
# Less memory-intensive averaging
|
||||
for attr in "SIUV":
|
||||
setattr(
|
||||
outputs[0].pred_densepose,
|
||||
attr,
|
||||
sum(getattr(o.pred_densepose, attr) for o in outputs) / len(outputs),
|
||||
)
|
||||
return outputs[0].pred_densepose
|
@@ -0,0 +1,213 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
import numpy as np
|
||||
from typing import Dict
|
||||
import fvcore.nn.weight_init as weight_init
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.nn import functional as F
|
||||
|
||||
from detectron2.layers import Conv2d, ShapeSpec, get_norm
|
||||
from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads
|
||||
from detectron2.modeling.poolers import ROIPooler
|
||||
from detectron2.modeling.roi_heads import select_foreground_proposals
|
||||
|
||||
from .densepose_head import (
|
||||
build_densepose_data_filter,
|
||||
build_densepose_head,
|
||||
build_densepose_losses,
|
||||
build_densepose_predictor,
|
||||
densepose_inference,
|
||||
)
|
||||
|
||||
|
||||
class Decoder(nn.Module):
|
||||
"""
|
||||
A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper
|
||||
(https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from
|
||||
all levels of the FPN into single output.
|
||||
"""
|
||||
|
||||
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features):
|
||||
super(Decoder, self).__init__()
|
||||
|
||||
# fmt: off
|
||||
self.in_features = in_features
|
||||
feature_strides = {k: v.stride for k, v in input_shape.items()}
|
||||
feature_channels = {k: v.channels for k, v in input_shape.items()}
|
||||
num_classes = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES
|
||||
conv_dims = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS
|
||||
self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE
|
||||
norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM
|
||||
# fmt: on
|
||||
|
||||
self.scale_heads = []
|
||||
for in_feature in self.in_features:
|
||||
head_ops = []
|
||||
head_length = max(
|
||||
1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride))
|
||||
)
|
||||
for k in range(head_length):
|
||||
conv = Conv2d(
|
||||
feature_channels[in_feature] if k == 0 else conv_dims,
|
||||
conv_dims,
|
||||
kernel_size=3,
|
||||
stride=1,
|
||||
padding=1,
|
||||
bias=not norm,
|
||||
norm=get_norm(norm, conv_dims),
|
||||
activation=F.relu,
|
||||
)
|
||||
weight_init.c2_msra_fill(conv)
|
||||
head_ops.append(conv)
|
||||
if feature_strides[in_feature] != self.common_stride:
|
||||
head_ops.append(
|
||||
nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
|
||||
)
|
||||
self.scale_heads.append(nn.Sequential(*head_ops))
|
||||
self.add_module(in_feature, self.scale_heads[-1])
|
||||
self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
|
||||
weight_init.c2_msra_fill(self.predictor)
|
||||
|
||||
def forward(self, features):
|
||||
for i, _ in enumerate(self.in_features):
|
||||
if i == 0:
|
||||
x = self.scale_heads[i](features[i])
|
||||
else:
|
||||
x = x + self.scale_heads[i](features[i])
|
||||
x = self.predictor(x)
|
||||
return x
|
||||
|
||||
|
||||
@ROI_HEADS_REGISTRY.register()
|
||||
class DensePoseROIHeads(StandardROIHeads):
|
||||
"""
|
||||
A Standard ROIHeads which contains an addition of DensePose head.
|
||||
"""
|
||||
|
||||
def __init__(self, cfg, input_shape):
|
||||
super().__init__(cfg, input_shape)
|
||||
self._init_densepose_head(cfg, input_shape)
|
||||
|
||||
def _init_densepose_head(self, cfg, input_shape):
|
||||
# fmt: off
|
||||
self.densepose_on = cfg.MODEL.DENSEPOSE_ON
|
||||
if not self.densepose_on:
|
||||
return
|
||||
self.densepose_data_filter = build_densepose_data_filter(cfg)
|
||||
dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION
|
||||
dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO
|
||||
dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE
|
||||
self.use_decoder = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON
|
||||
# fmt: on
|
||||
if self.use_decoder:
|
||||
dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,)
|
||||
else:
|
||||
dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features)
|
||||
in_channels = [input_shape[f].channels for f in self.in_features][0]
|
||||
|
||||
if self.use_decoder:
|
||||
self.decoder = Decoder(cfg, input_shape, self.in_features)
|
||||
|
||||
self.densepose_pooler = ROIPooler(
|
||||
output_size=dp_pooler_resolution,
|
||||
scales=dp_pooler_scales,
|
||||
sampling_ratio=dp_pooler_sampling_ratio,
|
||||
pooler_type=dp_pooler_type,
|
||||
)
|
||||
self.densepose_head = build_densepose_head(cfg, in_channels)
|
||||
self.densepose_predictor = build_densepose_predictor(
|
||||
cfg, self.densepose_head.n_out_channels
|
||||
)
|
||||
self.densepose_losses = build_densepose_losses(cfg)
|
||||
|
||||
def _forward_densepose(self, features, instances):
|
||||
"""
|
||||
Forward logic of the densepose prediction branch.
|
||||
|
||||
Args:
|
||||
features (list[Tensor]): #level input features for densepose prediction
|
||||
instances (list[Instances]): the per-image instances to train/predict densepose.
|
||||
In training, they can be the proposals.
|
||||
In inference, they can be the predicted boxes.
|
||||
|
||||
Returns:
|
||||
In training, a dict of losses.
|
||||
In inference, update `instances` with new fields "densepose" and return it.
|
||||
"""
|
||||
if not self.densepose_on:
|
||||
return {} if self.training else instances
|
||||
|
||||
features = [features[f] for f in self.in_features]
|
||||
if self.training:
|
||||
proposals, _ = select_foreground_proposals(instances, self.num_classes)
|
||||
proposals_dp = self.densepose_data_filter(proposals)
|
||||
if len(proposals_dp) > 0:
|
||||
# NOTE may deadlock in DDP if certain workers have empty proposals_dp
|
||||
proposal_boxes = [x.proposal_boxes for x in proposals_dp]
|
||||
|
||||
if self.use_decoder:
|
||||
features = [self.decoder(features)]
|
||||
|
||||
features_dp = self.densepose_pooler(features, proposal_boxes)
|
||||
densepose_head_outputs = self.densepose_head(features_dp)
|
||||
densepose_outputs, _, confidences, _ = self.densepose_predictor(
|
||||
densepose_head_outputs
|
||||
)
|
||||
densepose_loss_dict = self.densepose_losses(
|
||||
proposals_dp, densepose_outputs, confidences
|
||||
)
|
||||
return densepose_loss_dict
|
||||
else:
|
||||
pred_boxes = [x.pred_boxes for x in instances]
|
||||
|
||||
if self.use_decoder:
|
||||
features = [self.decoder(features)]
|
||||
|
||||
features_dp = self.densepose_pooler(features, pred_boxes)
|
||||
if len(features_dp) > 0:
|
||||
densepose_head_outputs = self.densepose_head(features_dp)
|
||||
densepose_outputs, _, confidences, _ = self.densepose_predictor(
|
||||
densepose_head_outputs
|
||||
)
|
||||
else:
|
||||
# If no detections occurred, set densepose_outputs
# and confidences to empty tensors
|
||||
empty_tensor = torch.zeros(size=(0, 0, 0, 0), device=features_dp.device)
|
||||
densepose_outputs = tuple([empty_tensor] * 4)
|
||||
confidences = tuple([empty_tensor] * 4)
|
||||
|
||||
densepose_inference(densepose_outputs, confidences, instances)
|
||||
return instances
|
||||
|
||||
def forward(self, images, features, proposals, targets=None):
|
||||
instances, losses = super().forward(images, features, proposals, targets)
|
||||
del targets, images
|
||||
|
||||
if self.training:
|
||||
losses.update(self._forward_densepose(features, instances))
|
||||
return instances, losses
|
||||
|
||||
def forward_with_given_boxes(self, features, instances):
|
||||
"""
|
||||
Use the given boxes in `instances` to produce other (non-box) per-ROI outputs.
|
||||
|
||||
This is useful for downstream tasks where a box is known, but need to obtain
|
||||
other attributes (outputs of other heads).
|
||||
Test-time augmentation also uses this.
|
||||
|
||||
Args:
|
||||
features: same as in `forward()`
|
||||
instances (list[Instances]): instances to predict other outputs. Expect the keys
|
||||
"pred_boxes" and "pred_classes" to exist.
|
||||
|
||||
Returns:
|
||||
instances (list[Instances]):
|
||||
the same `Instances` objects, with extra
|
||||
fields such as `pred_masks` or `pred_keypoints`.
|
||||
"""
|
||||
|
||||
instances = super().forward_with_given_boxes(features, instances)
|
||||
instances = self._forward_densepose(features, instances)
|
||||
return instances
|
@@ -0,0 +1,145 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
|
||||
class EntrySelector(object):
|
||||
"""
|
||||
Base class for entry selectors
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def from_string(spec: str) -> "EntrySelector":
|
||||
if spec == "*":
|
||||
return AllEntrySelector()
|
||||
return FieldEntrySelector(spec)
|
||||
|
||||
|
||||
class AllEntrySelector(EntrySelector):
|
||||
"""
|
||||
Selector that accepts all entries
|
||||
"""
|
||||
|
||||
SPECIFIER = "*"
|
||||
|
||||
def __call__(self, entry):
|
||||
return True
|
||||
|
||||
|
||||
class FieldEntrySelector(EntrySelector):
|
||||
"""
|
||||
Selector that accepts only entries that match provided field
|
||||
specifier(s). Only a limited set of specifiers is supported for now:
|
||||
<specifiers>::=<specifier>[<comma><specifiers>]
|
||||
<specifier>::=<field_name>[<type_delim><type>]<equal><value_or_range>
|
||||
<field_name> is a valid identifier
|
||||
<type> ::= "int" | "str"
|
||||
<equal> ::= "="
|
||||
<comma> ::= ","
|
||||
<type_delim> ::= ":"
|
||||
<value_or_range> ::= <value> | <range>
|
||||
<range> ::= <value><range_delim><value>
|
||||
<range_delim> ::= "-"
|
||||
<value> is a string without spaces and special symbols
|
||||
(e.g. <comma>, <equal>, <type_delim>, <range_delim>)
|
||||
"""
|
||||
|
||||
_SPEC_DELIM = ","
|
||||
_TYPE_DELIM = ":"
|
||||
_RANGE_DELIM = "-"
|
||||
_EQUAL = "="
|
||||
_ERROR_PREFIX = "Invalid field selector specifier"
|
||||
|
||||
class _FieldEntryValuePredicate(object):
|
||||
"""
|
||||
Predicate that checks strict equality for the specified entry field
|
||||
"""
|
||||
|
||||
def __init__(self, name: str, typespec: str, value: str):
|
||||
import builtins
|
||||
|
||||
self.name = name
|
||||
self.type = getattr(builtins, typespec) if typespec is not None else str
|
||||
self.value = value
|
||||
|
||||
def __call__(self, entry):
|
||||
return entry[self.name] == self.type(self.value)
|
||||
|
||||
class _FieldEntryRangePredicate(object):
|
||||
"""
|
||||
Predicate that checks whether an entry field falls into the specified range
|
||||
"""
|
||||
|
||||
def __init__(self, name: str, typespec: str, vmin: str, vmax: str):
|
||||
import builtins
|
||||
|
||||
self.name = name
|
||||
self.type = getattr(builtins, typespec) if typespec is not None else str
|
||||
self.vmin = vmin
|
||||
self.vmax = vmax
|
||||
|
||||
def __call__(self, entry):
|
||||
return (entry[self.name] >= self.type(self.vmin)) and (
|
||||
entry[self.name] <= self.type(self.vmax)
|
||||
)
|
||||
|
||||
def __init__(self, spec: str):
|
||||
self._predicates = self._parse_specifier_into_predicates(spec)
|
||||
|
||||
def __call__(self, entry: Dict[str, Any]):
|
||||
for predicate in self._predicates:
|
||||
if not predicate(entry):
|
||||
return False
|
||||
return True
|
||||
|
||||
def _parse_specifier_into_predicates(self, spec: str):
|
||||
predicates = []
|
||||
specs = spec.split(self._SPEC_DELIM)
|
||||
for subspec in specs:
|
||||
eq_idx = subspec.find(self._EQUAL)
|
||||
if eq_idx > 0:
|
||||
field_name_with_type = subspec[:eq_idx]
|
||||
field_name, field_type = self._parse_field_name_type(field_name_with_type)
|
||||
field_value_or_range = subspec[eq_idx + 1 :]
|
||||
if self._is_range_spec(field_value_or_range):
|
||||
vmin, vmax = self._get_range_spec(field_value_or_range)
|
||||
predicate = FieldEntrySelector._FieldEntryRangePredicate(
|
||||
field_name, field_type, vmin, vmax
|
||||
)
|
||||
else:
|
||||
predicate = FieldEntrySelector._FieldEntryValuePredicate(
|
||||
field_name, field_type, field_value_or_range
|
||||
)
|
||||
predicates.append(predicate)
|
||||
elif eq_idx == 0:
|
||||
self._parse_error(f'"{subspec}", field name is empty!')
|
||||
else:
|
||||
self._parse_error(f'"{subspec}", should have format ' "<field>=<value_or_range>!")
|
||||
return predicates
|
||||
|
||||
def _parse_field_name_type(self, field_name_with_type: str) -> Tuple[str, Optional[str]]:
|
||||
type_delim_idx = field_name_with_type.find(self._TYPE_DELIM)
|
||||
if type_delim_idx > 0:
|
||||
field_name = field_name_with_type[:type_delim_idx]
|
||||
field_type = field_name_with_type[type_delim_idx + 1 :]
|
||||
elif type_delim_idx == 0:
|
||||
self._parse_error(f'"{field_name_with_type}", field name is empty!')
|
||||
else:
|
||||
field_name = field_name_with_type
|
||||
field_type = None
|
||||
return field_name, field_type
|
||||
|
||||
def _is_range_spec(self, field_value_or_range):
|
||||
delim_idx = field_value_or_range.find(self._RANGE_DELIM)
|
||||
return delim_idx > 0
|
||||
|
||||
def _get_range_spec(self, field_value_or_range):
|
||||
if self._is_range_spec(field_value_or_range):
|
||||
delim_idx = field_value_or_range.find(self._RANGE_DELIM)
|
||||
vmin = field_value_or_range[:delim_idx]
|
||||
vmax = field_value_or_range[delim_idx + 1 :]
|
||||
return vmin, vmax
|
||||
else:
|
||||
self._parse_error('"field_value_or_range", range of values expected!')
|
||||
|
||||
def _parse_error(self, msg):
|
||||
raise ValueError(f"{self._ERROR_PREFIX}: {msg}")
|
@@ -0,0 +1,13 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import logging
|
||||
|
||||
|
||||
def verbosity_to_level(verbosity):
|
||||
if verbosity is not None:
|
||||
if verbosity == 0:
|
||||
return logging.WARNING
|
||||
elif verbosity == 1:
|
||||
return logging.INFO
|
||||
elif verbosity >= 2:
|
||||
return logging.DEBUG
|
||||
return logging.WARNING
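# Illustrative mapping (a sketch of how the apply_net tool is expected to use
# this helper with an argparse verbosity counter):
#
#   verbosity_to_level(None)  # -> logging.WARNING (default)
#   verbosity_to_level(1)     # -> logging.INFO    (-v)
#   verbosity_to_level(2)     # -> logging.DEBUG   (-vv)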
|
@@ -0,0 +1,16 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
from fvcore.common.file_io import PathManager
|
||||
|
||||
from detectron2.data import MetadataCatalog
|
||||
|
||||
from densepose import DensePoseTransformData
|
||||
|
||||
|
||||
def load_for_dataset(dataset_name):
|
||||
path = MetadataCatalog.get(dataset_name).densepose_transform_src
|
||||
densepose_transform_data_fpath = PathManager.get_local_path(path)
|
||||
return DensePoseTransformData.load(densepose_transform_data_fpath)
|
||||
|
||||
|
||||
def load_from_cfg(cfg):
|
||||
return load_for_dataset(cfg.DATASETS.TEST[0])
|
@@ -0,0 +1,191 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import logging
|
||||
import numpy as np
|
||||
import cv2
|
||||
import torch
|
||||
|
||||
Image = np.ndarray
|
||||
Boxes = torch.Tensor
|
||||
|
||||
|
||||
class MatrixVisualizer(object):
|
||||
"""
|
||||
Base visualizer for matrix data
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
inplace=True,
|
||||
cmap=cv2.COLORMAP_PARULA,
|
||||
val_scale=1.0,
|
||||
alpha=0.7,
|
||||
interp_method_matrix=cv2.INTER_LINEAR,
|
||||
interp_method_mask=cv2.INTER_NEAREST,
|
||||
):
|
||||
self.inplace = inplace
|
||||
self.cmap = cmap
|
||||
self.val_scale = val_scale
|
||||
self.alpha = alpha
|
||||
self.interp_method_matrix = interp_method_matrix
|
||||
self.interp_method_mask = interp_method_mask
|
||||
|
||||
def visualize(self, image_bgr, mask, matrix, bbox_xywh):
|
||||
self._check_image(image_bgr)
|
||||
self._check_mask_matrix(mask, matrix)
|
||||
if self.inplace:
|
||||
image_target_bgr = image_bgr
|
||||
else:
|
||||
image_target_bgr = image_bgr * 0
|
||||
x, y, w, h = [int(v) for v in bbox_xywh]
|
||||
if w <= 0 or h <= 0:
|
||||
return image_bgr
|
||||
mask, matrix = self._resize(mask, matrix, w, h)
|
||||
mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3])
|
||||
matrix_scaled = matrix.astype(np.float32) * self.val_scale
|
||||
_EPSILON = 1e-6
|
||||
if np.any(matrix_scaled > 255 + _EPSILON):
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.warning(
|
||||
f"Matrix has values > {255 + _EPSILON} after " f"scaling, clipping to [0..255]"
|
||||
)
|
||||
matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8)
|
||||
matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap)
|
||||
matrix_vis[mask_bg] = image_target_bgr[y : y + h, x : x + w, :][mask_bg]
|
||||
image_target_bgr[y : y + h, x : x + w, :] = (
|
||||
image_target_bgr[y : y + h, x : x + w, :] * (1.0 - self.alpha) + matrix_vis * self.alpha
|
||||
)
|
||||
return image_target_bgr.astype(np.uint8)
|
||||
|
||||
def _resize(self, mask, matrix, w, h):
|
||||
if (w != mask.shape[1]) or (h != mask.shape[0]):
|
||||
mask = cv2.resize(mask, (w, h), interpolation=self.interp_method_mask)
|
||||
if (w != matrix.shape[1]) or (h != matrix.shape[0]):
|
||||
matrix = cv2.resize(matrix, (w, h), interpolation=self.interp_method_matrix)
|
||||
return mask, matrix
|
||||
|
||||
def _check_image(self, image_rgb):
|
||||
assert len(image_rgb.shape) == 3
|
||||
assert image_rgb.shape[2] == 3
|
||||
assert image_rgb.dtype == np.uint8
|
||||
|
||||
def _check_mask_matrix(self, mask, matrix):
|
||||
assert len(matrix.shape) == 2
|
||||
assert len(mask.shape) == 2
|
||||
assert mask.dtype == np.uint8
|
||||
|
||||
|
||||
class RectangleVisualizer(object):
|
||||
|
||||
_COLOR_GREEN = (18, 127, 15)
|
||||
|
||||
def __init__(self, color=_COLOR_GREEN, thickness=1):
|
||||
self.color = color
|
||||
self.thickness = thickness
|
||||
|
||||
def visualize(self, image_bgr, bbox_xywh, color=None, thickness=None):
|
||||
x, y, w, h = bbox_xywh
|
||||
color = color or self.color
|
||||
thickness = thickness or self.thickness
|
||||
cv2.rectangle(image_bgr, (int(x), int(y)), (int(x + w), int(y + h)), color, thickness)
|
||||
return image_bgr
|
||||
|
||||
|
||||
class PointsVisualizer(object):
|
||||
|
||||
_COLOR_GREEN = (18, 127, 15)
|
||||
|
||||
def __init__(self, color_bgr=_COLOR_GREEN, r=5):
|
||||
self.color_bgr = color_bgr
|
||||
self.r = r
|
||||
|
||||
def visualize(self, image_bgr, pts_xy, colors_bgr=None, rs=None):
|
||||
for j, pt_xy in enumerate(pts_xy):
|
||||
x, y = pt_xy
|
||||
color_bgr = colors_bgr[j] if colors_bgr is not None else self.color_bgr
|
||||
r = rs[j] if rs is not None else self.r
|
||||
cv2.circle(image_bgr, (int(x), int(y)), r, color_bgr, -1)
|
||||
return image_bgr
|
||||
|
||||
|
||||
class TextVisualizer(object):
|
||||
|
||||
_COLOR_GRAY = (218, 227, 218)
|
||||
_COLOR_WHITE = (255, 255, 255)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
font_face=cv2.FONT_HERSHEY_SIMPLEX,
|
||||
font_color_bgr=_COLOR_GRAY,
|
||||
font_scale=0.35,
|
||||
font_line_type=cv2.LINE_AA,
|
||||
font_line_thickness=1,
|
||||
fill_color_bgr=_COLOR_WHITE,
|
||||
fill_color_transparency=1.0,
|
||||
frame_color_bgr=_COLOR_WHITE,
|
||||
frame_color_transparency=1.0,
|
||||
frame_thickness=1,
|
||||
):
|
||||
self.font_face = font_face
|
||||
self.font_color_bgr = font_color_bgr
|
||||
self.font_scale = font_scale
|
||||
self.font_line_type = font_line_type
|
||||
self.font_line_thickness = font_line_thickness
|
||||
self.fill_color_bgr = fill_color_bgr
|
||||
self.fill_color_transparency = fill_color_transparency
|
||||
self.frame_color_bgr = frame_color_bgr
|
||||
self.frame_color_transparency = frame_color_transparency
|
||||
self.frame_thickness = frame_thickness
|
||||
|
||||
def visualize(self, image_bgr, txt, topleft_xy):
|
||||
txt_w, txt_h = self.get_text_size_wh(txt)
|
||||
topleft_xy = tuple(map(int, topleft_xy))
|
||||
x, y = topleft_xy
|
||||
if self.frame_color_transparency < 1.0:
|
||||
t = self.frame_thickness
|
||||
image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :] = (
|
||||
image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :]
|
||||
* self.frame_color_transparency
|
||||
+ np.array(self.frame_color_bgr) * (1.0 - self.frame_color_transparency)
|
||||
).astype(float)
|
||||
if self.fill_color_transparency < 1.0:
|
||||
image_bgr[y : y + txt_h, x : x + txt_w, :] = (
|
||||
image_bgr[y : y + txt_h, x : x + txt_w, :] * self.fill_color_transparency
|
||||
+ np.array(self.fill_color_bgr) * (1.0 - self.fill_color_transparency)
|
||||
).astype(float)
|
||||
cv2.putText(
|
||||
image_bgr,
|
||||
txt,
|
||||
topleft_xy,
|
||||
self.font_face,
|
||||
self.font_scale,
|
||||
self.font_color_bgr,
|
||||
self.font_line_thickness,
|
||||
self.font_line_type,
|
||||
)
|
||||
return image_bgr
|
||||
|
||||
def get_text_size_wh(self, txt):
|
||||
((txt_w, txt_h), _) = cv2.getTextSize(
|
||||
txt, self.font_face, self.font_scale, self.font_line_thickness
|
||||
)
|
||||
return txt_w, txt_h
|
||||
|
||||
|
||||
class CompoundVisualizer(object):
|
||||
def __init__(self, visualizers):
|
||||
self.visualizers = visualizers
|
||||
|
||||
def visualize(self, image_bgr, data):
|
||||
assert len(data) == len(
|
||||
self.visualizers
|
||||
), "The number of datas {} should match the number of visualizers" " {}".format(
|
||||
len(data), len(self.visualizers)
|
||||
)
|
||||
image = image_bgr
|
||||
for i, visualizer in enumerate(self.visualizers):
|
||||
image = visualizer.visualize(image, data[i])
|
||||
return image
|
||||
|
||||
def __str__(self):
|
||||
visualizer_str = ", ".join([str(v) for v in self.visualizers])
|
||||
return "Compound Visualizer [{}]".format(visualizer_str)
|
@@ -0,0 +1,37 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
from .base import RectangleVisualizer, TextVisualizer
|
||||
|
||||
|
||||
class BoundingBoxVisualizer(object):
|
||||
def __init__(self):
|
||||
self.rectangle_visualizer = RectangleVisualizer()
|
||||
|
||||
def visualize(self, image_bgr, boxes_xywh):
|
||||
for bbox_xywh in boxes_xywh:
|
||||
image_bgr = self.rectangle_visualizer.visualize(image_bgr, bbox_xywh)
|
||||
return image_bgr
|
||||
|
||||
|
||||
class ScoredBoundingBoxVisualizer(object):
|
||||
def __init__(self, bbox_visualizer_params=None, score_visualizer_params=None):
|
||||
if bbox_visualizer_params is None:
|
||||
bbox_visualizer_params = {}
|
||||
if score_visualizer_params is None:
|
||||
score_visualizer_params = {}
|
||||
self.visualizer_bbox = RectangleVisualizer(**bbox_visualizer_params)
|
||||
self.visualizer_score = TextVisualizer(**score_visualizer_params)
|
||||
|
||||
def visualize(self, image_bgr, scored_bboxes):
|
||||
boxes_xywh, box_scores = scored_bboxes
|
||||
assert len(boxes_xywh) == len(
|
||||
box_scores
|
||||
), "Number of bounding boxes {} should be equal to the number of scores {}".format(
|
||||
len(boxes_xywh), len(box_scores)
|
||||
)
|
||||
for i, box_xywh in enumerate(boxes_xywh):
|
||||
score_i = box_scores[i]
|
||||
image_bgr = self.visualizer_bbox.visualize(image_bgr, box_xywh)
|
||||
score_txt = "{0:6.4f}".format(score_i)
|
||||
topleft_xy = box_xywh[0], box_xywh[1]
|
||||
image_bgr = self.visualizer_score.visualize(image_bgr, score_txt, topleft_xy)
|
||||
return image_bgr
|
@@ -0,0 +1,593 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import logging
|
||||
import numpy as np
|
||||
from typing import Iterable, Optional, Tuple
|
||||
import cv2
|
||||
|
||||
from ..data.structures import DensePoseDataRelative, DensePoseOutput, DensePoseResult
|
||||
from .base import Boxes, Image, MatrixVisualizer, PointsVisualizer
|
||||
|
||||
|
||||
class DensePoseResultsVisualizer(object):
|
||||
def visualize(self, image_bgr: Image, densepose_result: Optional[DensePoseResult]) -> Image:
|
||||
if densepose_result is None:
|
||||
return image_bgr
|
||||
context = self.create_visualization_context(image_bgr)
|
||||
for i, result_encoded_w_shape in enumerate(densepose_result.results):
|
||||
iuv_arr = DensePoseResult.decode_png_data(*result_encoded_w_shape)
|
||||
bbox_xywh = densepose_result.boxes_xywh[i]
|
||||
self.visualize_iuv_arr(context, iuv_arr, bbox_xywh)
|
||||
image_bgr = self.context_to_image_bgr(context)
|
||||
return image_bgr
|
||||
|
||||
|
||||
class DensePoseMaskedColormapResultsVisualizer(DensePoseResultsVisualizer):
|
||||
def __init__(
|
||||
self,
|
||||
data_extractor,
|
||||
segm_extractor,
|
||||
inplace=True,
|
||||
cmap=cv2.COLORMAP_PARULA,
|
||||
alpha=0.7,
|
||||
val_scale=1.0,
|
||||
):
|
||||
self.mask_visualizer = MatrixVisualizer(
|
||||
inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha
|
||||
)
|
||||
self.data_extractor = data_extractor
|
||||
self.segm_extractor = segm_extractor
|
||||
|
||||
def create_visualization_context(self, image_bgr: Image):
|
||||
return image_bgr
|
||||
|
||||
def context_to_image_bgr(self, context):
|
||||
return context
|
||||
|
||||
def get_image_bgr_from_context(self, context):
|
||||
return context
|
||||
|
||||
def visualize_iuv_arr(self, context, iuv_arr, bbox_xywh):
|
||||
image_bgr = self.get_image_bgr_from_context(context)
|
||||
matrix = self.data_extractor(iuv_arr)
|
||||
segm = self.segm_extractor(iuv_arr)
|
||||
mask = np.zeros(matrix.shape, dtype=np.uint8)
|
||||
mask[segm > 0] = 1
|
||||
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh)
|
||||
return image_bgr
|
||||
|
||||
|
||||
def _extract_i_from_iuvarr(iuv_arr):
|
||||
return iuv_arr[0, :, :]
|
||||
|
||||
|
||||
def _extract_u_from_iuvarr(iuv_arr):
|
||||
return iuv_arr[1, :, :]
|
||||
|
||||
|
||||
def _extract_v_from_iuvarr(iuv_arr):
|
||||
return iuv_arr[2, :, :]
|
||||
|
||||
|
||||
class DensePoseResultsMplContourVisualizer(DensePoseResultsVisualizer):
|
||||
def __init__(self, levels=10, **kwargs):
|
||||
self.levels = levels
|
||||
self.plot_args = kwargs
|
||||
|
||||
def create_visualization_context(self, image_bgr: Image):
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
|
||||
|
||||
context = {}
|
||||
context["image_bgr"] = image_bgr
|
||||
dpi = 100
|
||||
height_inches = float(image_bgr.shape[0]) / dpi
|
||||
width_inches = float(image_bgr.shape[1]) / dpi
|
||||
fig = plt.figure(figsize=(width_inches, height_inches), dpi=dpi)
|
||||
plt.axes([0, 0, 1, 1])
|
||||
plt.axis("off")
|
||||
context["fig"] = fig
|
||||
canvas = FigureCanvas(fig)
|
||||
context["canvas"] = canvas
|
||||
extent = (0, image_bgr.shape[1], image_bgr.shape[0], 0)
|
||||
plt.imshow(image_bgr[:, :, ::-1], extent=extent)
|
||||
return context
|
||||
|
||||
def context_to_image_bgr(self, context):
|
||||
fig = context["fig"]
|
||||
w, h = map(int, fig.get_size_inches() * fig.get_dpi())
|
||||
canvas = context["canvas"]
|
||||
canvas.draw()
|
||||
image_1d = np.frombuffer(canvas.tostring_rgb(), dtype="uint8")
|
||||
image_rgb = image_1d.reshape(h, w, 3)
|
||||
image_bgr = image_rgb[:, :, ::-1].copy()
|
||||
return image_bgr
|
||||
|
||||
def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> Image:
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
|
||||
v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
|
||||
extent = (
|
||||
bbox_xywh[0],
|
||||
bbox_xywh[0] + bbox_xywh[2],
|
||||
bbox_xywh[1],
|
||||
bbox_xywh[1] + bbox_xywh[3],
|
||||
)
|
||||
plt.contour(u, self.levels, extent=extent, **self.plot_args)
|
||||
plt.contour(v, self.levels, extent=extent, **self.plot_args)
|
||||
|
||||
|
||||
class DensePoseResultsCustomContourVisualizer(DensePoseResultsVisualizer):
|
||||
"""
|
||||
Contour visualization using marching squares
|
||||
"""
|
||||
|
||||
def __init__(self, levels=10, **kwargs):
|
||||
# TODO: colormap is hardcoded
|
||||
cmap = cv2.COLORMAP_PARULA
|
||||
if isinstance(levels, int):
|
||||
self.levels = np.linspace(0, 1, levels)
|
||||
else:
|
||||
self.levels = levels
|
||||
if "linewidths" in kwargs:
|
||||
self.linewidths = kwargs["linewidths"]
|
||||
else:
|
||||
self.linewidths = [1] * len(self.levels)
|
||||
self.plot_args = kwargs
|
||||
img_colors_bgr = cv2.applyColorMap((self.levels * 255).astype(np.uint8), cmap)
|
||||
self.level_colors_bgr = [
|
||||
[int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr
|
||||
]
|
||||
|
||||
def create_visualization_context(self, image_bgr: Image):
|
||||
return image_bgr
|
||||
|
||||
def context_to_image_bgr(self, context):
|
||||
return context
|
||||
|
||||
def get_image_bgr_from_context(self, context):
|
||||
return context
|
||||
|
||||
def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> Image:
|
||||
image_bgr = self.get_image_bgr_from_context(context)
|
||||
segm = _extract_i_from_iuvarr(iuv_arr)
|
||||
u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
|
||||
v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
|
||||
self._contours(image_bgr, u, segm, bbox_xywh)
|
||||
self._contours(image_bgr, v, segm, bbox_xywh)
|
||||
|
||||
def _contours(self, image_bgr, arr, segm, bbox_xywh):
|
||||
for part_idx in range(1, DensePoseDataRelative.N_PART_LABELS + 1):
|
||||
mask = segm == part_idx
|
||||
if not np.any(mask):
|
||||
continue
|
||||
arr_min = np.amin(arr[mask])
|
||||
arr_max = np.amax(arr[mask])
|
||||
I, J = np.nonzero(mask)
|
||||
i0 = np.amin(I)
|
||||
i1 = np.amax(I) + 1
|
||||
j0 = np.amin(J)
|
||||
j1 = np.amax(J) + 1
|
||||
if (j1 == j0 + 1) or (i1 == i0 + 1):
|
||||
continue
|
||||
Nw = arr.shape[1] - 1
|
||||
Nh = arr.shape[0] - 1
|
||||
for level_idx, level in enumerate(self.levels):
|
||||
if (level < arr_min) or (level > arr_max):
|
||||
continue
|
||||
vp = arr[i0:i1, j0:j1] >= level
|
||||
bin_codes = vp[:-1, :-1] + vp[1:, :-1] * 2 + vp[1:, 1:] * 4 + vp[:-1, 1:] * 8
|
||||
mp = mask[i0:i1, j0:j1]
|
||||
bin_mask_codes = mp[:-1, :-1] + mp[1:, :-1] * 2 + mp[1:, 1:] * 4 + mp[:-1, 1:] * 8
|
||||
it = np.nditer(bin_codes, flags=["multi_index"])
|
||||
color_bgr = self.level_colors_bgr[level_idx]
|
||||
linewidth = self.linewidths[level_idx]
|
||||
while not it.finished:
|
||||
if (it[0] != 0) and (it[0] != 15):
|
||||
i, j = it.multi_index
|
||||
if bin_mask_codes[i, j] != 0:
|
||||
self._draw_line(
|
||||
image_bgr,
|
||||
arr,
|
||||
mask,
|
||||
level,
|
||||
color_bgr,
|
||||
linewidth,
|
||||
it[0],
|
||||
it.multi_index,
|
||||
bbox_xywh,
|
||||
Nw,
|
||||
Nh,
|
||||
(i0, j0),
|
||||
)
|
||||
it.iternext()
|
||||
|
||||
def _draw_line(
|
||||
self,
|
||||
image_bgr,
|
||||
arr,
|
||||
mask,
|
||||
v,
|
||||
color_bgr,
|
||||
linewidth,
|
||||
bin_code,
|
||||
multi_idx,
|
||||
bbox_xywh,
|
||||
Nw,
|
||||
Nh,
|
||||
offset,
|
||||
):
|
||||
lines = self._bin_code_2_lines(arr, v, bin_code, multi_idx, Nw, Nh, offset)
|
||||
x0, y0, w, h = bbox_xywh
|
||||
x1 = x0 + w
|
||||
y1 = y0 + h
|
||||
for line in lines:
|
||||
x0r, y0r = line[0]
|
||||
x1r, y1r = line[1]
|
||||
pt0 = (int(x0 + x0r * (x1 - x0)), int(y0 + y0r * (y1 - y0)))
|
||||
pt1 = (int(x0 + x1r * (x1 - x0)), int(y0 + y1r * (y1 - y0)))
|
||||
cv2.line(image_bgr, pt0, pt1, color_bgr, linewidth)
|
||||
|
||||
def _bin_code_2_lines(self, arr, v, bin_code, multi_idx, Nw, Nh, offset):
|
||||
i0, j0 = offset
|
||||
i, j = multi_idx
|
||||
i += i0
|
||||
j += j0
|
||||
v0, v1, v2, v3 = arr[i, j], arr[i + 1, j], arr[i + 1, j + 1], arr[i, j + 1]
|
||||
x0i = float(j) / Nw
|
||||
y0j = float(i) / Nh
|
||||
He = 1.0 / Nh
|
||||
We = 1.0 / Nw
|
||||
if (bin_code == 1) or (bin_code == 14):
|
||||
a = (v - v0) / (v1 - v0)
|
||||
b = (v - v0) / (v3 - v0)
|
||||
pt1 = (x0i, y0j + a * He)
|
||||
pt2 = (x0i + b * We, y0j)
|
||||
return [(pt1, pt2)]
|
||||
elif (bin_code == 2) or (bin_code == 13):
|
||||
a = (v - v0) / (v1 - v0)
|
||||
b = (v - v1) / (v2 - v1)
|
||||
pt1 = (x0i, y0j + a * He)
|
||||
pt2 = (x0i + b * We, y0j + He)
|
||||
return [(pt1, pt2)]
|
||||
elif (bin_code == 3) or (bin_code == 12):
|
||||
a = (v - v0) / (v3 - v0)
|
||||
b = (v - v1) / (v2 - v1)
|
||||
pt1 = (x0i + a * We, y0j)
|
||||
pt2 = (x0i + b * We, y0j + He)
|
||||
return [(pt1, pt2)]
|
||||
elif (bin_code == 4) or (bin_code == 11):
|
||||
a = (v - v1) / (v2 - v1)
|
||||
b = (v - v3) / (v2 - v3)
|
||||
pt1 = (x0i + a * We, y0j + He)
|
||||
pt2 = (x0i + We, y0j + b * He)
|
||||
return [(pt1, pt2)]
|
||||
elif (bin_code == 6) or (bin_code == 9):
|
||||
a = (v - v0) / (v1 - v0)
|
||||
b = (v - v3) / (v2 - v3)
|
||||
pt1 = (x0i, y0j + a * He)
|
||||
pt2 = (x0i + We, y0j + b * He)
|
||||
return [(pt1, pt2)]
|
||||
elif (bin_code == 7) or (bin_code == 8):
|
||||
a = (v - v0) / (v3 - v0)
|
||||
b = (v - v3) / (v2 - v3)
|
||||
pt1 = (x0i + a * We, y0j)
|
||||
pt2 = (x0i + We, y0j + b * He)
|
||||
return [(pt1, pt2)]
|
||||
elif bin_code == 5:
|
||||
a1 = (v - v0) / (v1 - v0)
|
||||
b1 = (v - v1) / (v2 - v1)
|
||||
pt11 = (x0i, y0j + a1 * He)
|
||||
pt12 = (x0i + b1 * We, y0j + He)
|
||||
a2 = (v - v0) / (v3 - v0)
|
||||
b2 = (v - v3) / (v2 - v3)
|
||||
pt21 = (x0i + a2 * We, y0j)
|
||||
pt22 = (x0i + We, y0j + b2 * He)
|
||||
return [(pt11, pt12), (pt21, pt22)]
|
||||
elif bin_code == 10:
|
||||
a1 = (v - v0) / (v3 - v0)
|
||||
b1 = (v - v0) / (v1 - v0)
|
||||
pt11 = (x0i + a1 * We, y0j)
|
||||
pt12 = (x0i, y0j + b1 * He)
|
||||
a2 = (v - v1) / (v2 - v1)
|
||||
b2 = (v - v3) / (v2 - v3)
|
||||
pt21 = (x0i + a2 * We, y0j + He)
|
||||
pt22 = (x0i + We, y0j + b2 * He)
|
||||
return [(pt11, pt12), (pt21, pt22)]
|
||||
return []
|
||||
|
||||
|
||||
try:
|
||||
import matplotlib
|
||||
|
||||
matplotlib.use("Agg")
|
||||
DensePoseResultsContourVisualizer = DensePoseResultsMplContourVisualizer
|
||||
except ModuleNotFoundError:
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.warning("Could not import matplotlib, using custom contour visualizer")
|
||||
DensePoseResultsContourVisualizer = DensePoseResultsCustomContourVisualizer
|
||||
|
||||
|
||||
class DensePoseResultsFineSegmentationVisualizer(DensePoseMaskedColormapResultsVisualizer):
|
||||
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
|
||||
super(DensePoseResultsFineSegmentationVisualizer, self).__init__(
|
||||
_extract_i_from_iuvarr,
|
||||
_extract_i_from_iuvarr,
|
||||
inplace,
|
||||
cmap,
|
||||
alpha,
|
||||
val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS,
|
||||
)
|
||||
|
||||
|
||||
class DensePoseResultsUVisualizer(DensePoseMaskedColormapResultsVisualizer):
|
||||
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
|
||||
super(DensePoseResultsUVisualizer, self).__init__(
|
||||
_extract_u_from_iuvarr, _extract_i_from_iuvarr, inplace, cmap, alpha, val_scale=1.0
|
||||
)
|
||||
|
||||
|
||||
class DensePoseResultsVVisualizer(DensePoseMaskedColormapResultsVisualizer):
|
||||
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
|
||||
super(DensePoseResultsVVisualizer, self).__init__(
|
||||
_extract_v_from_iuvarr, _extract_i_from_iuvarr, inplace, cmap, alpha, val_scale=1.0
|
||||
)
|
||||
|
||||
|
||||
class DensePoseOutputsFineSegmentationVisualizer(object):
|
||||
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
|
||||
self.mask_visualizer = MatrixVisualizer(
|
||||
inplace=inplace,
|
||||
cmap=cmap,
|
||||
val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS,
|
||||
alpha=alpha,
|
||||
)
|
||||
|
||||
def visualize(
|
||||
self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
|
||||
) -> Image:
|
||||
if dp_output_with_bboxes is None:
|
||||
return image_bgr
|
||||
densepose_output, bboxes_xywh = dp_output_with_bboxes
|
||||
S = densepose_output.S
|
||||
I = densepose_output.I # noqa
|
||||
U = densepose_output.U
|
||||
V = densepose_output.V
|
||||
N = S.size(0)
|
||||
assert N == I.size(
|
||||
0
|
||||
), "densepose outputs S {} and I {}" " should have equal first dim size".format(
|
||||
S.size(), I.size()
|
||||
)
|
||||
assert N == U.size(
|
||||
0
|
||||
), "densepose outputs S {} and U {}" " should have equal first dim size".format(
|
||||
S.size(), U.size()
|
||||
)
|
||||
assert N == V.size(
|
||||
0
|
||||
), "densepose outputs S {} and V {}" " should have equal first dim size".format(
|
||||
S.size(), V.size()
|
||||
)
|
||||
assert N == len(
|
||||
bboxes_xywh
|
||||
), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
|
||||
len(bboxes_xywh), N
|
||||
)
|
||||
for n in range(N):
|
||||
Sn = S[n].argmax(dim=0)
|
||||
In = I[n].argmax(dim=0) * (Sn > 0).long()
|
||||
matrix = In.cpu().numpy().astype(np.uint8)
|
||||
mask = np.zeros(matrix.shape, dtype=np.uint8)
|
||||
mask[matrix > 0] = 1
|
||||
bbox_xywh = bboxes_xywh[n]
|
||||
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh)
|
||||
return image_bgr
|
||||
|
||||
|
||||
class DensePoseOutputsUVisualizer(object):
|
||||
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
|
||||
self.mask_visualizer = MatrixVisualizer(
|
||||
inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha
|
||||
)
|
||||
|
||||
def visualize(
|
||||
self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
|
||||
) -> Image:
|
||||
if dp_output_with_bboxes is None:
|
||||
return image_bgr
|
||||
densepose_output, bboxes_xywh = dp_output_with_bboxes
|
||||
assert isinstance(
|
||||
densepose_output, DensePoseOutput
|
||||
), "DensePoseOutput expected, {} encountered".format(type(densepose_output))
|
||||
S = densepose_output.S
|
||||
I = densepose_output.I # noqa
|
||||
U = densepose_output.U
|
||||
V = densepose_output.V
|
||||
N = S.size(0)
|
||||
assert N == I.size(
|
||||
0
|
||||
), "densepose outputs S {} and I {}" " should have equal first dim size".format(
|
||||
S.size(), I.size()
|
||||
)
|
||||
assert N == U.size(
|
||||
0
|
||||
), "densepose outputs S {} and U {}" " should have equal first dim size".format(
|
||||
S.size(), U.size()
|
||||
)
|
||||
assert N == V.size(
|
||||
0
|
||||
), "densepose outputs S {} and V {}" " should have equal first dim size".format(
|
||||
S.size(), V.size()
|
||||
)
|
||||
assert N == len(
|
||||
bboxes_xywh
|
||||
), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
|
||||
len(bboxes_xywh), N
|
||||
)
|
||||
for n in range(N):
|
||||
Sn = S[n].argmax(dim=0)
|
||||
In = I[n].argmax(dim=0) * (Sn > 0).long()
|
||||
segmentation = In.cpu().numpy().astype(np.uint8)
|
||||
mask = np.zeros(segmentation.shape, dtype=np.uint8)
|
||||
mask[segmentation > 0] = 1
|
||||
Un = U[n].cpu().numpy().astype(np.float32)
|
||||
Uvis = np.zeros(segmentation.shape, dtype=np.float32)
|
||||
for partId in range(Un.shape[0]):
|
||||
Uvis[segmentation == partId] = Un[partId][segmentation == partId].clip(0, 1) * 255
|
||||
bbox_xywh = bboxes_xywh[n]
|
||||
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, Uvis, bbox_xywh)
|
||||
return image_bgr
|
||||
|
||||
|
||||
class DensePoseOutputsVVisualizer(object):
|
||||
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
|
||||
self.mask_visualizer = MatrixVisualizer(
|
||||
inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha
|
||||
)
|
||||
|
||||
def visualize(
|
||||
self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
|
||||
) -> Image:
|
||||
if dp_output_with_bboxes is None:
|
||||
return image_bgr
|
||||
densepose_output, bboxes_xywh = dp_output_with_bboxes
|
||||
assert isinstance(
|
||||
densepose_output, DensePoseOutput
|
||||
), "DensePoseOutput expected, {} encountered".format(type(densepose_output))
|
||||
S = densepose_output.S
|
||||
I = densepose_output.I # noqa
|
||||
U = densepose_output.U
|
||||
V = densepose_output.V
|
||||
N = S.size(0)
|
||||
assert N == I.size(
|
||||
0
|
||||
), "densepose outputs S {} and I {}" " should have equal first dim size".format(
|
||||
S.size(), I.size()
|
||||
)
|
||||
assert N == U.size(
|
||||
0
|
||||
), "densepose outputs S {} and U {}" " should have equal first dim size".format(
|
||||
S.size(), U.size()
|
||||
)
|
||||
assert N == V.size(
|
||||
0
|
||||
), "densepose outputs S {} and V {}" " should have equal first dim size".format(
|
||||
S.size(), V.size()
|
||||
)
|
||||
assert N == len(
|
||||
bboxes_xywh
|
||||
), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
|
||||
len(bboxes_xywh), N
|
||||
)
|
||||
for n in range(N):
|
||||
Sn = S[n].argmax(dim=0)
|
||||
In = I[n].argmax(dim=0) * (Sn > 0).long()
|
||||
segmentation = In.cpu().numpy().astype(np.uint8)
|
||||
mask = np.zeros(segmentation.shape, dtype=np.uint8)
|
||||
mask[segmentation > 0] = 1
|
||||
Vn = V[n].cpu().numpy().astype(np.float32)
|
||||
Vvis = np.zeros(segmentation.shape, dtype=np.float32)
|
||||
for partId in range(Vn.shape[0]):
|
||||
Vvis[segmentation == partId] = Vn[partId][segmentation == partId].clip(0, 1) * 255
|
||||
bbox_xywh = bboxes_xywh[n]
|
||||
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, Vvis, bbox_xywh)
|
||||
return image_bgr
|
||||
|
||||
|
||||
class DensePoseDataCoarseSegmentationVisualizer(object):
|
||||
"""
|
||||
Visualizer for ground truth segmentation
|
||||
"""
|
||||
|
||||
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
|
||||
self.mask_visualizer = MatrixVisualizer(
|
||||
inplace=inplace,
|
||||
cmap=cmap,
|
||||
val_scale=255.0 / DensePoseDataRelative.N_BODY_PARTS,
|
||||
alpha=alpha,
|
||||
)
|
||||
|
||||
def visualize(
|
||||
self,
|
||||
image_bgr: Image,
|
||||
bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
|
||||
) -> Image:
|
||||
if bbox_densepose_datas is None:
|
||||
return image_bgr
|
||||
for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
|
||||
matrix = densepose_data.segm.numpy()
|
||||
mask = np.zeros(matrix.shape, dtype=np.uint8)
|
||||
mask[matrix > 0] = 1
|
||||
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh.numpy())
|
||||
return image_bgr
|
||||
|
||||
|
||||
class DensePoseDataPointsVisualizer(object):
|
||||
def __init__(self, densepose_data_to_value_fn=None, cmap=cv2.COLORMAP_PARULA):
|
||||
self.points_visualizer = PointsVisualizer()
|
||||
self.densepose_data_to_value_fn = densepose_data_to_value_fn
|
||||
self.cmap = cmap
|
||||
|
||||
def visualize(
|
||||
self,
|
||||
image_bgr: Image,
|
||||
bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
|
||||
) -> Image:
|
||||
if bbox_densepose_datas is None:
|
||||
return image_bgr
|
||||
for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
|
||||
x0, y0, w, h = bbox_xywh.numpy()
|
||||
x = densepose_data.x.numpy() * w / 255.0 + x0
|
||||
y = densepose_data.y.numpy() * h / 255.0 + y0
|
||||
pts_xy = zip(x, y)
|
||||
if self.densepose_data_to_value_fn is None:
|
||||
image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy)
|
||||
else:
|
||||
v = self.densepose_data_to_value_fn(densepose_data)
|
||||
img_colors_bgr = cv2.applyColorMap(v, self.cmap)
|
||||
colors_bgr = [
|
||||
[int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr
|
||||
]
|
||||
image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy, colors_bgr)
|
||||
return image_bgr
|
||||
|
||||
|
||||
def _densepose_data_u_for_cmap(densepose_data):
|
||||
u = np.clip(densepose_data.u.numpy(), 0, 1) * 255.0
|
||||
return u.astype(np.uint8)
|
||||
|
||||
|
||||
def _densepose_data_v_for_cmap(densepose_data):
|
||||
v = np.clip(densepose_data.v.numpy(), 0, 1) * 255.0
|
||||
return v.astype(np.uint8)
|
||||
|
||||
|
||||
def _densepose_data_i_for_cmap(densepose_data):
|
||||
i = (
|
||||
np.clip(densepose_data.i.numpy(), 0.0, DensePoseDataRelative.N_PART_LABELS)
|
||||
* 255.0
|
||||
/ DensePoseDataRelative.N_PART_LABELS
|
||||
)
|
||||
return i.astype(np.uint8)
|
||||
|
||||
|
||||
class DensePoseDataPointsUVisualizer(DensePoseDataPointsVisualizer):
|
||||
def __init__(self):
|
||||
super(DensePoseDataPointsUVisualizer, self).__init__(
|
||||
densepose_data_to_value_fn=_densepose_data_u_for_cmap
|
||||
)
|
||||
|
||||
|
||||
class DensePoseDataPointsVVisualizer(DensePoseDataPointsVisualizer):
|
||||
def __init__(self):
|
||||
super(DensePoseDataPointsVVisualizer, self).__init__(
|
||||
densepose_data_to_value_fn=_densepose_data_v_for_cmap
|
||||
)
|
||||
|
||||
|
||||
class DensePoseDataPointsIVisualizer(DensePoseDataPointsVisualizer):
|
||||
def __init__(self):
|
||||
super(DensePoseDataPointsIVisualizer, self).__init__(
|
||||
densepose_data_to_value_fn=_densepose_data_i_for_cmap
|
||||
)
|
@@ -0,0 +1,152 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import logging
|
||||
from typing import Sequence
|
||||
import torch
|
||||
|
||||
from detectron2.layers.nms import batched_nms
|
||||
from detectron2.structures.instances import Instances
|
||||
|
||||
from densepose.vis.bounding_box import BoundingBoxVisualizer, ScoredBoundingBoxVisualizer
|
||||
from densepose.vis.densepose import DensePoseResultsVisualizer
|
||||
|
||||
from .base import CompoundVisualizer
|
||||
|
||||
Scores = Sequence[float]
|
||||
|
||||
|
||||
def extract_scores_from_instances(instances: Instances, select=None):
|
||||
if instances.has("scores"):
|
||||
return instances.scores if select is None else instances.scores[select]
|
||||
return None
|
||||
|
||||
|
||||
def extract_boxes_xywh_from_instances(instances: Instances, select=None):
|
||||
if instances.has("pred_boxes"):
|
||||
boxes_xywh = instances.pred_boxes.tensor.clone()
|
||||
boxes_xywh[:, 2] -= boxes_xywh[:, 0]
|
||||
boxes_xywh[:, 3] -= boxes_xywh[:, 1]
|
||||
return boxes_xywh if select is None else boxes_xywh[select]
|
||||
return None
|
||||
|
||||
|
||||
def create_extractor(visualizer: object):
|
||||
"""
|
||||
Create an extractor for the provided visualizer
|
||||
"""
|
||||
if isinstance(visualizer, CompoundVisualizer):
|
||||
extractors = [create_extractor(v) for v in visualizer.visualizers]
|
||||
return CompoundExtractor(extractors)
|
||||
elif isinstance(visualizer, DensePoseResultsVisualizer):
|
||||
return DensePoseResultExtractor()
|
||||
elif isinstance(visualizer, ScoredBoundingBoxVisualizer):
|
||||
return CompoundExtractor([extract_boxes_xywh_from_instances, extract_scores_from_instances])
|
||||
elif isinstance(visualizer, BoundingBoxVisualizer):
|
||||
return extract_boxes_xywh_from_instances
|
||||
else:
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.error(f"Could not create extractor for {visualizer}")
|
||||
return None
|
||||
|
||||
|
||||
class BoundingBoxExtractor(object):
|
||||
"""
|
||||
Extracts bounding boxes from instances
|
||||
"""
|
||||
|
||||
def __call__(self, instances: Instances):
|
||||
boxes_xywh = extract_boxes_xywh_from_instances(instances)
|
||||
return boxes_xywh
|
||||
|
||||
|
||||
class ScoredBoundingBoxExtractor(object):
|
||||
"""
|
||||
Extracts bounding boxes from instances
|
||||
"""
|
||||
|
||||
def __call__(self, instances: Instances, select=None):
|
||||
scores = extract_scores_from_instances(instances)
|
||||
boxes_xywh = extract_boxes_xywh_from_instances(instances)
|
||||
if (scores is None) or (boxes_xywh is None):
|
||||
return (boxes_xywh, scores)
|
||||
if select is not None:
|
||||
scores = scores[select]
|
||||
boxes_xywh = boxes_xywh[select]
|
||||
return (boxes_xywh, scores)
|
||||
|
||||
|
||||
class DensePoseResultExtractor(object):
|
||||
"""
|
||||
Extracts DensePose result from instances
|
||||
"""
|
||||
|
||||
def __call__(self, instances: Instances, select=None):
|
||||
boxes_xywh = extract_boxes_xywh_from_instances(instances)
|
||||
if instances.has("pred_densepose") and (boxes_xywh is not None):
|
||||
dpout = instances.pred_densepose
|
||||
if select is not None:
|
||||
dpout = dpout[select]
|
||||
boxes_xywh = boxes_xywh[select]
|
||||
return dpout.to_result(boxes_xywh)
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
class CompoundExtractor(object):
|
||||
"""
|
||||
Extracts data for CompoundVisualizer
|
||||
"""
|
||||
|
||||
def __init__(self, extractors):
|
||||
self.extractors = extractors
|
||||
|
||||
def __call__(self, instances: Instances, select=None):
|
||||
datas = []
|
||||
for extractor in self.extractors:
|
||||
data = extractor(instances, select)
|
||||
datas.append(data)
|
||||
return datas
|
||||
|
||||
|
||||
class NmsFilteredExtractor(object):
|
||||
"""
|
||||
Extracts data in the format accepted by NmsFilteredVisualizer
|
||||
"""
|
||||
|
||||
def __init__(self, extractor, iou_threshold):
|
||||
self.extractor = extractor
|
||||
self.iou_threshold = iou_threshold
|
||||
|
||||
def __call__(self, instances: Instances, select=None):
|
||||
scores = extract_scores_from_instances(instances)
|
||||
boxes_xywh = extract_boxes_xywh_from_instances(instances)
|
||||
if boxes_xywh is None:
|
||||
return None
|
||||
select_local_idx = batched_nms(
|
||||
boxes_xywh,
|
||||
scores,
|
||||
torch.zeros(len(scores), dtype=torch.int32),
|
||||
iou_threshold=self.iou_threshold,
|
||||
).squeeze()
|
||||
select_local = torch.zeros(len(boxes_xywh), dtype=torch.bool, device=boxes_xywh.device)
|
||||
select_local[select_local_idx] = True
|
||||
select = select_local if select is None else (select & select_local)
|
||||
return self.extractor(instances, select=select)
|
||||
|
||||
|
||||
class ScoreThresholdedExtractor(object):
|
||||
"""
|
||||
Extracts data in the format accepted by ScoreThresholdedVisualizer
|
||||
"""
|
||||
|
||||
def __init__(self, extractor, min_score):
|
||||
self.extractor = extractor
|
||||
self.min_score = min_score
|
||||
|
||||
def __call__(self, instances: Instances, select=None):
|
||||
scores = extract_scores_from_instances(instances)
|
||||
if scores is None:
|
||||
return None
|
||||
select_local = scores > self.min_score
|
||||
select = select_local if select is None else (select & select_local)
|
||||
data = self.extractor(instances, select=select)
|
||||
return data
|
@@ -0,0 +1,7 @@
|
||||
|
||||
## Some scripts for developers to use:
|
||||
|
||||
- `run_instant_tests.sh`: run training for a few iterations.
|
||||
- `run_inference_tests.sh`: run inference on a small dataset.
|
||||
- `../../dev/linter.sh`: lint the codebase before commit.
|
||||
- `../../dev/parse_results.sh`: parse results from log file.
|
@@ -0,0 +1,33 @@
|
||||
#!/bin/bash -e
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
BIN="python train_net.py"
|
||||
OUTPUT="inference_test_output"
|
||||
NUM_GPUS=2
|
||||
IMS_PER_GPU=2
|
||||
IMS_PER_BATCH=$(( NUM_GPUS * IMS_PER_GPU ))
|
||||
|
||||
CFG_LIST=( "${@:1}" )
|
||||
|
||||
if [ ${#CFG_LIST[@]} -eq 0 ]; then
|
||||
CFG_LIST=( ./configs/quick_schedules/*inference_acc_test.yaml )
|
||||
fi
|
||||
|
||||
echo "========================================================================"
|
||||
echo "Configs to run:"
|
||||
echo "${CFG_LIST[@]}"
|
||||
echo "========================================================================"
|
||||
|
||||
for cfg in "${CFG_LIST[@]}"; do
|
||||
echo "========================================================================"
|
||||
echo "Running $cfg ..."
|
||||
echo "========================================================================"
|
||||
$BIN \
|
||||
--eval-only \
|
||||
--num-gpus $NUM_GPUS \
|
||||
--config-file "$cfg" \
|
||||
OUTPUT_DIR "$OUTPUT" \
|
||||
SOLVER.IMS_PER_BATCH $IMS_PER_BATCH
|
||||
rm -rf $OUTPUT
|
||||
done
|
||||
|
@@ -0,0 +1,28 @@
|
||||
#!/bin/bash -e
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
BIN="python train_net.py"
|
||||
OUTPUT="instant_test_output"
|
||||
NUM_GPUS=2
|
||||
SOLVER_IMS_PER_BATCH=$((NUM_GPUS * 2))
|
||||
|
||||
CFG_LIST=( "${@:1}" )
|
||||
if [ ${#CFG_LIST[@]} -eq 0 ]; then
|
||||
CFG_LIST=( ./configs/quick_schedules/*instant_test.yaml )
|
||||
fi
|
||||
|
||||
echo "========================================================================"
|
||||
echo "Configs to run:"
|
||||
echo "${CFG_LIST[@]}"
|
||||
echo "========================================================================"
|
||||
|
||||
for cfg in "${CFG_LIST[@]}"; do
|
||||
echo "========================================================================"
|
||||
echo "Running $cfg ..."
|
||||
echo "========================================================================"
|
||||
$BIN --num-gpus $NUM_GPUS --config-file "$cfg" \
|
||||
SOLVER.IMS_PER_BATCH $SOLVER_IMS_PER_BATCH \
|
||||
OUTPUT_DIR "$OUTPUT"
|
||||
rm -rf "$OUTPUT"
|
||||
done
|
||||
|
@@ -0,0 +1,58 @@
|
||||
# Getting Started with DensePose
|
||||
|
||||
## Inference with Pre-trained Models
|
||||
|
||||
1. Pick a model and its config file from [Model Zoo](MODEL_ZOO.md), for example [densepose_rcnn_R_50_FPN_s1x.yaml](../configs/densepose_rcnn_R_50_FPN_s1x.yaml)
|
||||
2. Run the [Apply Net](TOOL_APPLY_NET.md) tool to visualize the results or save them to disk. For example, to use contour visualization for DensePose, one can run:
|
||||
```bash
|
||||
python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml densepose_rcnn_R_50_FPN_s1x.pkl image.jpg dp_contour,bbox --output image_densepose_contour.png
|
||||
```
|
||||
Please see [Apply Net](TOOL_APPLY_NET.md) for more details on the tool.
|
||||
|
||||
## Training
|
||||
|
||||
First, prepare the [dataset](http://densepose.org/#dataset) in the following structure under the directory from which you will run the training scripts:
|
||||
<pre>
|
||||
datasets/coco/
|
||||
annotations/
|
||||
densepose_{train,minival,valminusminival}2014.json
|
||||
<a href="https://dl.fbaipublicfiles.com/detectron2/densepose/densepose_minival2014_100.json">densepose_minival2014_100.json </a> (optional, for testing only)
|
||||
{train,val}2014/
|
||||
# image files that are mentioned in the corresponding json
|
||||
</pre>
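
Before launching training, it can help to sanity-check this layout. The following is a minimal sketch (not part of the training scripts) that only assumes the directory structure shown above; adjust the annotation file names to the configs you actually run:

```python
import os

coco_root = "datasets/coco"
annotation_files = [
    "densepose_train2014.json",
    "densepose_minival2014.json",
    "densepose_valminusminival2014.json",
]

# check the annotation files
for name in annotation_files:
    path = os.path.join(coco_root, "annotations", name)
    print(path, "OK" if os.path.isfile(path) else "MISSING")

# check the image folders referenced by the corresponding json files
for split in ("train2014", "val2014"):
    img_dir = os.path.join(coco_root, split)
    n_files = len(os.listdir(img_dir)) if os.path.isdir(img_dir) else 0
    print(img_dir, f"{n_files} files")
```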
|
||||
|
||||
To train a model one can use the [train_net.py](../train_net.py) script.
|
||||
This script was used to train all DensePose models in [Model Zoo](MODEL_ZOO.md).
|
||||
For example, to launch end-to-end DensePose-RCNN training with ResNet-50 FPN backbone
|
||||
on 8 GPUs following the s1x schedule, one can run
|
||||
```bash
|
||||
python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml --num-gpus 8
|
||||
```
|
||||
The configs are made for 8-GPU training. To train on 1 GPU, one can apply the
|
||||
[linear learning rate scaling rule](https://arxiv.org/abs/1706.02677):
|
||||
```bash
|
||||
python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml \
|
||||
SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025
|
||||
```
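
For reference, the scaled values above follow directly from the rule. A minimal sketch of the computation, assuming the base 8-GPU configuration uses `SOLVER.IMS_PER_BATCH 16` and `SOLVER.BASE_LR 0.02` (check the actual config files for the values used by a particular model):

```python
# linear scaling rule: the learning rate is proportional to the total batch size
base_ims_per_batch = 16   # assumed 8-GPU default, taken from the base config
base_lr = 0.02            # assumed 8-GPU default, taken from the base config

target_ims_per_batch = 2  # single-GPU batch size used in the command above
scaled_lr = base_lr * target_ims_per_batch / base_ims_per_batch
print(scaled_lr)          # 0.0025 -> SOLVER.BASE_LR in the command above
```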
|
||||
|
||||
## Evaluation
|
||||
|
||||
Model evaluation is done in the same way as training, except that one adds the `--eval-only` flag and
|
||||
specifies the model location via `MODEL.WEIGHTS model.pth` on the command line:
|
||||
```bash
|
||||
python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml \
|
||||
--eval-only MODEL.WEIGHTS model.pth
|
||||
```
|
||||
|
||||
## Tools
|
||||
|
||||
We provide tools which allow one to:
|
||||
- easily view DensePose annotated data in a dataset;
|
||||
- perform DensePose inference on a set of images;
|
||||
- visualize DensePose model results.
|
||||
|
||||
`query_db` is a tool to print or visualize DensePose data in a dataset.
|
||||
Please refer to [Query DB](TOOL_QUERY_DB.md) for more details on this tool.
|
||||
|
||||
`apply_net` is a tool to print or visualize DensePose results.
|
||||
Please refer to [Apply Net](TOOL_APPLY_NET.md) for more details on this tool.
|
@@ -0,0 +1,277 @@
|
||||
# Model Zoo and Baselines
|
||||
|
||||
# Introduction
|
||||
|
||||
We provide baselines trained with Detectron2 DensePose. The corresponding
|
||||
configuration files can be found in the [configs](../configs) directory.
|
||||
All models were trained on COCO `train2014` + `valminusminival2014` and
|
||||
evaluated on COCO `minival2014`. For details on the common settings under which the
|
||||
baselines were trained, please check [Detectron 2 Model Zoo](../../../MODEL_ZOO.md).
|
||||
|
||||
## License
|
||||
|
||||
All models available for download through this document are licensed under the
|
||||
[Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/)
|
||||
|
||||
## COCO DensePose Baselines with DensePose-RCNN
|
||||
|
||||
### Legacy Models
|
||||
|
||||
Baselines trained using schedules from [Güler et al, 2018](https://arxiv.org/pdf/1802.00434.pdf)
|
||||
|
||||
<table><tbody>
|
||||
<!-- START TABLE -->
|
||||
<!-- TABLE HEADER -->
|
||||
<th valign="bottom">Name</th>
|
||||
<th valign="bottom">lr<br/>sched</th>
|
||||
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
|
||||
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
|
||||
<th valign="bottom">train<br/>mem<br/>(GB)</th>
|
||||
<th valign="bottom">box<br/>AP</th>
|
||||
<th valign="bottom">dp. AP<br/>GPS</th>
|
||||
<th valign="bottom">dp. AP<br/>GPSm</th>
|
||||
<th valign="bottom">model id</th>
|
||||
<th valign="bottom">download</th>
|
||||
<!-- TABLE BODY -->
|
||||
<!-- ROW: densepose_rcnn_R_50_FPN_s1x_legacy -->
|
||||
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml">R_50_FPN_s1x_legacy</a></td>
|
||||
<td align="center">s1x</td>
|
||||
<td align="center">0.307</td>
|
||||
<td align="center">0.051</td>
|
||||
<td align="center">3.2</td>
|
||||
<td align="center">58.1</td>
|
||||
<td align="center">52.1</td>
|
||||
<td align="center">54.9</td>
|
||||
<td align="center">164832157</td>
|
||||
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x_legacy/164832157/model_final_d366fa.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x_legacy/164832157/metrics.json">metrics</a></td>
|
||||
</tr>
|
||||
<!-- ROW: densepose_rcnn_R_101_FPN_s1x_legacy -->
|
||||
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml">R_101_FPN_s1x_legacy</a></td>
|
||||
<td align="center">s1x</td>
|
||||
<td align="center">0.390</td>
|
||||
<td align="center">0.063</td>
|
||||
<td align="center">4.3</td>
|
||||
<td align="center">59.5</td>
|
||||
<td align="center">53.2</td>
|
||||
<td align="center">56.1</td>
|
||||
<td align="center">164832182</td>
|
||||
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x_legacy/164832182/model_final_10af0e.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x_legacy/164832182/metrics.json">metrics</a></td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
|
||||
### Improved Baselines, Original Fully Convolutional Head
|
||||
|
||||
These models use an improved training schedule and Panoptic FPN head from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446).
|
||||
|
||||
<table><tbody>
|
||||
<!-- START TABLE -->
|
||||
<!-- TABLE HEADER -->
|
||||
<th valign="bottom">Name</th>
|
||||
<th valign="bottom">lr<br/>sched</th>
|
||||
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
|
||||
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
|
||||
<th valign="bottom">train<br/>mem<br/>(GB)</th>
|
||||
<th valign="bottom">box<br/>AP</th>
|
||||
<th valign="bottom">dp. AP<br/>GPS</th>
|
||||
<th valign="bottom">dp. AP<br/>GPSm</th>
|
||||
<th valign="bottom">model id</th>
|
||||
<th valign="bottom">download</th>
|
||||
<!-- TABLE BODY -->
|
||||
<!-- ROW: densepose_rcnn_R_50_FPN_s1x -->
|
||||
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_s1x.yaml">R_50_FPN_s1x</a></td>
|
||||
<td align="center">s1x</td>
|
||||
<td align="center">0.359</td>
|
||||
<td align="center">0.066</td>
|
||||
<td align="center">4.5</td>
|
||||
<td align="center">61.2</td>
|
||||
<td align="center">63.7</td>
|
||||
<td align="center">65.3</td>
|
||||
<td align="center">165712039</td>
|
||||
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/metrics.json">metrics</a></td>
|
||||
</tr>
|
||||
<!-- ROW: densepose_rcnn_R_101_FPN_s1x -->
|
||||
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_s1x.yaml">R_101_FPN_s1x</a></td>
|
||||
<td align="center">s1x</td>
|
||||
<td align="center">0.428</td>
|
||||
<td align="center">0.079</td>
|
||||
<td align="center">5.8</td>
|
||||
<td align="center">62.3</td>
|
||||
<td align="center">64.5</td>
|
||||
<td align="center">66.4</td>
|
||||
<td align="center">165712084</td>
|
||||
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x/165712084/model_final_c6ab63.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x/165712084/metrics.json">metrics</a></td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
|
||||
### Improved Baselines, DeepLabV3 Head
|
||||
|
||||
These models use an improved training schedule, Panoptic FPN head from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446) and DeepLabV3 head from [Chen et al, 2017](https://arxiv.org/abs/1706.05587).
|
||||
|
||||
<table><tbody>
|
||||
<!-- START TABLE -->
|
||||
<!-- TABLE HEADER -->
|
||||
<th valign="bottom">Name</th>
|
||||
<th valign="bottom">lr<br/>sched</th>
|
||||
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
|
||||
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
|
||||
<th valign="bottom">train<br/>mem<br/>(GB)</th>
|
||||
<th valign="bottom">box<br/>AP</th>
|
||||
<th valign="bottom">dp. AP<br/>GPS</th>
|
||||
<th valign="bottom">dp. AP<br/>GPSm</th>
|
||||
<th valign="bottom">model id</th>
|
||||
<th valign="bottom">download</th>
|
||||
<!-- TABLE BODY -->
|
||||
<!-- ROW: densepose_rcnn_R_50_FPN_DL_s1x -->
|
||||
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml">R_50_FPN_DL_s1x</a></td>
|
||||
<td align="center">s1x</td>
|
||||
<td align="center">0.392</td>
|
||||
<td align="center">0.070</td>
|
||||
<td align="center">6.7</td>
|
||||
<td align="center">61.1</td>
|
||||
<td align="center">65.6</td>
|
||||
<td align="center">66.8</td>
|
||||
<td align="center">165712097</td>
|
||||
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_s1x/165712097/model_final_0ed407.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_s1x/165712097/metrics.json">metrics</a></td>
|
||||
</tr>
|
||||
<!-- ROW: densepose_rcnn_R_101_FPN_DL_s1x -->
|
||||
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml">R_101_FPN_DL_s1x</a></td>
|
||||
<td align="center">s1x</td>
|
||||
<td align="center">0.478</td>
|
||||
<td align="center">0.083</td>
|
||||
<td align="center">7.0</td>
|
||||
<td align="center">62.3</td>
|
||||
<td align="center">66.3</td>
|
||||
<td align="center">67.7</td>
|
||||
<td align="center">165712116</td>
|
||||
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_s1x/165712116/model_final_844d15.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_s1x/165712116/metrics.json">metrics</a></td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
|
||||
### Baselines with Confidence Estimation
|
||||
|
||||
These models perform additional estimation of confidence in regressed UV coordinates, along the lines of [Neverova et al., 2019](https://papers.nips.cc/paper/8378-correlated-uncertainty-for-learning-dense-correspondences-from-noisy-labels).
|
||||
|
||||
<table><tbody>
|
||||
<!-- START TABLE -->
|
||||
<!-- TABLE HEADER -->
|
||||
<th valign="bottom">Name</th>
|
||||
<th valign="bottom">lr<br/>sched</th>
|
||||
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
|
||||
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
|
||||
<th valign="bottom">train<br/>mem<br/>(GB)</th>
|
||||
<th valign="bottom">box<br/>AP</th>
|
||||
<th valign="bottom">dp. AP<br/>GPS</th>
|
||||
<th valign="bottom">dp. AP<br/>GPSm</th>
|
||||
<th valign="bottom">model id</th>
|
||||
<th valign="bottom">download</th>
|
||||
<!-- TABLE BODY -->
|
||||
<!-- ROW: densepose_rcnn_R_50_FPN_WC1_s1x -->
|
||||
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml">R_50_FPN_WC1_s1x</a></td>
|
||||
<td align="center">s1x</td>
|
||||
<td align="center">0.353</td>
|
||||
<td align="center">0.064</td>
|
||||
<td align="center">4.6</td>
|
||||
<td align="center">60.5</td>
|
||||
<td align="center">64.2</td>
|
||||
<td align="center">65.6</td>
|
||||
<td align="center">173862049</td>
|
||||
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_WC1_s1x/173862049/model_final_289019.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_WC1_s1x/173862049/metrics.json">metrics</a></td>
|
||||
</tr>
|
||||
<!-- ROW: densepose_rcnn_R_50_FPN_WC2_s1x -->
|
||||
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml">R_50_FPN_WC2_s1x</a></td>
|
||||
<td align="center">s1x</td>
|
||||
<td align="center">0.364</td>
|
||||
<td align="center">0.066</td>
|
||||
<td align="center">4.8</td>
|
||||
<td align="center">60.7</td>
|
||||
<td align="center">64.2</td>
|
||||
<td align="center">65.7</td>
|
||||
<td align="center">173861455</td>
|
||||
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_WC2_s1x/173861455/model_final_3abe14.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_WC2_s1x/173861455/metrics.json">metrics</a></td>
|
||||
</tr>
|
||||
<!-- ROW: densepose_rcnn_R_50_FPN_DL_WC1_s1x -->
|
||||
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml">R_50_FPN_DL_WC1_s1x</a></td>
|
||||
<td align="center">s1x</td>
|
||||
<td align="center">0.397</td>
|
||||
<td align="center">0.068</td>
|
||||
<td align="center">6.7</td>
|
||||
<td align="center">61.1</td>
|
||||
<td align="center">65.8</td>
|
||||
<td align="center">67.1</td>
|
||||
<td align="center">173067973</td>
|
||||
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_WC1_s1x/173067973/model_final_b1e525.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_WC1_s1x/173067973/metrics.json">metrics</a></td>
|
||||
</tr>
|
||||
<!-- ROW: densepose_rcnn_R_50_FPN_DL_WC2_s1x -->
|
||||
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml">R_50_FPN_DL_WC2_s1x</a></td>
|
||||
<td align="center">s1x</td>
|
||||
<td align="center">0.410</td>
|
||||
<td align="center">0.070</td>
|
||||
<td align="center">6.8</td>
|
||||
<td align="center">60.8</td>
|
||||
<td align="center">65.6</td>
|
||||
<td align="center">66.7</td>
|
||||
<td align="center">173859335</td>
|
||||
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_WC2_s1x/173859335/model_final_60fed4.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_WC2_s1x/173859335/metrics.json">metrics</a></td>
|
||||
</tr>
|
||||
<!-- ROW: densepose_rcnn_R_101_FPN_WC1_s1x -->
|
||||
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml">R_101_FPN_WC1_s1x</a></td>
|
||||
<td align="center">s1x</td>
|
||||
<td align="center">0.435</td>
|
||||
<td align="center">0.076</td>
|
||||
<td align="center">5.7</td>
|
||||
<td align="center">62.5</td>
|
||||
<td align="center">64.9</td>
|
||||
<td align="center">66.5</td>
|
||||
<td align="center">171402969</td>
|
||||
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_WC1_s1x/171402969/model_final_9e47f0.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_WC1_s1x/171402969/metrics.json">metrics</a></td>
|
||||
</tr>
|
||||
<!-- ROW: densepose_rcnn_R_101_FPN_WC2_s1x -->
|
||||
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml">R_101_FPN_WC2_s1x</a></td>
|
||||
<td align="center">s1x</td>
|
||||
<td align="center">0.450</td>
|
||||
<td align="center">0.078</td>
|
||||
<td align="center">5.7</td>
|
||||
<td align="center">62.3</td>
|
||||
<td align="center">64.8</td>
|
||||
<td align="center">66.6</td>
|
||||
<td align="center">173860702</td>
|
||||
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_WC2_s1x/173860702/model_final_5ea023.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_WC2_s1x/173860702/metrics.json">metrics</a></td>
|
||||
</tr>
|
||||
<!-- ROW: densepose_rcnn_R_101_FPN_DL_WC1_s1x -->
|
||||
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml">R_101_FPN_DL_WC1_s1x</a></td>
|
||||
<td align="center">s1x</td>
|
||||
<td align="center">0.479</td>
|
||||
<td align="center">0.081</td>
|
||||
<td align="center">7.9</td>
|
||||
<td align="center">62.0</td>
|
||||
<td align="center">66.2</td>
|
||||
<td align="center">67.4</td>
|
||||
<td align="center">173858525</td>
|
||||
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_WC1_s1x/173858525/model_final_f359f3.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_WC1_s1x/173858525/metrics.json">metrics</a></td>
|
||||
</tr>
|
||||
<!-- ROW: densepose_rcnn_R_101_FPN_DL_WC2_s1x -->
|
||||
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml">R_101_FPN_DL_WC2_s1x</a></td>
|
||||
<td align="center">s1x</td>
|
||||
<td align="center">0.491</td>
|
||||
<td align="center">0.082</td>
|
||||
<td align="center">7.6</td>
|
||||
<td align="center">61.7</td>
|
||||
<td align="center">65.9</td>
|
||||
<td align="center">67.3</td>
|
||||
<td align="center">173294801</td>
|
||||
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_WC2_s1x/173294801/model_final_6e1ed1.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_WC2_s1x/173294801/metrics.json">metrics</a></td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
|
||||
## Old Baselines
|
||||
|
||||
It is still possible to use some baselines from [DensePose 1](https://github.com/facebookresearch/DensePose).
|
||||
Below are evaluation metrics for the baselines recomputed in the current framework:
|
||||
|
||||
| Model | bbox AP | AP | AP50 | AP75 | APm | APl |
|
||||
|-----|-----|-----|--- |--- |--- |--- |
|
||||
| [`ResNet50_FPN_s1x-e2e`](https://dl.fbaipublicfiles.com/densepose/DensePose_ResNet50_FPN_s1x-e2e.pkl) | 54.673 | 48.894 | 84.963 | 50.717 | 43.132 | 50.433 |
|
||||
| [`ResNet101_FPN_s1x-e2e`](https://dl.fbaipublicfiles.com/densepose/DensePose_ResNet101_FPN_s1x-e2e.pkl) | 56.032 | 51.088 | 86.250 | 55.057 | 46.542 | 52.563 |
|
||||
|
||||
Note: these scores are close, but not strictly equal to the ones reported in the [DensePose 1 Model Zoo](https://github.com/facebookresearch/DensePose/blob/master/MODEL_ZOO.md),
|
||||
which is due to small incompatibilities between the frameworks.
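
As an illustration, a minimal sketch of fetching one of the checkpoints listed above so that it can be passed to the tools in this project (the URL is copied from the table; any download method works equally well):

```python
import urllib.request

url = "https://dl.fbaipublicfiles.com/densepose/DensePose_ResNet50_FPN_s1x-e2e.pkl"
local_path = "DensePose_ResNet50_FPN_s1x-e2e.pkl"

# download the legacy DensePose 1 checkpoint to the current directory
urllib.request.urlretrieve(url, local_path)
print("saved to", local_path)
```

The downloaded file can then be used as the `<model>` argument of `apply_net.py` (see [Apply Net](TOOL_APPLY_NET.md)).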
|
@@ -0,0 +1,130 @@
|
||||
# Apply Net
|
||||
|
||||
`apply_net` is a tool to print or visualize DensePose results on a set of images.
|
||||
It has two modes: `dump` to save DensePose model results to a pickle file
|
||||
and `show` to visualize them on images.
|
||||
|
||||
## Dump Mode
|
||||
|
||||
The general command form is:
|
||||
```bash
|
||||
python apply_net.py dump [-h] [-v] [--output <dump_file>] <config> <model> <input>
|
||||
```
|
||||
|
||||
There are three mandatory arguments:
|
||||
- `<config>`, configuration file for a given model;
|
||||
- `<model>`, model file with trained parameters
|
||||
- `<input>`, input image file name, pattern or folder
|
||||
|
||||
One can additionally provide the `--output` argument to define the output file name,
|
||||
which defaults to `output.pkl`.
|
||||
|
||||
|
||||
Examples:
|
||||
|
||||
1. Dump results of a DensePose model with ResNet-50 FPN backbone for images
|
||||
in a folder `images` to file `dump.pkl`:
|
||||
```bash
|
||||
python apply_net.py dump configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl images --output dump.pkl -v
|
||||
```
|
||||
|
||||
2. Dump results of a DensePose model with ResNet-50 FPN backbone for images
|
||||
with file name matching a pattern `image*.jpg` to file `results.pkl`:
|
||||
```bash
|
||||
python apply_net.py dump configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl "image*.jpg" --output results.pkl -v
|
||||
```
|
||||
|
||||
If you want to load the pickle file generated by the above command:
|
||||
```python
import pickle
import sys
|
||||
# make sure DensePose is in your PYTHONPATH, or use the following line to add it:
|
||||
sys.path.append("/your_detectron2_path/detectron2_repo/projects/DensePose/")
|
||||
|
||||
f = open('/your_result_path/results.pkl', 'rb')
|
||||
data = pickle.load(f)
|
||||
```
|
||||
|
||||
The file `results.pkl` contains a list of results, one per image; for each image the result is a dictionary:
|
||||
```
|
||||
data: [{'file_name': '/your_path/image1.jpg',
|
||||
'scores': tensor([0.9884]),
|
||||
'pred_boxes_XYXY': tensor([[ 69.6114, 0.0000, 706.9797, 706.0000]]),
|
||||
'pred_densepose': <densepose.structures.DensePoseResult object at 0x7f791b312470>},
|
||||
{'file_name': '/your_path/image2.jpg',
|
||||
'scores': tensor([0.9999, 0.5373, 0.3991]),
|
||||
'pred_boxes_XYXY': tensor([[ 59.5734, 7.7535, 579.9311, 932.3619],
|
||||
[612.9418, 686.1254, 612.9999, 704.6053],
|
||||
[164.5081, 407.4034, 598.3944, 920.4266]]),
|
||||
'pred_densepose': <densepose.structures.DensePoseResult object at 0x7f7071229be0>}]
|
||||
```

We can use the following code to parse the outputs of the first
detected instance in the first image.
```python
from densepose.structures import DensePoseResult

img_id, instance_id = 0, 0  # Look at the first image and the first detected instance
bbox_xyxy = data[img_id]['pred_boxes_XYXY'][instance_id]
result_encoded = data[img_id]['pred_densepose'].results[instance_id]
iuv_arr = DensePoseResult.decode_png_data(*result_encoded)
```

The array `bbox_xyxy` contains (x0, y0, x1, y1) of the bounding box.

The shape of `iuv_arr` is `[3, H, W]`, where (H, W) is the shape of the bounding box.
- `iuv_arr[0,:,:]`: The patch index of image points, indicating which of the 24 surface patches the point is on.
- `iuv_arr[1,:,:]`: The U-coordinate value of image points.
- `iuv_arr[2,:,:]`: The V-coordinate value of image points.
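
For instance, the decoded values can be placed back into full-image coordinates using the bounding box. The following is a minimal sketch, not part of the tool: the image size (`image_h`, `image_w`) and the rounding of the box coordinates are assumptions made for illustration, and the box is assumed to lie inside the image.
```python
import numpy as np

# Paste the part-index channel of the decoded IUV array into an empty
# full-image mask; 0 is background, 1..24 are the surface patches.
image_h, image_w = 1080, 1920  # assumed image size; use the real one
x0, y0 = int(round(float(bbox_xyxy[0]))), int(round(float(bbox_xyxy[1])))
h, w = iuv_arr.shape[1], iuv_arr.shape[2]

part_index = np.zeros((image_h, image_w), dtype=np.uint8)
part_index[y0 : y0 + h, x0 : x0 + w] = iuv_arr[0]
```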

## Visualization Mode

The general command form is:
```bash
python apply_net.py show [-h] [-v] [--min_score <score>] [--nms_thresh <threshold>] [--output <image_file>] <config> <model> <input> <visualizations>
```

There are four mandatory arguments:
- `<config>`, configuration file for a given model;
- `<model>`, model file with trained parameters
- `<input>`, input image file name, pattern or folder
- `<visualizations>`, visualizations specifier; currently available visualizations are:
  * `bbox` - bounding boxes of detected persons;
  * `dp_segm` - segmentation masks for detected persons;
  * `dp_u` - each body part is colored according to the estimated values of the
    U coordinate in part parameterization;
  * `dp_v` - each body part is colored according to the estimated values of the
    V coordinate in part parameterization;
  * `dp_contour` - plots contours with color-coded U and V coordinates

One can additionally provide the following optional arguments:
- `--min_score` to only show detections with scores not lower than the provided value
- `--nms_thresh` to additionally apply non-maximum suppression to detections at a given threshold
- `--output` to define the visualization file name template, which defaults to `output.png`.
  To distinguish output file names for different images, the tool appends a 1-based entry index,
  e.g. output.0001.png, output.0002.png, etc.

The following examples show how to output results of a DensePose model
with ResNet-50 FPN backbone using different visualizations for image `image.jpg`:

1. Show bounding box and segmentation:
```bash
python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_segm -v
```


2. Show bounding box and estimated U coordinates for body parts:
```bash
python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_u -v
```


3. Show bounding box and estimated V coordinates for body parts:
```bash
python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_v -v
```


4. Show bounding box and estimated U and V coordinates via contour plots:
```bash
python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg dp_contour,bbox -v
```

@@ -0,0 +1,105 @@
# Query Dataset

`query_db` is a tool to print or visualize DensePose data from a dataset.
It has two modes: `print` and `show` to output dataset entries to standard
output or to visualize them on images.

## Print Mode

The general command form is:
```bash
python query_db.py print [-h] [-v] [--max-entries N] <dataset> <selector>
```

There are two mandatory arguments:
- `<dataset>`, DensePose dataset specification, from which to select
  the entries (e.g. `densepose_coco_2014_train`).
- `<selector>`, dataset entry selector which can be a single specification,
  or a comma-separated list of specifications of the form
  `field[:type]=value` for exact match with the value
  or `field[:type]=min-max` for a range of values

One can additionally limit the maximum number of entries to output
by providing `--max-entries` argument.

Examples:

1. Output at most 10 first entries from the `densepose_coco_2014_train` dataset:
```bash
python query_db.py print densepose_coco_2014_train \* --max-entries 10 -v
```

2. Output all entries with `file_name` equal to `COCO_train2014_000000000036.jpg`:
```bash
python query_db.py print densepose_coco_2014_train file_name=COCO_train2014_000000000036.jpg -v
```

3. Output all entries with `image_id` between 36 and 156:
```bash
python query_db.py print densepose_coco_2014_train image_id:int=36-156 -v
```
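
The same selection can also be done programmatically with the helpers that `query_db.py` uses (the script is included later in this document). A minimal sketch, assuming the DensePose datasets have been registered (importing the project's `densepose` data modules, as `query_db.py` does, is assumed to take care of this):
```python
from detectron2.data.catalog import DatasetCatalog

from densepose.utils.dbhelper import EntrySelector

# Select all entries with image_id between 36 and 156, as in example 3 above.
# Assumes densepose_coco_2014_train has already been registered.
dataset = DatasetCatalog.get("densepose_coco_2014_train")
selector = EntrySelector.from_string("image_id:int=36-156")
selected = [entry for entry in dataset if selector(entry)]
print("Selected {} entries".format(len(selected)))
```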

## Visualization Mode

The general command form is:
```bash
python query_db.py show [-h] [-v] [--max-entries N] [--output <image_file>] <dataset> <selector> <visualizations>
```

There are three mandatory arguments:
- `<dataset>`, DensePose dataset specification, from which to select
  the entries (e.g. `densepose_coco_2014_train`).
- `<selector>`, dataset entry selector which can be a single specification,
  or a comma-separated list of specifications of the form
  `field[:type]=value` for exact match with the value
  or `field[:type]=min-max` for a range of values
- `<visualizations>`, visualizations specifier; currently available visualizations are:
  * `bbox` - bounding boxes of annotated persons;
  * `dp_i` - annotated points colored according to the containing part;
  * `dp_pts` - annotated points in green color;
  * `dp_segm` - segmentation masks for annotated persons;
  * `dp_u` - annotated points colored according to their U coordinate in part parameterization;
  * `dp_v` - annotated points colored according to their V coordinate in part parameterization;

One can additionally provide the following optional arguments:
- `--max-entries` to limit the maximum number of entries to visualize
- `--output` to provide the visualization file name template, which defaults
  to `output.png`. To distinguish file names for different dataset
  entries, the tool appends a 1-based entry index to the output file name,
  e.g. output.0001.png, output.0002.png, etc.
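
The naming scheme mirrors the `_get_out_fname` helper in `query_db.py` (shown later in this document); a small illustrative sketch, with a hypothetical `out_fname` function standing in for that helper:
```python
import os

def out_fname(fname_base, entry_idx):
    # "output.png" with entry index 1 becomes "output.0001.png"
    base, ext = os.path.splitext(fname_base)
    return base + ".{0:04d}".format(entry_idx) + ext

print(out_fname("output.png", 1))  # output.0001.png
```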

The following examples show how to output different visualizations for image with `id = 322`
from `densepose_coco_2014_train` dataset:

1. Show bounding box and segmentation:
```bash
python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_segm -v
```


2. Show bounding box and points colored according to the containing part:
```bash
python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_i -v
```

3. Show bounding box and annotated points in green color:
```bash
python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_pts -v
```


4. Show bounding box and annotated points colored according to their U coordinate in part parameterization:
```bash
python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_u -v
```


5. Show bounding box and annotated points colored according to their V coordinate in part parameterization:
```bash
python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_v -v
```

@@ -0,0 +1,250 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

import argparse
import logging
import os
import sys
from timeit import default_timer as timer
from typing import Any, ClassVar, Dict, List
import torch
from fvcore.common.file_io import PathManager

from detectron2.data.catalog import DatasetCatalog
from detectron2.utils.logger import setup_logger

from densepose.data.structures import DensePoseDataRelative
from densepose.utils.dbhelper import EntrySelector
from densepose.utils.logger import verbosity_to_level
from densepose.vis.base import CompoundVisualizer
from densepose.vis.bounding_box import BoundingBoxVisualizer
from densepose.vis.densepose import (
    DensePoseDataCoarseSegmentationVisualizer,
    DensePoseDataPointsIVisualizer,
    DensePoseDataPointsUVisualizer,
    DensePoseDataPointsVisualizer,
    DensePoseDataPointsVVisualizer,
)

DOC = """Query DB - a tool to print / visualize data from a database
"""

LOGGER_NAME = "query_db"

logger = logging.getLogger(LOGGER_NAME)

_ACTION_REGISTRY: Dict[str, "Action"] = {}


class Action(object):
    @classmethod
    def add_arguments(cls: type, parser: argparse.ArgumentParser):
        parser.add_argument(
            "-v",
            "--verbosity",
            action="count",
            help="Verbose mode. Multiple -v options increase the verbosity.",
        )


def register_action(cls: type):
    """
    Decorator for action classes to automate action registration
    """
    global _ACTION_REGISTRY
    _ACTION_REGISTRY[cls.COMMAND] = cls
    return cls


class EntrywiseAction(Action):
    @classmethod
    def add_arguments(cls: type, parser: argparse.ArgumentParser):
        super(EntrywiseAction, cls).add_arguments(parser)
        parser.add_argument(
            "dataset", metavar="<dataset>", help="Dataset name (e.g. densepose_coco_2014_train)"
        )
        parser.add_argument(
            "selector",
            metavar="<selector>",
            help="Dataset entry selector in the form field1[:type]=value1[,"
            "field2[:type]=value_min-value_max...] which selects all "
            "entries from the dataset that satisfy the constraints",
        )
        parser.add_argument(
            "--max-entries", metavar="N", help="Maximum number of entries to process", type=int
        )

    @classmethod
    def execute(cls: type, args: argparse.Namespace):
        dataset = setup_dataset(args.dataset)
        entry_selector = EntrySelector.from_string(args.selector)
        context = cls.create_context(args)
        if args.max_entries is not None:
            for _, entry in zip(range(args.max_entries), dataset):
                if entry_selector(entry):
                    cls.execute_on_entry(entry, context)
        else:
            for entry in dataset:
                if entry_selector(entry):
                    cls.execute_on_entry(entry, context)

    @classmethod
    def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]:
        context = {}
        return context


@register_action
class PrintAction(EntrywiseAction):
    """
    Print action that outputs selected entries to stdout
    """

    COMMAND: ClassVar[str] = "print"

    @classmethod
    def add_parser(cls: type, subparsers: argparse._SubParsersAction):
        parser = subparsers.add_parser(cls.COMMAND, help="Output selected entries to stdout. ")
        cls.add_arguments(parser)
        parser.set_defaults(func=cls.execute)

    @classmethod
    def add_arguments(cls: type, parser: argparse.ArgumentParser):
        super(PrintAction, cls).add_arguments(parser)

    @classmethod
    def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]):
        import pprint

        printer = pprint.PrettyPrinter(indent=2, width=200, compact=True)
        printer.pprint(entry)


@register_action
class ShowAction(EntrywiseAction):
    """
    Show action that visualizes selected entries on an image
    """

    COMMAND: ClassVar[str] = "show"
    VISUALIZERS: ClassVar[Dict[str, object]] = {
        "dp_segm": DensePoseDataCoarseSegmentationVisualizer(),
        "dp_i": DensePoseDataPointsIVisualizer(),
        "dp_u": DensePoseDataPointsUVisualizer(),
        "dp_v": DensePoseDataPointsVVisualizer(),
        "dp_pts": DensePoseDataPointsVisualizer(),
        "bbox": BoundingBoxVisualizer(),
    }

    @classmethod
    def add_parser(cls: type, subparsers: argparse._SubParsersAction):
        parser = subparsers.add_parser(cls.COMMAND, help="Visualize selected entries")
        cls.add_arguments(parser)
        parser.set_defaults(func=cls.execute)

    @classmethod
    def add_arguments(cls: type, parser: argparse.ArgumentParser):
        super(ShowAction, cls).add_arguments(parser)
        parser.add_argument(
            "visualizations",
            metavar="<visualizations>",
            help="Comma separated list of visualizations, possible values: "
            "[{}]".format(",".join(sorted(cls.VISUALIZERS.keys()))),
        )
        parser.add_argument(
            "--output",
            metavar="<image_file>",
            default="output.png",
            help="File name to save output to",
        )

    @classmethod
    def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]):
        import cv2
        import numpy as np

        image_fpath = PathManager.get_local_path(entry["file_name"])
        image = cv2.imread(image_fpath, cv2.IMREAD_GRAYSCALE)
        image = np.tile(image[:, :, np.newaxis], [1, 1, 3])
        datas = cls._extract_data_for_visualizers_from_entry(context["vis_specs"], entry)
        visualizer = context["visualizer"]
        image_vis = visualizer.visualize(image, datas)
        entry_idx = context["entry_idx"] + 1
        out_fname = cls._get_out_fname(entry_idx, context["out_fname"])
        cv2.imwrite(out_fname, image_vis)
        logger.info(f"Output saved to {out_fname}")
        context["entry_idx"] += 1

    @classmethod
    def _get_out_fname(cls: type, entry_idx: int, fname_base: str):
        base, ext = os.path.splitext(fname_base)
        return base + ".{0:04d}".format(entry_idx) + ext

    @classmethod
    def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]:
        vis_specs = args.visualizations.split(",")
        visualizers = []
        for vis_spec in vis_specs:
            vis = cls.VISUALIZERS[vis_spec]
            visualizers.append(vis)
        context = {
            "vis_specs": vis_specs,
            "visualizer": CompoundVisualizer(visualizers),
            "out_fname": args.output,
            "entry_idx": 0,
        }
        return context

    @classmethod
    def _extract_data_for_visualizers_from_entry(
        cls: type, vis_specs: List[str], entry: Dict[str, Any]
    ):
        dp_list = []
        bbox_list = []
        for annotation in entry["annotations"]:
            is_valid, _ = DensePoseDataRelative.validate_annotation(annotation)
            if not is_valid:
                continue
            bbox = torch.as_tensor(annotation["bbox"])
            bbox_list.append(bbox)
            dp_data = DensePoseDataRelative(annotation)
            dp_list.append(dp_data)
        datas = []
        for vis_spec in vis_specs:
            datas.append(bbox_list if "bbox" == vis_spec else (bbox_list, dp_list))
        return datas


def setup_dataset(dataset_name):
    logger.info("Loading dataset {}".format(dataset_name))
    start = timer()
    dataset = DatasetCatalog.get(dataset_name)
    stop = timer()
    logger.info("Loaded dataset {} in {:.3f}s".format(dataset_name, stop - start))
    return dataset


def create_argument_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        description=DOC,
        formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=120),
    )
    parser.set_defaults(func=lambda _: parser.print_help(sys.stdout))
    subparsers = parser.add_subparsers(title="Actions")
    for _, action in _ACTION_REGISTRY.items():
        action.add_parser(subparsers)
    return parser


def main():
    parser = create_argument_parser()
    args = parser.parse_args()
    verbosity = args.verbosity if hasattr(args, "verbosity") else None
    global logger
    logger = setup_logger(name=LOGGER_NAME)
    logger.setLevel(verbosity_to_level(verbosity))
    args.func(args)


if __name__ == "__main__":
    main()
@@ -0,0 +1,110 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

import os
import torch

from detectron2.config import get_cfg
from detectron2.engine import default_setup
from detectron2.modeling import build_model

from densepose import add_dataset_category_config, add_densepose_config

_BASE_CONFIG_DIR = "configs"
_EVOLUTION_CONFIG_SUB_DIR = "evolution"
_QUICK_SCHEDULES_CONFIG_SUB_DIR = "quick_schedules"
_BASE_CONFIG_FILE_PREFIX = "Base-"
_CONFIG_FILE_EXT = ".yaml"


def _get_base_config_dir():
    """
    Return the base directory for configurations
    """
    return os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", _BASE_CONFIG_DIR)


def _get_evolution_config_dir():
    """
    Return the base directory for evolution configurations
    """
    return os.path.join(_get_base_config_dir(), _EVOLUTION_CONFIG_SUB_DIR)


def _get_quick_schedules_config_dir():
    """
    Return the base directory for quick schedules configurations
    """
    return os.path.join(_get_base_config_dir(), _QUICK_SCHEDULES_CONFIG_SUB_DIR)


def _collect_config_files(config_dir):
    """
    Collect all configuration files (i.e. densepose_*.yaml) directly in the specified directory
    """
    start = _get_base_config_dir()
    results = []
    for entry in os.listdir(config_dir):
        path = os.path.join(config_dir, entry)
        if not os.path.isfile(path):
            continue
        _, ext = os.path.splitext(entry)
        if ext != _CONFIG_FILE_EXT:
            continue
        if entry.startswith(_BASE_CONFIG_FILE_PREFIX):
            continue
        config_file = os.path.relpath(path, start)
        results.append(config_file)
    return results


def get_config_files():
    """
    Get all the configuration files (relative to the base configuration directory)
    """
    return _collect_config_files(_get_base_config_dir())


def get_evolution_config_files():
    """
    Get all the evolution configuration files (relative to the base configuration directory)
    """
    return _collect_config_files(_get_evolution_config_dir())


def get_quick_schedules_config_files():
    """
    Get all the quick schedules configuration files (relative to the base configuration directory)
    """
    return _collect_config_files(_get_quick_schedules_config_dir())


def _get_model_config(config_file):
    """
    Load and return the configuration from the specified file (relative to the base configuration
    directory)
    """
    cfg = get_cfg()
    add_dataset_category_config(cfg)
    add_densepose_config(cfg)
    path = os.path.join(_get_base_config_dir(), config_file)
    cfg.merge_from_file(path)
    if not torch.cuda.is_available():
        cfg.MODEL.DEVICE = "cpu"
    return cfg


def get_model(config_file):
    """
    Get the model from the specified file (relative to the base configuration directory)
    """
    cfg = _get_model_config(config_file)
    return build_model(cfg)


def setup(config_file):
    """
    Setup the configuration from the specified file (relative to the base configuration directory)
    """
    cfg = _get_model_config(config_file)
    cfg.freeze()
    default_setup(cfg, {})
@@ -0,0 +1,43 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

import unittest
import torch

from detectron2.structures import BitMasks, Boxes, Instances

from .common import get_model


# TODO(plabatut): Modularize detectron2 tests and re-use
def make_model_inputs(image, instances=None):
    if instances is None:
        return {"image": image}

    return {"image": image, "instances": instances}


def make_empty_instances(h, w):
    instances = Instances((h, w))
    instances.gt_boxes = Boxes(torch.rand(0, 4))
    instances.gt_classes = torch.tensor([]).to(dtype=torch.int64)
    instances.gt_masks = BitMasks(torch.rand(0, h, w))
    return instances


class ModelE2ETest(unittest.TestCase):
    CONFIG_PATH = ""

    def setUp(self):
        self.model = get_model(self.CONFIG_PATH)

    def _test_eval(self, sizes):
        inputs = [make_model_inputs(torch.rand(3, size[0], size[1])) for size in sizes]
        self.model.eval()
        self.model(inputs)


class DensePoseRCNNE2ETest(ModelE2ETest):
    CONFIG_PATH = "densepose_rcnn_R_101_FPN_s1x.yaml"

    def test_empty_data(self):
        self._test_eval([(200, 250), (200, 249)])
@@ -0,0 +1,30 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

import unittest

from .common import (
    get_config_files,
    get_evolution_config_files,
    get_quick_schedules_config_files,
    setup,
)


class TestSetup(unittest.TestCase):
    def _test_setup(self, config_file):
        setup(config_file)

    def test_setup_configs(self):
        config_files = get_config_files()
        for config_file in config_files:
            self._test_setup(config_file)

    def test_setup_evolution_configs(self):
        config_files = get_evolution_config_files()
        for config_file in config_files:
            self._test_setup(config_file)

    def test_setup_quick_schedules_configs(self):
        config_files = get_quick_schedules_config_files()
        for config_file in config_files:
            self._test_setup(config_file)
@@ -0,0 +1,25 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

import unittest

from densepose.data.structures import normalized_coords_transform


class TestStructures(unittest.TestCase):
    def test_normalized_coords_transform(self):
        bbox = (32, 24, 288, 216)
        x0, y0, w, h = bbox
        xmin, ymin, xmax, ymax = x0, y0, x0 + w, y0 + h
        f = normalized_coords_transform(*bbox)
        # Top-left
        expected_p, actual_p = (-1, -1), f((xmin, ymin))
        self.assertEqual(expected_p, actual_p)
        # Top-right
        expected_p, actual_p = (1, -1), f((xmax, ymin))
        self.assertEqual(expected_p, actual_p)
        # Bottom-left
        expected_p, actual_p = (-1, 1), f((xmin, ymax))
        self.assertEqual(expected_p, actual_p)
        # Bottom-right
        expected_p, actual_p = (1, 1), f((xmax, ymax))
        self.assertEqual(expected_p, actual_p)
@@ -0,0 +1,122 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

"""
DensePose Training Script.

This script is similar to the training script in detectron2/tools.

It is an example of how a user might use detectron2 for a new project.
"""

import logging
import os
from collections import OrderedDict
from fvcore.common.file_io import PathManager

import detectron2.utils.comm as comm
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import CfgNode, get_cfg
from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch
from detectron2.evaluation import COCOEvaluator, DatasetEvaluators, verify_results
from detectron2.modeling import DatasetMapperTTA
from detectron2.utils.logger import setup_logger

from densepose import (
    DensePoseCOCOEvaluator,
    DensePoseGeneralizedRCNNWithTTA,
    add_dataset_category_config,
    add_densepose_config,
    load_from_cfg,
)
from densepose.data import DatasetMapper, build_detection_test_loader, build_detection_train_loader


class Trainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg: CfgNode, dataset_name, output_folder=None):
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        evaluators = [COCOEvaluator(dataset_name, cfg, True, output_folder)]
        if cfg.MODEL.DENSEPOSE_ON:
            evaluators.append(DensePoseCOCOEvaluator(dataset_name, True, output_folder))
        return DatasetEvaluators(evaluators)

    @classmethod
    def build_test_loader(cls, cfg: CfgNode, dataset_name):
        return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False))

    @classmethod
    def build_train_loader(cls, cfg: CfgNode):
        return build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True))

    @classmethod
    def test_with_TTA(cls, cfg: CfgNode, model):
        logger = logging.getLogger("detectron2.trainer")
        # At the end of training, run an evaluation with TTA.
        # Only support some R-CNN models.
        logger.info("Running inference with test-time augmentation ...")
        transform_data = load_from_cfg(cfg)
        model = DensePoseGeneralizedRCNNWithTTA(cfg, model, transform_data, DatasetMapperTTA(cfg))
        evaluators = [
            cls.build_evaluator(
                cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
            )
            for name in cfg.DATASETS.TEST
        ]
        res = cls.test(cfg, model, evaluators)
        res = OrderedDict({k + "_TTA": v for k, v in res.items()})
        return res


def setup(args):
    cfg = get_cfg()
    add_dataset_category_config(cfg)
    add_densepose_config(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    default_setup(cfg, args)
    # Setup logger for "densepose" module
    setup_logger(output=cfg.OUTPUT_DIR, distributed_rank=comm.get_rank(), name="densepose")
    return cfg


def main(args):
    cfg = setup(args)
    # disable strict kwargs checking: allow one to specify path handle
    # hints through kwargs, like timeout in DP evaluation
    PathManager.set_strict_kwargs_checking(False)

    if args.eval_only:
        model = Trainer.build_model(cfg)
        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            cfg.MODEL.WEIGHTS, resume=args.resume
        )
        res = Trainer.test(cfg, model)
        if cfg.TEST.AUG.ENABLED:
            res.update(Trainer.test_with_TTA(cfg, model))
        if comm.is_main_process():
            verify_results(cfg, res)
        return res

    trainer = Trainer(cfg)
    trainer.resume_or_load(resume=args.resume)
    if cfg.TEST.AUG.ENABLED:
        trainer.register_hooks(
            [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))]
        )
    return trainer.train()


if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    print("Command Line Args:", args)
    launch(
        main,
        args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )