Add at new repo again

2025-01-28 21:48:35 +00:00
commit 6e660ddb3c
564 changed files with 75575 additions and 0 deletions

View File

@@ -0,0 +1 @@
_build

View File

@@ -0,0 +1,19 @@
# Minimal makefile for Sphinx documentation
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

View File

@@ -0,0 +1,16 @@
# Read the docs:
The latest documentation built from this directory is available at [detectron2.readthedocs.io](https://detectron2.readthedocs.io/).
Documents in this directory are not meant to be read on github.
# Build the docs:
1. Install detectron2 according to [INSTALL.md](INSTALL.md).
2. Install additional libraries required to build docs:
- docutils==0.16
- Sphinx==3.0.0
- recommonmark==0.6.0
- sphinx_rtd_theme
- mock
3. Run `make html` from this directory.

View File

@@ -0,0 +1,335 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# flake8: noqa
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
import mock
from sphinx.domains import Domain
from typing import Dict, List, Tuple
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
import sphinx_rtd_theme
class GithubURLDomain(Domain):
"""
Resolve certain links in markdown files to github source.
"""
name = "githuburl"
ROOT = "https://github.com/facebookresearch/detectron2/blob/master/"
LINKED_DOC = ["tutorials/install", "tutorials/getting_started"]
def resolve_any_xref(self, env, fromdocname, builder, target, node, contnode):
github_url = None
if not target.endswith("html") and target.startswith("../../"):
url = target.replace("../", "")
github_url = url
if fromdocname in self.LINKED_DOC:
# unresolved links in these docs are all github links
github_url = target
if github_url is not None:
if github_url.endswith("MODEL_ZOO") or github_url.endswith("README"):
# bug of recommonmark.
# https://github.com/readthedocs/recommonmark/blob/ddd56e7717e9745f11300059e4268e204138a6b1/recommonmark/parser.py#L152-L155
github_url += ".md"
print("Ref {} resolved to github:{}".format(target, github_url))
contnode["refuri"] = self.ROOT + github_url
return [("githuburl:any", contnode)]
else:
return []
# to support markdown
from recommonmark.parser import CommonMarkParser
sys.path.insert(0, os.path.abspath("../"))
os.environ["DOC_BUILDING"] = "True"
DEPLOY = os.environ.get("READTHEDOCS") == "True"
# -- Project information -----------------------------------------------------
# fmt: off
try:
import torch # noqa
except ImportError:
for m in [
"torch", "torchvision", "torch.nn", "torch.nn.parallel", "torch.distributed", "torch.multiprocessing", "torch.autograd",
"torch.autograd.function", "torch.nn.modules", "torch.nn.modules.utils", "torch.utils", "torch.utils.data", "torch.onnx",
"torchvision", "torchvision.ops",
]:
sys.modules[m] = mock.Mock(name=m)
sys.modules['torch'].__version__ = "1.5" # fake version
for m in [
"cv2", "scipy", "portalocker", "detectron2._C",
"pycocotools", "pycocotools.mask", "pycocotools.coco", "pycocotools.cocoeval",
"google", "google.protobuf", "google.protobuf.internal", "onnx",
"caffe2", "caffe2.proto", "caffe2.python", "caffe2.python.utils", "caffe2.python.onnx", "caffe2.python.onnx.backend",
]:
sys.modules[m] = mock.Mock(name=m)
# fmt: on
sys.modules["cv2"].__version__ = "3.4"
import detectron2 # isort: skip
project = "detectron2"
copyright = "2019-2020, detectron2 contributors"
author = "detectron2 contributors"
# The short X.Y version
version = detectron2.__version__
# The full version, including alpha/beta/rc tags
release = version
# -- General configuration ---------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#
needs_sphinx = "3.0"
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
"recommonmark",
"sphinx.ext.autodoc",
"sphinx.ext.napoleon",
"sphinx.ext.intersphinx",
"sphinx.ext.todo",
"sphinx.ext.coverage",
"sphinx.ext.mathjax",
"sphinx.ext.viewcode",
"sphinx.ext.githubpages",
]
# -- Configurations for plugins ------------
napoleon_google_docstring = True
napoleon_include_init_with_doc = True
napoleon_include_special_with_doc = True
napoleon_numpy_docstring = False
napoleon_use_rtype = False
autodoc_inherit_docstrings = False
autodoc_member_order = "bysource"
if DEPLOY:
intersphinx_timeout = 10
else:
# skip this when building locally
intersphinx_timeout = 0.1
intersphinx_mapping = {
"python": ("https://docs.python.org/3.6", None),
"numpy": ("https://docs.scipy.org/doc/numpy/", None),
"torch": ("https://pytorch.org/docs/master/", None),
}
# -------------------------
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
source_suffix = [".rst", ".md"]
# The master toctree document.
master_doc = "index"
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "build", "README.md", "tutorials/README.md"]
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"
# -- Options for HTML output -------------------------------------------------
html_theme = "sphinx_rtd_theme"
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = "detectron2doc"
# -- Options for LaTeX output ------------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, "detectron2.tex", "detectron2 Documentation", "detectron2 contributors", "manual")
]
# -- Options for manual page output ------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, "detectron2", "detectron2 Documentation", [author], 1)]
# -- Options for Texinfo output ----------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(
master_doc,
"detectron2",
"detectron2 Documentation",
author,
"detectron2",
"One line description of project.",
"Miscellaneous",
)
]
# -- Options for todo extension ----------------------------------------------
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True
_DEPRECATED_NAMES = set()
def autodoc_skip_member(app, what, name, obj, skip, options):
# we hide something deliberately
if getattr(obj, "__HIDE_SPHINX_DOC__", False):
return True
# Hide some names that are deprecated or not intended to be used
if name in _DEPRECATED_NAMES:
return True
return None
_PAPER_DATA = {
"resnet": ("1512.03385", "Deep Residual Learning for Image Recognition"),
"fpn": ("1612.03144", "Feature Pyramid Networks for Object Detection"),
"mask r-cnn": ("1703.06870", "Mask R-CNN"),
"faster r-cnn": (
"1506.01497",
"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks",
),
"deformconv": ("1703.06211", "Deformable Convolutional Networks"),
"deformconv2": ("1811.11168", "Deformable ConvNets v2: More Deformable, Better Results"),
"panopticfpn": ("1901.02446", "Panoptic Feature Pyramid Networks"),
"retinanet": ("1708.02002", "Focal Loss for Dense Object Detection"),
"cascade r-cnn": ("1712.00726", "Cascade R-CNN: Delving into High Quality Object Detection"),
"lvis": ("1908.03195", "LVIS: A Dataset for Large Vocabulary Instance Segmentation"),
"rrpn": ("1703.01086", "Arbitrary-Oriented Scene Text Detection via Rotation Proposals"),
"in1k1h": ("1706.02677", "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour"),
}
def paper_ref_role(
typ: str,
rawtext: str,
text: str,
lineno: int,
inliner,
options: Dict = {},
content: List[str] = [],
):
"""
Parse :paper:`xxx`. Similar to the "extlinks" sphinx extension.
"""
from docutils import nodes, utils
from sphinx.util.nodes import split_explicit_title
text = utils.unescape(text)
has_explicit_title, title, link = split_explicit_title(text)
link = link.lower()
if link not in _PAPER_DATA:
inliner.reporter.warning("Cannot find paper " + link)
paper_url, paper_title = "#", link
else:
paper_url, paper_title = _PAPER_DATA[link]
if "/" not in paper_url:
paper_url = "https://arxiv.org/abs/" + paper_url
if not has_explicit_title:
title = paper_title
pnode = nodes.reference(title, title, internal=False, refuri=paper_url)
return [pnode], []
def setup(app):
from recommonmark.transform import AutoStructify
app.add_domain(GithubURLDomain)
app.connect("autodoc-skip-member", autodoc_skip_member)
app.add_role("paper", paper_ref_role)
app.add_config_value(
"recommonmark_config",
{"enable_math": True, "enable_inline_math": True, "enable_eval_rst": True},
True,
)
app.add_transform(AutoStructify)

View File

@@ -0,0 +1,14 @@
.. detectron2 documentation master file, created by
sphinx-quickstart on Sat Sep 21 13:46:45 2019.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to detectron2's documentation!
======================================
.. toctree::
:maxdepth: 2
tutorials/index
notes/index
modules/index

View File

@@ -0,0 +1,7 @@
detectron2.checkpoint package
=============================
.. automodule:: detectron2.checkpoint
:members:
:undoc-members:
:show-inheritance:

View File

@@ -0,0 +1,17 @@
detectron2.config package
=========================
.. automodule:: detectron2.config
:members:
:undoc-members:
:show-inheritance:
:inherited-members:
Config References
-----------------
.. literalinclude:: ../../detectron2/config/defaults.py
:language: python
:linenos:
:lines: 4-

View File

@@ -0,0 +1,40 @@
detectron2.data package
=======================
.. automodule:: detectron2.data
:members:
:undoc-members:
:show-inheritance:
detectron2.data.detection\_utils module
---------------------------------------
.. automodule:: detectron2.data.detection_utils
:members:
:undoc-members:
:show-inheritance:
detectron2.data.datasets module
---------------------------------------
.. automodule:: detectron2.data.datasets
:members:
:undoc-members:
:show-inheritance:
detectron2.data.samplers module
---------------------------------------
.. automodule:: detectron2.data.samplers
:members:
:undoc-members:
:show-inheritance:
detectron2.data.transforms module
---------------------------------------
.. automodule:: detectron2.data.transforms
:members:
:undoc-members:
:show-inheritance:

View File

@@ -0,0 +1,25 @@
detectron2.engine package
=========================
.. automodule:: detectron2.engine
:members:
:undoc-members:
:show-inheritance:
detectron2.engine.defaults module
---------------------------------
.. automodule:: detectron2.engine.defaults
:members:
:undoc-members:
:show-inheritance:
detectron2.engine.hooks module
---------------------------------
.. automodule:: detectron2.engine.hooks
:members:
:undoc-members:
:show-inheritance:

View File

@@ -0,0 +1,7 @@
detectron2.evaluation package
=============================
.. automodule:: detectron2.evaluation
:members:
:undoc-members:
:show-inheritance:

View File

@@ -0,0 +1,7 @@
detectron2.export package
=========================
.. automodule:: detectron2.export
:members:
:undoc-members:
:show-inheritance:

View File

@@ -0,0 +1,17 @@
API Documentation
==================
.. toctree::
checkpoint
config
data
engine
evaluation
layers
model_zoo
modeling
solver
structures
utils
export

View File

@@ -0,0 +1,7 @@
detectron2.layers package
=========================
.. automodule:: detectron2.layers
:members:
:undoc-members:
:show-inheritance:

View File

@@ -0,0 +1,7 @@
detectron2.model_zoo package
============================
.. automodule:: detectron2.model_zoo
:members:
:undoc-members:
:show-inheritance:

View File

@@ -0,0 +1,58 @@
detectron2.modeling package
===========================
.. automodule:: detectron2.modeling
:members:
:undoc-members:
:show-inheritance:
detectron2.modeling.poolers module
---------------------------------------
.. automodule:: detectron2.modeling.poolers
:members:
:undoc-members:
:show-inheritance:
detectron2.modeling.sampling module
------------------------------------
.. automodule:: detectron2.modeling.sampling
:members:
:undoc-members:
:show-inheritance:
detectron2.modeling.box_regression module
------------------------------------------
.. automodule:: detectron2.modeling.box_regression
:members:
:undoc-members:
:show-inheritance:
Model Registries
-----------------
These are different registries provided in modeling.
Each registry provides the ability to replace a component with your own customized one,
without having to modify detectron2's code.
Note that it is impossible to let users customize an arbitrary line of code directly.
Even to change just one line somewhere,
you'll likely need to find the smallest registry that contains that line,
and register your component to that registry (a short registration sketch follows the list of registries below).
.. autodata:: detectron2.modeling.META_ARCH_REGISTRY
.. autodata:: detectron2.modeling.BACKBONE_REGISTRY
.. autodata:: detectron2.modeling.PROPOSAL_GENERATOR_REGISTRY
.. autodata:: detectron2.modeling.RPN_HEAD_REGISTRY
.. autodata:: detectron2.modeling.ANCHOR_GENERATOR_REGISTRY
.. autodata:: detectron2.modeling.ROI_HEADS_REGISTRY
.. autodata:: detectron2.modeling.ROI_BOX_HEAD_REGISTRY
.. autodata:: detectron2.modeling.ROI_MASK_HEAD_REGISTRY
.. autodata:: detectron2.modeling.ROI_KEYPOINT_HEAD_REGISTRY
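
For example, a minimal sketch of registering a custom backbone; the ``ToyBackbone`` class below is a hypothetical illustration (not part of detectron2), and once registered a config can select it with ``MODEL.BACKBONE.NAME = "ToyBackbone"``:

.. code-block:: python

    import torch.nn as nn
    from detectron2.modeling import BACKBONE_REGISTRY, Backbone, ShapeSpec

    @BACKBONE_REGISTRY.register()
    class ToyBackbone(Backbone):
        # hypothetical example backbone, registered so configs can refer to it by name
        def __init__(self, cfg, input_shape):
            super().__init__()
            self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=16, padding=3)

        def forward(self, image):
            return {"conv1": self.conv1(image)}

        def output_shape(self):
            return {"conv1": ShapeSpec(channels=64, stride=16)}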

View File

@@ -0,0 +1,7 @@
detectron2.solver package
=========================
.. automodule:: detectron2.solver
:members:
:undoc-members:
:show-inheritance:

View File

@@ -0,0 +1,7 @@
detectron2.structures package
=============================
.. automodule:: detectron2.structures
:members:
:undoc-members:
:show-inheritance:

View File

@@ -0,0 +1,80 @@
detectron2.utils package
========================
detectron2.utils.colormap module
--------------------------------
.. automodule:: detectron2.utils.colormap
:members:
:undoc-members:
:show-inheritance:
detectron2.utils.comm module
----------------------------
.. automodule:: detectron2.utils.comm
:members:
:undoc-members:
:show-inheritance:
detectron2.utils.events module
------------------------------
.. automodule:: detectron2.utils.events
:members:
:undoc-members:
:show-inheritance:
detectron2.utils.logger module
------------------------------
.. automodule:: detectron2.utils.logger
:members:
:undoc-members:
:show-inheritance:
detectron2.utils.registry module
--------------------------------
.. automodule:: detectron2.utils.registry
:members:
:undoc-members:
:show-inheritance:
detectron2.utils.memory module
----------------------------------
.. automodule:: detectron2.utils.memory
:members:
:undoc-members:
:show-inheritance:
detectron2.utils.analysis module
----------------------------------
.. automodule:: detectron2.utils.analysis
:members:
:undoc-members:
:show-inheritance:
detectron2.utils.visualizer module
----------------------------------
.. automodule:: detectron2.utils.visualizer
:members:
:undoc-members:
:show-inheritance:
detectron2.utils.video\_visualizer module
-----------------------------------------
.. automodule:: detectron2.utils.video_visualizer
:members:
:undoc-members:
:show-inheritance:

View File

@@ -0,0 +1,196 @@
# Benchmarks
Here we benchmark the training speed of a Mask R-CNN in detectron2,
against some other popular open source Mask R-CNN implementations.
### Settings
* Hardware: 8 NVIDIA V100s with NVLink.
* Software: Python 3.7, CUDA 10.1, cuDNN 7.6.5, PyTorch 1.5,
TensorFlow 1.15.0rc2, Keras 2.2.5, MxNet 1.6.0b20190820.
* Model: an end-to-end R-50-FPN Mask-RCNN model, using the same hyperparameters as the
[Detectron baseline config](https://github.com/facebookresearch/Detectron/blob/master/configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml)
(it does not have scale augmentation).
* Metrics: We use the average throughput in iterations 100-500 to skip GPU warmup time.
Note that for R-CNN-style models, the throughput of a model typically changes during training, because
it depends on the predictions of the model. Therefore, this metric is not directly comparable with
the "train speed" in the model zoo, which is the average speed of the entire training run.
### Main Results
```eval_rst
+-------------------------------+--------------------+
| Implementation | Throughput (img/s) |
+===============================+====================+
| |D2| |PT| | 62 |
+-------------------------------+--------------------+
| mmdetection_ |PT| | 53 |
+-------------------------------+--------------------+
| maskrcnn-benchmark_ |PT| | 53 |
+-------------------------------+--------------------+
| tensorpack_ |TF| | 50 |
+-------------------------------+--------------------+
| simpledet_ |mxnet| | 39 |
+-------------------------------+--------------------+
| Detectron_ |C2| | 19 |
+-------------------------------+--------------------+
| `matterport/Mask_RCNN`__ |TF| | 14 |
+-------------------------------+--------------------+
.. _maskrcnn-benchmark: https://github.com/facebookresearch/maskrcnn-benchmark/
.. _tensorpack: https://github.com/tensorpack/tensorpack/tree/master/examples/FasterRCNN
.. _mmdetection: https://github.com/open-mmlab/mmdetection/
.. _simpledet: https://github.com/TuSimple/simpledet/
.. _Detectron: https://github.com/facebookresearch/Detectron
__ https://github.com/matterport/Mask_RCNN/
.. |D2| image:: https://github.com/facebookresearch/detectron2/raw/master/.github/Detectron2-Logo-Horz.svg?sanitize=true
:height: 15pt
:target: https://github.com/facebookresearch/detectron2/
.. |PT| image:: https://pytorch.org/assets/images/logo-icon.svg
:width: 15pt
:height: 15pt
:target: https://pytorch.org
.. |TF| image:: https://static.nvidiagrid.net/ngc/containers/tensorflow.png
:width: 15pt
:height: 15pt
:target: https://tensorflow.org
.. |mxnet| image:: https://github.com/dmlc/web-data/raw/master/mxnet/image/mxnet_favicon.png
:width: 15pt
:height: 15pt
:target: https://mxnet.apache.org/
.. |C2| image:: https://caffe2.ai/static/logo.svg
:width: 15pt
:height: 15pt
:target: https://caffe2.ai
```
Details for each implementation:
* __Detectron2__: with release v0.1.2, run:
```
python tools/train_net.py --config-file configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml --num-gpus 8
```
* __mmdetection__: at commit `b0d845f`, run
```
./tools/dist_train.sh configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py 8
```
* __maskrcnn-benchmark__: use commit `0ce8f6f` with `sed -i s/torch.uint8/torch.bool/g **/*.py; sed -i 's/AT_CHECK/TORCH_CHECK/g' **/*.cu`
to make it compatible with PyTorch 1.5. Then, run training with
```
python -m torch.distributed.launch --nproc_per_node=8 tools/train_net.py --config-file configs/e2e_mask_rcnn_R_50_FPN_1x.yaml
```
The speed we observed is faster than its model zoo, likely due to different software versions.
* __tensorpack__: at commit `caafda`, `export TF_CUDNN_USE_AUTOTUNE=0`, then run
```
mpirun -np 8 ./train.py --config DATA.BASEDIR=/data/coco TRAINER=horovod BACKBONE.STRIDE_1X1=True TRAIN.STEPS_PER_EPOCH=50 --load ImageNet-R50-AlignPadding.npz
```
* __SimpleDet__: at commit `9187a1`, run
```
python detection_train.py --config config/mask_r50v1_fpn_1x.py
```
* __Detectron__: run
```
python tools/train_net.py --cfg configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml
```
Note that many of its ops run on CPUs, so its performance is limited.
* __matterport/Mask_RCNN__: at commit `3deaec`, apply the following diff, `export TF_CUDNN_USE_AUTOTUNE=0`, then run
```
python coco.py train --dataset=/data/coco/ --model=imagenet
```
Note that many small details in this implementation might be different
from Detectron's standards.
<details>
<summary>
(diff to make it use the same hyperparameters - click to expand)
</summary>
```diff
diff --git i/mrcnn/model.py w/mrcnn/model.py
index 62cb2b0..61d7779 100644
--- i/mrcnn/model.py
+++ w/mrcnn/model.py
@@ -2367,8 +2367,8 @@ class MaskRCNN():
epochs=epochs,
steps_per_epoch=self.config.STEPS_PER_EPOCH,
callbacks=callbacks,
- validation_data=val_generator,
- validation_steps=self.config.VALIDATION_STEPS,
+ #validation_data=val_generator,
+ #validation_steps=self.config.VALIDATION_STEPS,
max_queue_size=100,
workers=workers,
use_multiprocessing=True,
diff --git i/mrcnn/parallel_model.py w/mrcnn/parallel_model.py
index d2bf53b..060172a 100644
--- i/mrcnn/parallel_model.py
+++ w/mrcnn/parallel_model.py
@@ -32,6 +32,7 @@ class ParallelModel(KM.Model):
keras_model: The Keras model to parallelize
gpu_count: Number of GPUs. Must be > 1
"""
+ super().__init__()
self.inner_model = keras_model
self.gpu_count = gpu_count
merged_outputs = self.make_parallel()
diff --git i/samples/coco/coco.py w/samples/coco/coco.py
index 5d172b5..239ed75 100644
--- i/samples/coco/coco.py
+++ w/samples/coco/coco.py
@@ -81,7 +81,10 @@ class CocoConfig(Config):
IMAGES_PER_GPU = 2
# Uncomment to train on 8 GPUs (default is 1)
- # GPU_COUNT = 8
+ GPU_COUNT = 8
+ BACKBONE = "resnet50"
+ STEPS_PER_EPOCH = 50
+ TRAIN_ROIS_PER_IMAGE = 512
# Number of classes (including background)
NUM_CLASSES = 1 + 80 # COCO has 80 classes
@@ -496,29 +499,10 @@ if __name__ == '__main__':
# *** This training schedule is an example. Update to your needs ***
# Training - Stage 1
- print("Training network heads")
model.train(dataset_train, dataset_val,
learning_rate=config.LEARNING_RATE,
epochs=40,
- layers='heads',
- augmentation=augmentation)
-
- # Training - Stage 2
- # Finetune layers from ResNet stage 4 and up
- print("Fine tune Resnet stage 4 and up")
- model.train(dataset_train, dataset_val,
- learning_rate=config.LEARNING_RATE,
- epochs=120,
- layers='4+',
- augmentation=augmentation)
-
- # Training - Stage 3
- # Fine tune all layers
- print("Fine tune all layers")
- model.train(dataset_train, dataset_val,
- learning_rate=config.LEARNING_RATE / 10,
- epochs=160,
- layers='all',
+ layers='3+',
augmentation=augmentation)
elif args.command == "evaluate":
```
</details>

View File

@@ -0,0 +1,26 @@
# Change Log
### Releases
See release log at
[https://github.com/facebookresearch/detectron2/releases](https://github.com/facebookresearch/detectron2/releases).
### Notable Backward Incompatible Changes:
* 03/30/2020: Custom box head's `output_size` changed to `output_shape`.
* 02/14/2020, 02/18/2020: Mask head and keypoint head now include logic for losses & inference. Custom heads
should override the feature computation with the `layers()` method.
* 11/11/2019: `detectron2.data.detection_utils.read_image` transposes images with exif information.
### Config Version Change Log
* v1: Rename `RPN_HEAD.NAME` to `RPN.HEAD_NAME`.
* v2: A batch of rename of many configurations before release.
### Silent Regression in Historical Versions:
We list a few silent regressions, since they may silently produce incorrect results and are hard to debug.
* 04/01/2020 - 05/11/2020: Bad accuracy if `TRAIN_ON_PRED_BOXES` is set to True.
* 03/30/2020 - 04/01/2020: ResNets are not correctly built.
* 12/19/2019 - 12/26/2019: Using aspect ratio grouping causes a drop in accuracy.
* release - 11/9/2019: Test time augmentation does not predict the last category.

View File

@@ -0,0 +1,83 @@
# Compatibility with Other Libraries
## Compatibility with Detectron (and maskrcnn-benchmark)
Detectron2 addresses some legacy issues left in Detectron. As a result, their models
are not compatible:
running inference with the same model weights will produce different results in the two code bases.
The major differences regarding inference are:
- The height and width of a box with corners (x1, y1) and (x2, y2) are now computed more naturally as
width = x2 - x1 and height = y2 - y1;
in Detectron, a "+ 1" was added to both height and width (see the short sketch after this list).
Note that the relevant ops in Caffe2 have [adopted this change of convention](https://github.com/pytorch/pytorch/pull/20550)
with an extra option.
So it is still possible to run inference with a Detectron2-trained model in Caffe2.
The change in height/width calculations most notably changes:
- encoding/decoding in bounding box regression.
- non-maximum suppression. The effect here is very negligible, though.
- RPN now uses simpler anchors with fewer quantization artifacts.
In Detectron, the anchors were quantized and
[do not have accurate areas](https://github.com/facebookresearch/Detectron/issues/227).
In Detectron2, the anchors are center-aligned to feature grid points and not quantized.
- Classification layers have a different ordering of class labels.
This involves any trainable parameter with shape (..., num_categories + 1, ...).
In Detectron2, integer labels [0, K-1] correspond to the K = num_categories object categories
and the label "K" corresponds to the special "background" category.
In Detectron, label "0" means background, and labels [1, K] correspond to the K categories.
- ROIAlign is implemented differently. The new implementation is [available in Caffe2](https://github.com/pytorch/pytorch/pull/23706).
1. All the ROIs are shifted by half a pixel compared to Detectron in order to create better image-feature-map alignment.
See `layers/roi_align.py` for details.
To enable the old behavior, use `ROIAlign(aligned=False)`, or `POOLER_TYPE=ROIAlign` instead of
`ROIAlignV2` (the default).
1. The ROIs are not required to have a minimum size of 1.
This will lead to tiny differences in the output, but should be negligible.
- The mask inference function is different.
In Detectron2, the "paste_mask" function is different and should be more accurate than in Detectron. This change
can improve mask AP on COCO by ~0.5% absolute.
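
A short sketch of the box-size and label-ordering differences above (illustrative only; the function names are made up and not detectron2 code):

```python
# Box size: Detectron2 vs. the legacy Detectron "+ 1" convention,
# for a box with corners (x1, y1) and (x2, y2).
def box_size_detectron2(x1, y1, x2, y2):
    return x2 - x1, y2 - y1            # width, height

def box_size_detectron1(x1, y1, x2, y2):
    return x2 - x1 + 1, y2 - y1 + 1    # legacy "+ 1" convention

# Class label ordering for K object categories:
#   Detectron2: labels 0 .. K-1 are objects, label K is "background"
#   Detectron : label 0 is "background", labels 1 .. K are objects
```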
There are some other differences in training as well, but they won't affect
model-level compatibility. The major ones are:
- We fixed a [bug](https://github.com/facebookresearch/Detectron/issues/459) in
Detectron, by making `RPN.POST_NMS_TOPK_TRAIN` per-image, rather than per-batch.
The fix may lead to a small accuracy drop for a few models (e.g. keypoint
detection) and will require some parameter tuning to match the Detectron results.
- For simplicity, we change the default loss in bounding box regression to L1 loss, instead of smooth L1 loss.
We have observed that this tends to slightly decrease box AP50 while improving box AP for higher
overlap thresholds (and leading to a slight overall improvement in box AP).
- We interpret the coordinates in COCO bounding box and segmentation annotations
as coordinates in range `[0, width]` or `[0, height]`. The coordinates in
COCO keypoint annotations are interpreted as pixel indices in range `[0, width - 1]` or `[0, height - 1]`.
Note that this affects how flip augmentation is implemented.
We will later share more details and rationale behind the above-mentioned issues
about pixels, coordinates, and "+1"s.
## Compatibility with Caffe2
As mentioned above, despite the incompatibilities with Detectron, the relevant
ops have been implemented in Caffe2.
Therefore, models trained with detectron2 can be converted to Caffe2.
See [Deployment](../tutorials/deployment.md) for the tutorial.
## Compatibility with TensorFlow
Most ops are available in TensorFlow, although some tiny differences in
the implementation of resize / ROIAlign / padding need to be addressed.
A working conversion script is provided by [tensorpack FasterRCNN](https://github.com/tensorpack/tensorpack/tree/master/examples/FasterRCNN/convert_d2)
to run a standard detectron2 model in TensorFlow.

View File

@@ -0,0 +1,49 @@
# Contributing to detectron2
## Issues
We use GitHub issues to track public bugs and questions.
Please make sure to follow one of the
[issue templates](https://github.com/facebookresearch/detectron2/issues/new/choose)
when reporting any issues.
Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.
## Pull Requests
We actively welcome your pull requests.
However, if you're adding any significant features (e.g. > 50 lines), please
make sure to have a corresponding issue to discuss your motivation and proposals,
before sending a PR. We do not always accept new features, and we take the following
factors into consideration:
1. Whether the same feature can be achieved without modifying detectron2.
Detectron2 is designed so that you can implement many extensions from the outside, e.g.
those in [projects](https://github.com/facebookresearch/detectron2/tree/master/projects).
If some part is not as extensible, you can also bring up the issue to make it more extensible.
2. Whether the feature is potentially useful to a large audience, or only to a small portion of users.
3. Whether the proposed solution has a good design / interface.
4. Whether the proposed solution adds extra mental/practical overhead to users who don't
need such a feature.
5. Whether the proposed solution breaks existing APIs.
When sending a PR, please do:
1. If a PR contains multiple orthogonal changes, split it into several PRs.
2. If you've added code that should be tested, add tests.
3. For PRs that need experiments (e.g. adding a new model or new methods),
you don't need to update model zoo, but do provide experiment results in the description of the PR.
4. If APIs are changed, update the documentation.
5. Make sure your code lints with `./dev/linter.sh`.
## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.
Complete your CLA here: <https://code.facebook.com/cla>
## License
By contributing to detectron2, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.

View File

@@ -0,0 +1,10 @@
Notes
======================================
.. toctree::
:maxdepth: 2
benchmarks
compatibility
contributing
changelog

View File

@@ -0,0 +1,4 @@
# Read the docs:
The latest documentation built from this directory is available at [detectron2.readthedocs.io](https://detectron2.readthedocs.io/).
Documents in this directory are not meant to be read on github.

View File

@@ -0,0 +1,99 @@
# Setup Builtin Datasets
Detectron2 has builtin support for a few datasets.
The datasets are assumed to exist in a directory specified by the environment variable
`DETECTRON2_DATASETS`.
Under this directory, detectron2 expects to find datasets in the structure described below.
You can set the location for builtin datasets by `export DETECTRON2_DATASETS=/path/to/datasets`.
If left unset, the default is `./datasets` relative to your current working directory.
The [model zoo](https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md)
contains configs and models that use these builtin datasets.
## Expected dataset structure for COCO instance/keypoint detection:
```
coco/
annotations/
instances_{train,val}2017.json
person_keypoints_{train,val}2017.json
{train,val}2017/
# image files that are mentioned in the corresponding json
```
You can use the 2014 version of the dataset as well.
Some of the builtin tests (`dev/run_*_tests.sh`) use a tiny version of the COCO dataset,
which you can download with `./prepare_for_tests.sh`.
## Expected dataset structure for PanopticFPN:
```
coco/
annotations/
panoptic_{train,val}2017.json
panoptic_{train,val}2017/ # png annotations
panoptic_stuff_{train,val}2017/ # generated by the script mentioned below
```
Install panopticapi by:
```
pip install git+https://github.com/cocodataset/panopticapi.git
```
Then, run `python prepare_panoptic_fpn.py` to extract semantic annotations from panoptic annotations.
## Expected dataset structure for LVIS instance segmentation:
```
coco/
{train,val,test}2017/
lvis/
lvis_v0.5_{train,val}.json
lvis_v0.5_image_info_test.json
```
Install lvis-api by:
```
pip install git+https://github.com/lvis-dataset/lvis-api.git
```
Run `python prepare_cocofied_lvis.py` to prepare "cocofied" LVIS annotations for evaluation of models trained on the COCO dataset.
## Expected dataset structure for cityscapes:
```
cityscapes/
gtFine/
train/
aachen/
color.png, instanceIds.png, labelIds.png, polygons.json,
labelTrainIds.png
...
val/
test/
leftImg8bit/
train/
val/
test/
```
Install cityscapes scripts by:
```
pip install git+https://github.com/mcordts/cityscapesScripts.git
```
Note: the labelTrainIds.png files are created using cityscapesScripts with:
```
CITYSCAPES_DATASET=$DETECTRON2_DATASETS/cityscapes python cityscapesscripts/preparation/createTrainIdLabelImgs.py
```
They are not needed for instance segmentation.
## Expected dataset structure for Pascal VOC:
```
VOC20{07,12}/
Annotations/
ImageSets/
Main/
trainval.txt
test.txt
# train.txt or val.txt, if you use these splits
JPEGImages/
```

View File

@@ -0,0 +1,58 @@
# Configs
Detectron2 provides a key-value based config system that can be
used to obtain standard, common behaviors.
Detectron2's config system uses YAML and [yacs](https://github.com/rbgirshick/yacs).
In addition to the [basic operations](../modules/config.html#detectron2.config.CfgNode)
that access and update a config, we provide the following extra functionalities:
1. The config can have `_BASE_: base.yaml` field, which will load a base config first.
Values in the base config will be overwritten in sub-configs, if there are any conflicts.
We provide several base configs for standard model architectures.
2. We provide config versioning, for backward compatibility.
If your config file is versioned with a config line like `VERSION: 2`,
detectron2 will still recognize it even if we change some keys in the future.
"Config" is a very limited abstraction.
We do not expect all features in detectron2 to be available through configs.
If you need something that's not available in the config space,
please write code using detectron2's API.
### Basic Usage
Some basic usage of the `CfgNode` object is shown here. See more in [documentation](../modules/config.html#detectron2.config.CfgNode).
```python
from detectron2.config import get_cfg
cfg = get_cfg() # obtain detectron2's default config
cfg.xxx = yyy # add new configs for your own custom components
cfg.merge_from_file("my_cfg.yaml") # load values from a file
cfg.merge_from_list(["MODEL.WEIGHTS", "weights.pth"]) # can also load values from a list of str
print(cfg.dump()) # print formatted configs
```
Many builtin tools in detectron2 accept command-line config overrides:
key-value pairs provided on the command line will overwrite the existing values in the config file.
For example, [demo.py](../../demo/demo.py) can be used with
```
./demo.py --config-file config.yaml [--other-options] \
--opts MODEL.WEIGHTS /path/to/weights INPUT.MIN_SIZE_TEST 1000
```
To see a list of available configs in detectron2 and what they mean,
check [Config References](../modules/config.html#config-references).
### Best Practice with Configs
1. Treat the configs you write as "code": avoid copying them or duplicating them; use `_BASE_`
to share common parts between configs.
2. Keep the configs you write simple: don't include keys that do not affect the experimental setting.
3. Keep a version number in your configs (or the base config), e.g., `VERSION: 2`,
for backward compatibility.
We print a warning when reading a config without a version number.
The official configs do not include a version number because they are meant to
be always up-to-date.

View File

@@ -0,0 +1,77 @@
# Use Custom Dataloaders
## How the Existing Dataloader Works
Detectron2 contains a builtin data loading pipeline.
It's good to understand how it works, in case you need to write a custom one.
Detectron2 provides two functions
[build_detection_{train,test}_loader](../modules/data.html#detectron2.data.build_detection_train_loader)
that create a default data loader from a given config.
Here is how `build_detection_{train,test}_loader` work:
1. It takes the name of a registered dataset (e.g., "coco_2017_train") and loads a `list[dict]` representing the dataset items
in a lightweight, canonical format. These dataset items are not yet ready to be used by the model (e.g., images are
not loaded into memory, random augmentations have not been applied, etc.).
Details about the dataset format and dataset registration can be found in
[datasets](./datasets.md).
2. Each dict in this list is mapped by a function ("mapper"):
* Users can customize this mapping function by specifying the "mapper" argument in
`build_detection_{train,test}_loader`. The default mapper is [DatasetMapper](../modules/data.html#detectron2.data.DatasetMapper).
* The output format of such function can be arbitrary, as long as it is accepted by the consumer of this data loader (usually the model).
The outputs of the default mapper, after batching, follow the default model input format documented in
[Use Models](./models.html#model-input-format).
* The role of the mapper is to transform the lightweight, canonical representation of a dataset item into a format
that is ready for the model to consume (including, e.g., read images, perform random data augmentation and convert to torch Tensors).
If you would like to perform custom transformations to data, you often want a custom mapper.
3. The outputs of the mapper are batched (simply into a list).
4. This batched data is the output of the data loader. Typically, it's also the input of
`model.forward()`.
## Write a Custom Dataloader
Using a different "mapper" with `build_detection_{train,test}_loader(mapper=)` works for most use cases
of custom data loading.
For example, if you want to resize all images to a fixed size for Mask R-CNN training, write this:
```python
import copy
import torch

from detectron2.data import build_detection_train_loader
from detectron2.data import transforms as T
from detectron2.data import detection_utils as utils
def mapper(dataset_dict):
# Implement a mapper, similar to the default DatasetMapper, but with your own customizations
dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
image = utils.read_image(dataset_dict["file_name"], format="BGR")
image, transforms = T.apply_transform_gens([T.Resize((800, 800))], image)
dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
annos = [
utils.transform_instance_annotations(obj, transforms, image.shape[:2])
for obj in dataset_dict.pop("annotations")
if obj.get("iscrowd", 0) == 0
]
instances = utils.annotations_to_instances(annos, image.shape[:2])
dataset_dict["instances"] = utils.filter_empty_instances(instances)
return dataset_dict
data_loader = build_detection_train_loader(cfg, mapper=mapper)
# use this dataloader instead of the default
```
Refer to [API documentation of detectron2.data](../modules/data) for details.
If you want to change not only the mapper (e.g., to write different sampling or batching logic),
you can write your own data loader. The data loader is simply a
python iterator that produces [the format](./models.md) your model accepts.
You can implement it using any tools you like.
## Use a Custom Dataloader
If you use [DefaultTrainer](../modules/engine.html#detectron2.engine.defaults.DefaultTrainer),
you can overwrite its `build_{train,test}_loader` method to use your own dataloader.
See the [densepose dataloader](../../projects/DensePose/train_net.py)
for an example.
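
A minimal sketch of such an override, reusing the custom `mapper` defined above (the `MyTrainer` name is made up for illustration):

```python
from detectron2.data import build_detection_train_loader
from detectron2.engine import DefaultTrainer

class MyTrainer(DefaultTrainer):
    @classmethod
    def build_train_loader(cls, cfg):
        # plug the custom mapper into the default training data loader
        return build_detection_train_loader(cfg, mapper=mapper)
```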
If you write your own training loop, you can plug in your data loader easily.

View File

@@ -0,0 +1,221 @@
# Use Custom Datasets
Datasets that have builtin support in detectron2 are listed in [datasets](../../datasets).
If you want to use a custom dataset while also reusing detectron2's data loaders,
you will need to
1. __Register__ your dataset (i.e., tell detectron2 how to obtain your dataset).
2. Optionally, __register metadata__ for your dataset.
Next, we explain the above two concepts in detail.
The [Colab tutorial](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
has a live example of how to register and train on a dataset of custom formats.
### Register a Dataset
To let detectron2 know how to obtain a dataset named "my_dataset", you will implement
a function that returns the items in your dataset and then tell detectron2 about this
function:
```python
def my_dataset_function():
...
return list[dict] in the following format
from detectron2.data import DatasetCatalog
DatasetCatalog.register("my_dataset", my_dataset_function)
```
Here, the snippet associates a dataset "my_dataset" with a function that returns the data.
The registration stays effective until the process exits.
The function can process data from its original format into either one of the following:
1. Detectron2's standard dataset dict, described below. This will work with many other builtin
features in detectron2, so it's recommended to use it when it's sufficient for your task.
2. Your custom dataset dict. You can also return arbitrary dicts in your own format,
such as adding extra keys for new tasks.
Then you will need to handle them properly downstream as well.
See below for more details.
#### Standard Dataset Dicts
For standard tasks
(instance detection, instance/semantic/panoptic segmentation, keypoint detection),
we load the original dataset into `list[dict]` with a specification similar to COCO's json annotations.
This is our standard representation for a dataset.
Each dict contains information about one image.
The dict may have the following fields,
and the required fields vary based on what the dataloader or the task needs (see more below; a minimal example dict is sketched after this list).
+ `file_name`: the full path to the image file. Rotation and flipping will be applied if the image has such EXIF information.
+ `height`, `width`: integer. The shape of image.
+ `image_id` (str or int): a unique id that identifies this image. Used
during evaluation to identify the images, but a dataset may use it for different purposes.
+ `annotations` (list[dict]): each dict corresponds to annotations of one instance
in this image. Required by instance detection/segmentation or keypoint detection tasks.
Images with empty `annotations` will by default be removed from training,
but can be included using `DATALOADER.FILTER_EMPTY_ANNOTATIONS`.
Each dict contains the following keys, of which `bbox`, `bbox_mode` and `category_id` are required:
+ `bbox` (list[float]): list of 4 numbers representing the bounding box of the instance.
+ `bbox_mode` (int): the format of bbox.
It must be a member of
[structures.BoxMode](../modules/structures.html#detectron2.structures.BoxMode).
Currently supports: `BoxMode.XYXY_ABS`, `BoxMode.XYWH_ABS`.
+ `category_id` (int): an integer in the range [0, num_categories) representing the category label.
The value num_categories is reserved to represent the "background" category, if applicable.
+ `segmentation` (list[list[float]] or dict): the segmentation mask of the instance.
+ If `list[list[float]]`, it represents a list of polygons, one for each connected component
of the object. Each `list[float]` is one simple polygon in the format of `[x1, y1, ..., xn, yn]`.
The Xs and Ys are either relative coordinates in [0, 1], or absolute coordinates,
depending on whether "bbox_mode" is relative.
+ If `dict`, it represents the per-pixel segmentation mask in COCO's RLE format. The dict should have
keys "size" and "counts". You can convert a uint8 segmentation mask of 0s and 1s into
RLE format by `pycocotools.mask.encode(np.asarray(mask, order="F"))`.
+ `keypoints` (list[float]): in the format of [x1, y1, v1,..., xn, yn, vn].
v[i] means the [visibility](http://cocodataset.org/#format-data) of this keypoint.
`n` must be equal to the number of keypoint categories.
The Xs and Ys are either relative coordinates in [0, 1], or absolute coordinates,
depending on whether "bbox_mode" is relative.
Note that the coordinate annotations in COCO format are integers in range [0, H-1 or W-1].
By default, detectron2 adds 0.5 to absolute keypoint coordinates to convert them from discrete
pixel indices to floating point coordinates.
+ `iscrowd`: 0 (default) or 1. Whether this instance is labeled as COCO's "crowd
region". Don't include this field if you don't know what it means.
+ `sem_seg_file_name`: the full path to the ground truth semantic segmentation file.
Required by semantic segmentation task.
It should be an image whose pixel values are integer labels.
Fast R-CNN (with precomputed proposals) is rarely used today.
To train a Fast R-CNN, the following extra keys are needed:
+ `proposal_boxes` (array): 2D numpy array with shape (K, 4) representing K precomputed proposal boxes for this image.
+ `proposal_objectness_logits` (array): numpy array with shape (K, ), which corresponds to the objectness
logits of proposals in 'proposal_boxes'.
+ `proposal_bbox_mode` (int): the format of the precomputed proposal bbox.
It must be a member of
[structures.BoxMode](../modules/structures.html#detectron2.structures.BoxMode).
Default is `BoxMode.XYXY_ABS`.
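
To make the field list above concrete, here is a minimal sketch of one dataset dict for instance detection (the file path and numbers below are made up for illustration):

```python
from detectron2.structures import BoxMode

one_record = {
    "file_name": "/path/to/images/0001.jpg",   # hypothetical image path
    "height": 480,
    "width": 640,
    "image_id": 1,
    "annotations": [
        {
            "bbox": [100.0, 120.0, 200.0, 260.0],
            "bbox_mode": BoxMode.XYXY_ABS,
            "category_id": 0,
        }
    ],
}
```

A dataset function like `my_dataset_function` above would return a list of such dicts.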
#### Custom Dataset Dicts for New Tasks
In the `list[dict]` that your dataset function returns, the dictionary can also have arbitrary custom data.
This will be useful for a new task that needs extra information not supported
by the standard dataset dicts. In this case, you need to make sure the downstream code can handle your data
correctly. Usually this requires writing a new `mapper` for the dataloader (see [Use Custom Dataloaders](./data_loading.md)).
When designing a custom format, note that all dicts are stored in memory
(sometimes serialized and with multiple copies).
To save memory, each dict is meant to contain small but sufficient information
about each sample, such as file names and annotations.
Loading full samples typically happens in the data loader.
For attributes shared among the entire dataset, use `Metadata` (see below).
To avoid extra memory, do not save such information repeatedly for each sample.
### "Metadata" for Datasets
Each dataset is associated with some metadata, accessible through
`MetadataCatalog.get(dataset_name).some_metadata`.
Metadata is a key-value mapping that contains information that's shared among
the entire dataset, and usually is used to interpret what's in the dataset, e.g.,
names of classes, colors of classes, root of files, etc.
This information will be useful for augmentation, evaluation, visualization, logging, etc.
The structure of metadata depends on what is needed by the corresponding downstream code.
If you register a new dataset through `DatasetCatalog.register`,
you may also want to add its corresponding metadata through
`MetadataCatalog.get(dataset_name).some_key = some_value`, to enable any features that need the metadata.
You can do it like this (using the metadata key "thing_classes" as an example):
```python
from detectron2.data import MetadataCatalog
MetadataCatalog.get("my_dataset").thing_classes = ["person", "dog"]
```
Here is a list of metadata keys that are used by builtin features in detectron2.
If you add your own dataset without these metadata, some features may be
unavailable to you:
* `thing_classes` (list[str]): Used by all instance detection/segmentation tasks.
A list of names for each instance/thing category.
If you load a COCO format dataset, it will be automatically set by the function `load_coco_json`.
* `thing_colors` (list[tuple(r, g, b)]): Pre-defined color (in [0, 255]) for each thing category.
Used for visualization. If not given, random colors are used.
* `stuff_classes` (list[str]): Used by semantic and panoptic segmentation tasks.
A list of names for each stuff category.
* `stuff_colors` (list[tuple(r, g, b)]): Pre-defined color (in [0, 255]) for each stuff category.
Used for visualization. If not given, random colors are used.
* `keypoint_names` (list[str]): Used by keypoint localization. A list of names for each keypoint.
* `keypoint_flip_map` (list[tuple[str]]): Used by the keypoint localization task. A list of pairs of names,
where each pair gives the two keypoints that should be flipped if the image is
flipped horizontally during augmentation.
* `keypoint_connection_rules`: list[tuple(str, str, (r, g, b))]. Each tuple specifies a pair of keypoints
that are connected and the color to use for the line between them when visualized.
Some additional metadata that are specific to the evaluation of certain datasets (e.g. COCO):
* `thing_dataset_id_to_contiguous_id` (dict[int->int]): Used by all instance detection/segmentation tasks in the COCO format.
A mapping from instance class ids in the dataset to contiguous ids in range [0, #class).
Will be automatically set by the function `load_coco_json`.
* `stuff_dataset_id_to_contiguous_id` (dict[int->int]): Used when generating prediction json files for
semantic/panoptic segmentation.
A mapping from semantic segmentation class ids in the dataset
to contiguous ids in [0, num_categories). It is useful for evaluation only.
* `json_file`: The COCO annotation json file. Used by COCO evaluation for COCO-format datasets.
* `panoptic_root`, `panoptic_json`: Used by panoptic evaluation.
* `evaluator_type`: Used by the builtin main training script to select
evaluator. Don't use it in a new training script.
You can just provide the [DatasetEvaluator](../modules/evaluation.html#detectron2.evaluation.DatasetEvaluator)
for your dataset directly in your main script.
NOTE: For background on the concept of "thing" and "stuff", see
[On Seeing Stuff: The Perception of Materials by Humans and Machines](http://persci.mit.edu/pub_pdfs/adelson_spie_01.pdf).
In detectron2, the term "thing" is used for instance-level tasks,
and "stuff" is used for semantic segmentation tasks.
Both are used in panoptic segmentation.
### Register a COCO Format Dataset
If your dataset is already a json file in the COCO format,
the dataset and its associated metadata can be registered easily with:
```python
from detectron2.data.datasets import register_coco_instances
register_coco_instances("my_dataset", {}, "json_annotation.json", "path/to/image/dir")
```
If your dataset is in COCO format but with extra custom per-instance annotations,
the [load_coco_json](../modules/data.html#detectron2.data.datasets.load_coco_json)
function might be useful.
### Update the Config for New Datasets
Once you've registered the dataset, you can use the name of the dataset (e.g., "my_dataset" in the
example above) in `cfg.DATASETS.{TRAIN,TEST}`.
There are other configs you might want to change to train or evaluate on new datasets (a short sketch follows this list):
* `MODEL.ROI_HEADS.NUM_CLASSES` and `MODEL.RETINANET.NUM_CLASSES` are the number of thing classes
for R-CNN and RetinaNet models, respectively.
* `MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS` sets the number of keypoints for Keypoint R-CNN.
You'll also need to set [Keypoint OKS](http://cocodataset.org/#keypoints-eval)
with `TEST.KEYPOINT_OKS_SIGMAS` for evaluation.
* `MODEL.SEM_SEG_HEAD.NUM_CLASSES` sets the number of stuff classes for Semantic FPN & Panoptic FPN.
* If you're training Fast R-CNN (with precomputed proposals), `DATASETS.PROPOSAL_FILES_{TRAIN,TEST}`
need to match the datasets. The format of proposal files is documented
[here](../modules/data.html#detectron2.data.load_proposals_into_dataset).
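
For example, a rough sketch following the registration example above (the class count below is hypothetical):

```python
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.DATASETS.TRAIN = ("my_dataset",)   # the name registered above
cfg.DATASETS.TEST = ("my_dataset",)    # hypothetical: reuse the same set for evaluation
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2    # e.g. ["person", "dog"] from the metadata example
```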
New models
(e.g. [TensorMask](../../projects/TensorMask),
[PointRend](../../projects/PointRend))
often have similar configs of their own that need to be changed as well.

View File

@@ -0,0 +1,92 @@
# Deployment
## Caffe2 Deployment
We currently support converting a detectron2 model to Caffe2 format through ONNX.
The converted Caffe2 model is able to run without detectron2 dependency in either Python or C++.
It has a runtime optimized for CPU & mobile inference, but not for GPU inference.
Caffe2 conversion requires PyTorch ≥ 1.4 and ONNX ≥ 1.6.
### Coverage
It supports the 3 most common meta architectures: `GeneralizedRCNN`, `RetinaNet`, `PanopticFPN`,
and most official models under these 3 meta architectures.
Users' custom extensions under these architectures (added through registration) are supported
as long as they do not contain control flow or operators not available in Caffe2 (e.g. deformable convolution).
For example, custom backbones and heads are often supported out of the box.
### Usage
The conversion APIs are documented at [the API documentation](../modules/export).
We provide a tool, `caffe2_converter.py`, as an example that uses
these APIs to convert a standard model.
To convert an official Mask R-CNN trained on COCO, first
[prepare the COCO dataset](../../datasets/), then pick the model from [Model Zoo](../../MODEL_ZOO.md), and run:
```
cd tools/deploy/ && ./caffe2_converter.py --config-file ../../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \
--output ./caffe2_model --run-eval \
MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl \
MODEL.DEVICE cpu
```
Note that:
1. The conversion needs valid sample inputs & weights to trace the model. That's why the script requires the dataset.
You can modify the script to obtain sample inputs in other ways.
2. With the `--run-eval` flag, it will evaluate the converted model to verify its accuracy.
The accuracy is typically slightly different (within 0.1 AP) from PyTorch due to
numerical precision differences between the implementations.
It's recommended to always verify the accuracy in case your custom model is not supported by the
conversion.
The converted model is available at the specified `caffe2_model/` directory. Two files `model.pb`
and `model_init.pb` that contain network structure and network parameters are necessary for deployment.
These files can then be loaded in C++ or Python using Caffe2's APIs.
The script also generates a `model.svg` file that contains a visualization of the network.
You can also load `model.pb` to tools such as [netron](https://github.com/lutzroeder/netron) to visualize it.
### Use the model in C++/Python
The model can be loaded in C++. An example [caffe2_mask_rcnn.cpp](../../tools/deploy/) is given,
which performs CPU/GPU inference using `COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x`.
The C++ example needs to be built with:
* PyTorch with caffe2 inside
* gflags, glog, opencv
* protobuf headers that match the version of your caffe2
* MKL headers if caffe2 is built with MKL
The following commands compile the example inside the [official detectron2 docker](../../docker/):
```
sudo apt update && sudo apt install libgflags-dev libgoogle-glog-dev libopencv-dev
pip install mkl-include
wget https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protobuf-cpp-3.6.1.tar.gz
tar xf protobuf-cpp-3.6.1.tar.gz
export CPATH=$(readlink -f ./protobuf-3.6.1/src/):$HOME/.local/include
export CMAKE_PREFIX_PATH=$HOME/.local/lib/python3.6/site-packages/torch/
mkdir build && cd build
cmake -DTORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST .. && make
# To run:
./caffe2_mask_rcnn --predict_net=./model.pb --init_net=./model_init.pb --input=input.jpg
```
Note that:
* All converted models (the .pb files) take two input tensors:
"data" is an NCHW image, and "im_info" is an Nx3 tensor consisting of (height, width, 1.0) for
each image (the shape of "data" might be larger than that in "im_info" due to padding).
* The converted models do not contain post-processing operations that
transform raw layer outputs into formatted predictions.
The example only produces raw outputs (28x28 masks) from the final
layers that are not post-processed, because in actual deployment, an application often needs
its own lightweight post-processing (e.g. full-image masks for every detected object are often not necessary).
We also provide a python wrapper around the converted model, in the
[Caffe2Model.\_\_call\_\_](../modules/export.html#detectron2.export.Caffe2Model.__call__) method.
This method has an interface that's identical to the [pytorch versions of models](./models.md),
and it internally applies pre/post-processing code to match the formats.
It can serve as a reference for pre/post-processing in actual deployment.
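As a rough sketch of how this wrapper might be used (the model directory and image path are illustrative):
```python
import cv2
import torch
from detectron2.export import Caffe2Model

model = Caffe2Model.load_protobuf("./caffe2_model")  # reads model.pb and model_init.pb
img = cv2.imread("input.jpg")                        # BGR image, HWC uint8
inputs = [{"image": torch.as_tensor(img.astype("float32").transpose(2, 0, 1))}]
outputs = model(inputs)                              # same format as the pytorch model outputs
```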

View File

@@ -0,0 +1,43 @@
# Evaluation
Evaluation is a process that takes a number of input/output pairs and aggregates them.
You can always [use the model](./models.md) directly and just parse its inputs/outputs manually to perform
evaluation.
Alternatively, evaluation is implemented in detectron2 using the [DatasetEvaluator](../modules/evaluation.html#detectron2.evaluation.DatasetEvaluator)
interface.
Detectron2 includes a few `DatasetEvaluator`s that compute metrics using standard dataset-specific
APIs (e.g., COCO, LVIS).
You can also implement your own `DatasetEvaluator` that performs some other jobs
using the inputs/outputs pairs.
For example, to count how many instances are detected on the validation set:
```python
from detectron2.evaluation import DatasetEvaluator

class Counter(DatasetEvaluator):
    def reset(self):
        self.count = 0

    def process(self, inputs, outputs):
        for output in outputs:
            self.count += len(output["instances"])

    def evaluate(self):
        # save self.count somewhere, or print it, or return it.
        return {"count": self.count}
```
Once you have some `DatasetEvaluator`, you can run it with
[inference_on_dataset](../modules/evaluation.html#detectron2.evaluation.inference_on_dataset).
For example,
```python
val_results = inference_on_dataset(
model,
val_data_loader,
DatasetEvaluators([COCOEvaluator(...), Counter()]))
```
Compared to running the evaluation manually using the model, the benefit of this function is that
you can merge evaluators together using [DatasetEvaluators](../modules/evaluation.html#detectron2.evaluation.DatasetEvaluators).
In this way you can run all evaluations without having to go through the dataset multiple times.
The `inference_on_dataset` function also provides accurate speed benchmarks for the
given model and dataset.
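For reference, the validation data loader above can be built with detectron2's test-loader helper. A sketch, assuming a registered dataset named "my_val_dataset" and an existing `cfg` and `model`:
```python
from detectron2.data import build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, DatasetEvaluators, inference_on_dataset

val_data_loader = build_detection_test_loader(cfg, "my_val_dataset")
evaluators = DatasetEvaluators(
    [COCOEvaluator("my_val_dataset", cfg, False, output_dir="./eval_output"), Counter()]
)
val_results = inference_on_dataset(model, val_data_loader, evaluators)
```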

View File

@@ -0,0 +1,53 @@
# Extend Detectron2's Defaults
__Research is about doing things in new ways__.
This brings a tension in how to create abstractions in code,
which is a challenge for any research engineering project of a significant size:
1. On one hand, it needs to have very thin abstractions to allow for the possibility of doing
everything in new ways. It should be reasonably easy to break existing
abstractions and replace them with new ones.
2. On the other hand, such a project also needs reasonably high-level
abstractions, so that users can easily do things in standard ways,
without worrying too much about the details that only certain researchers care about.
In detectron2, there are two types of interfaces that address this tension together:
1. Functions and classes that take a config (`cfg`) argument
(sometimes with only a few extra arguments).
Such functions and classes implement
the "standard default" behavior: it will read what it needs from the
config and do the "standard" thing.
Users only need to load a given config and pass it around, without having to worry about
which arguments are used and what they all mean.
2. Functions and classes that have well-defined explicit arguments.
Each of these is a small building block of the entire system.
They require users' expertise to understand what each argument should be,
and require more effort to stitch together into a larger system.
But they can be stitched together in more flexible ways.
When you need to implement something not supported by the "standard defaults"
included in detectron2, these well-defined components can be reused.
3. (experimental) A few classes are implemented with the
[@configurable](../../modules/config.html#detectron2.config.configurable)
decorator - they can be called with either a config, or with explicit arguments.
Their explicit argument interfaces are currently __experimental__ and subject to change; a minimal sketch is shown below.
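For illustration, such a class might look like the following. This is a minimal sketch; the class name and the `MODEL.MY_MODULE.*` config keys are hypothetical:
```python
import torch.nn as nn

from detectron2.config import configurable

class MyModule(nn.Module):
    @configurable
    def __init__(self, *, in_channels, num_classes):
        super().__init__()
        self.layer = nn.Linear(in_channels, num_classes)

    @classmethod
    def from_config(cls, cfg):
        # map config keys to explicit arguments (keys here are hypothetical)
        return {
            "in_channels": cfg.MODEL.MY_MODULE.IN_CHANNELS,
            "num_classes": cfg.MODEL.MY_MODULE.NUM_CLASSES,
        }

# either calling style works:
# MyModule(cfg)                                # reads arguments from the config
# MyModule(in_channels=256, num_classes=80)    # explicit arguments
```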
If you only need the standard behavior, the [Beginner's Tutorial](./getting_started.md)
should suffice. If you need to extend detectron2 to your own needs,
see the following tutorials for more details:
* Detectron2 includes a few standard datasets. To use custom ones, see
[Use Custom Datasets](./datasets.md).
* Detectron2 contains the standard logic that creates a data loader for training/testing from a
dataset, but you can write your own as well. See [Use Custom Data Loaders](./data_loading.md).
* Detectron2 implements many standard detection models, and provides ways for you
to override their behavior. See [Use Models](./models.md) and [Write Models](./write-models.md).
* Detectron2 provides a default training loop that is good for common training tasks.
You can customize it with hooks, or write your own loop instead. See [training](./training.md).

View File

@@ -0,0 +1,79 @@
## Getting Started with Detectron2
This document provides a brief intro of the usage of builtin command-line tools in detectron2.
For a tutorial that involves actual coding with the API,
see our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
which covers how to run inference with an
existing model, and how to train a builtin model on a custom dataset.
For more advanced tutorials, refer to our [documentation](https://detectron2.readthedocs.io/tutorials/extend.html).
### Inference Demo with Pre-trained Models
1. Pick a model and its config file from
[model zoo](MODEL_ZOO.md),
for example, `mask_rcnn_R_50_FPN_3x.yaml`.
2. We provide `demo.py` that is able to run builtin standard models. Run it with:
```
cd demo/
python demo.py --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \
--input input1.jpg input2.jpg \
[--other-options]
--opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl
```
The configs are made for training; therefore we need to point `MODEL.WEIGHTS` to a model from the model zoo for evaluation.
This command will run the inference and show visualizations in an OpenCV window.
For details of the command line arguments, see `demo.py -h` or look at its source code
to understand its behavior. Some common arguments are:
* To run __on your webcam__, replace `--input files` with `--webcam`.
* To run __on a video__, replace `--input files` with `--video-input video.mp4`.
* To run __on cpu__, add `MODEL.DEVICE cpu` after `--opts`.
* To save outputs to a directory (for images) or a file (for webcam or video), use `--output`.
### Training & Evaluation in Command Line
We provide a script in "tools/{,plain_}train_net.py" that is made to train
all the configs provided in detectron2.
You may want to use it as a reference to write your own training script.
To train a model with "train_net.py", first
set up the corresponding datasets following
[datasets/README.md](./datasets/README.md),
then run:
```
cd tools/
./train_net.py --num-gpus 8 \
--config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
```
The configs are made for 8-GPU training.
To train on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677), e.g.:
```
./train_net.py \
--config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \
--num-gpus 1 SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025
```
For most models, CPU training is not supported.
To evaluate a model's performance, use
```
./train_net.py \
--config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \
--eval-only MODEL.WEIGHTS /path/to/checkpoint_file
```
For more options, see `./train_net.py -h`.
### Use Detectron2 APIs in Your Code
See our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
to learn how to use detectron2 APIs to:
1. run inference with an existing model
2. train a builtin model on a custom dataset
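For example, inference with an existing model in your own script typically looks like the following sketch (the input image path is illustrative):
```python
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)
outputs = predictor(cv2.imread("input.jpg"))  # a dict with an "instances" field
```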
See [detectron2/projects](https://github.com/facebookresearch/detectron2/tree/master/projects)
for more ways to build your project on detectron2.

View File

@@ -0,0 +1,18 @@
Tutorials
======================================
.. toctree::
:maxdepth: 2
install
getting_started
builtin_datasets
extend
datasets
data_loading
models
write-models
training
evaluation
configs
deployment

View File

@@ -0,0 +1,184 @@
## Installation
Our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
has step-by-step instructions that install detectron2.
The [Dockerfile](docker)
also installs detectron2 with a few simple commands.
### Requirements
- Linux or macOS with Python ≥ 3.6
- PyTorch ≥ 1.4
- [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation.
You can install them together at [pytorch.org](https://pytorch.org) to make sure of this.
- OpenCV, optional, needed by demo and visualization
- pycocotools: `pip install cython; pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'`
### Build Detectron2 from Source
gcc & g++ ≥ 5 are required. [ninja](https://ninja-build.org/) is recommended for faster build.
After having them, run:
```
python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
# (add --user if you don't have permission)
# Or, to install it from a local clone:
git clone https://github.com/facebookresearch/detectron2.git
python -m pip install -e detectron2
# Or if you are on macOS
# CC=clang CXX=clang++ python -m pip install -e .
```
To __rebuild__ detectron2 that's built from a local clone, use `rm -rf build/ **/*.so` to clean the
old build first. You often need to rebuild detectron2 after reinstalling PyTorch.
### Install Pre-Built Detectron2 (Linux only)
```
# for CUDA 10.1:
python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/index.html
```
You can replace cu101 with "cu{100,92}" or "cpu".
Note that:
1. Such an installation has to be used with a certain version of the official PyTorch release.
See [releases](https://github.com/facebookresearch/detectron2/releases) for requirements.
It will not work with a different version of PyTorch or a non-official build of PyTorch.
2. Such an installation is out of date w.r.t. the master branch of detectron2. It may not be
compatible with the master branch of a research project that uses detectron2 (e.g. those in
[projects](projects) or [meshrcnn](https://github.com/facebookresearch/meshrcnn/)).
### Common Installation Issues
If you run into issues using the pre-built detectron2, please uninstall it and try building it from source.
Click each issue for its solutions:
<details>
<summary>
Undefined torch/aten/caffe2 symbols, or segmentation fault immediately when running the library.
</summary>
<br/>
This usually happens when detectron2 or torchvision is not
compiled with the version of PyTorch you're running.
Pre-built torchvision or detectron2 has to work with the corresponding official release of pytorch.
If the error comes from a pre-built torchvision, uninstall torchvision and pytorch and reinstall them
following [pytorch.org](http://pytorch.org) so that the versions match.
If the error comes from a pre-built detectron2, check [release notes](https://github.com/facebookresearch/detectron2/releases)
to see the corresponding pytorch version required for each pre-built detectron2.
If the error comes from detectron2 or torchvision that you built manually from source,
remove files you built (`build/`, `**/*.so`) and rebuild it so it can pick up the version of pytorch currently in your environment.
If you cannot resolve this problem, please include the output of `gdb -ex "r" -ex "bt" -ex "quit" --args python -m detectron2.utils.collect_env`
in your issue.
</details>
<details>
<summary>
Undefined C++ symbols (e.g. `GLIBCXX`) or C++ symbols not found.
</summary>
<br/>
Usually it's because the library is compiled with a newer C++ compiler but run with an old C++ runtime.
This often happens with an old version of anaconda.
Try `conda update libgcc`. Then rebuild detectron2.
The fundamental solution is to run the code with proper C++ runtime.
One way is to use `LD_PRELOAD=/path/to/libstdc++.so`.
</details>
<details>
<summary>
"Not compiled with GPU support" or "Detectron2 CUDA Compiler: not available".
</summary>
<br/>
CUDA is not found when building detectron2.
You should make sure
```
python -c 'import torch; from torch.utils.cpp_extension import CUDA_HOME; print(torch.cuda.is_available(), CUDA_HOME)'
```
prints valid outputs at the time you build detectron2.
Most models can run inference (but not training) without GPU support. To use CPUs, set `MODEL.DEVICE='cpu'` in the config.
</details>
<details>
<summary>
"invalid device function" or "no kernel image is available for execution".
</summary>
<br/>
Two possibilities:
* You build detectron2 with one version of CUDA but run it with a different version.
To check whether it is the case,
use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions.
In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA"
to contain cuda libraries of the same version.
When they are inconsistent,
you need to either install a different build of PyTorch (or build by yourself)
to match your local CUDA installation, or install a different version of CUDA to match PyTorch.
* Detectron2 or PyTorch/torchvision is not built for the correct GPU architecture (compute capability).
The GPU architecture for PyTorch/detectron2/torchvision is available in the "architecture flags" in
`python -m detectron2.utils.collect_env`.
The GPU architecture flags of detectron2/torchvision by default match the GPU model detected
during compilation. This means the compiled code may not work on a different GPU model.
To overwrite the GPU architecture for detectron2/torchvision, use `TORCH_CUDA_ARCH_LIST` environment variable during compilation.
For example, `export TORCH_CUDA_ARCH_LIST=6.0,7.0` makes it compile for both P100s and V100s.
Visit [developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus) to find out
the correct compute capability number for your device.
</details>
<details>
<summary>
Undefined CUDA symbols; cannot open libcudart.so; other nvcc failures.
</summary>
<br/>
The version of NVCC you use to build detectron2 or torchvision does
not match the version of CUDA you are running with.
This often happens when using anaconda's CUDA runtime.
Use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions.
In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA"
to contain cuda libraries of the same version.
When they are inconsistent,
you need to either install a different build of PyTorch (or build by yourself)
to match your local CUDA installation, or install a different version of CUDA to match PyTorch.
</details>
<details>
<summary>
"ImportError: cannot import name '_C'".
</summary>
<br/>
Please build and install detectron2 following the instructions above.
If you are running code from detectron2's root directory, `cd` to a different one.
Otherwise you may not import the code that you installed.
</details>
<details>
<summary>
ONNX conversion segfault after some "TraceWarning".
</summary>
<br/>
The ONNX package is compiled with a compiler that is too old.
Please build and install ONNX from its source code using a compiler
whose version is closer to what's used by PyTorch (available in `torch.__config__.show()`).
</details>

View File

@@ -0,0 +1,151 @@
# Use Models
Models (and their sub-models) in detectron2 are built by
functions such as `build_model`, `build_backbone`, `build_roi_heads`:
```python
from detectron2.modeling import build_model
model = build_model(cfg) # returns a torch.nn.Module
```
`build_model` only builds the model structure, and fills it with random parameters.
See below for how to load an existing checkpoint to the model,
and how to use the `model` object.
### Load/Save a Checkpoint
```python
from detectron2.checkpoint import DetectionCheckpointer
DetectionCheckpointer(model).load(file_path) # load a file to model
checkpointer = DetectionCheckpointer(model, save_dir="output")
checkpointer.save("model_999") # save to output/model_999.pth
```
Detectron2's checkpointer recognizes models in pytorch's `.pth` format, as well as the `.pkl` files
in our model zoo.
See [API doc](../modules/checkpoint.html#detectron2.checkpoint.DetectionCheckpointer)
for more details about its usage.
The model files can be arbitrarily manipulated using `torch.{load,save}` for `.pth` files or
`pickle.{dump,load}` for `.pkl` files.
### Use a Model
A model can be called by `outputs = model(inputs)`, where `inputs` is a `list[dict]`.
Each dict corresponds to one image and the required keys
depend on the type of model, and whether the model is in training or evaluation mode.
For example, in order to do inference,
all existing models expect the "image" key, and optionally "height" and "width".
The detailed format of inputs and outputs of existing models is explained below.
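For example, a minimal inference call might look like this (a sketch; the checkpoint and image paths are illustrative, and `model` comes from `build_model` above):
```python
import cv2
import torch
from detectron2.checkpoint import DetectionCheckpointer

DetectionCheckpointer(model).load("path/to/checkpoint_file")  # illustrative path
model.eval()

img = cv2.imread("input.jpg")  # BGR, HWC
height, width = img.shape[:2]
image = torch.as_tensor(img.astype("float32").transpose(2, 0, 1))
with torch.no_grad():
    outputs = model([{"image": image, "height": height, "width": width}])
```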
When in training mode, all models are required to be used under an `EventStorage`.
The training statistics will be put into the storage:
```python
from detectron2.utils.events import EventStorage
with EventStorage() as storage:
losses = model(inputs)
```
If you only want to do simple inference using an existing model,
[DefaultPredictor](../modules/engine.html#detectron2.engine.defaults.DefaultPredictor)
is a wrapper around model that provides such basic functionality.
It includes default behavior for model loading and preprocessing,
and operates on a single image rather than batches.
### Model Input Format
Users can implement custom models that support any arbitrary input format.
Here we describe the standard input format that all builtin models support in detectron2.
They all take a `list[dict]` as the inputs. Each dict
corresponds to information about one image.
The dict may contain the following keys:
* "image": `Tensor` in (C, H, W) format. The meaning of channels are defined by `cfg.INPUT.FORMAT`.
Image normalization, if any, will be performed inside the model using
`cfg.MODEL.PIXEL_{MEAN,STD}`.
* "instances": an [Instances](../modules/structures.html#detectron2.structures.Instances)
object, with the following fields:
+ "gt_boxes": a [Boxes](../modules/structures.html#detectron2.structures.Boxes) object storing N boxes, one for each instance.
+ "gt_classes": `Tensor` of long type, a vector of N labels, in range [0, num_categories).
+ "gt_masks": a [PolygonMasks](../modules/structures.html#detectron2.structures.PolygonMasks)
or [BitMasks](../modules/structures.html#detectron2.structures.BitMasks) object storing N masks, one for each instance.
+ "gt_keypoints": a [Keypoints](../modules/structures.html#detectron2.structures.Keypoints)
object storing N keypoint sets, one for each instance.
* "proposals": an [Instances](../modules/structures.html#detectron2.structures.Instances)
object used only in Fast R-CNN style models, with the following fields:
+ "proposal_boxes": a [Boxes](../modules/structures.html#detectron2.structures.Boxes) object storing P proposal boxes.
+ "objectness_logits": `Tensor`, a vector of P scores, one for each proposal.
* "height", "width": the **desired** output height and width, which is not necessarily the same
as the height or width of the `image` input field.
For example, the `image` input field might be a resized image,
but you may want the outputs to be in **original** resolution.
If provided, the model will produce output in this resolution,
rather than in the resolution of the `image` as input into the model. This is more efficient and accurate.
* "sem_seg": `Tensor[int]` in (H, W) format. The semantic segmentation ground truth.
Values represent category labels starting from 0.
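For illustration, a single training input might be assembled like this (a sketch; the boxes, labels, and sizes are made up):
```python
import torch
from detectron2.structures import Boxes, Instances

height, width = 480, 640
gt = Instances((height, width))
gt.gt_boxes = Boxes(torch.tensor([[10.0, 20.0, 200.0, 300.0]]))  # one box in XYXY format
gt.gt_classes = torch.tensor([0], dtype=torch.int64)

inputs = [{"image": image_tensor, "instances": gt}]  # "image_tensor" is a (C, H, W) tensor as described above
```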
#### How it connects to the data loader:
The output of the default [DatasetMapper]( ../modules/data.html#detectron2.data.DatasetMapper) is a dict
that follows the above format.
After the data loader performs batching, it becomes `list[dict]` which the builtin models support.
### Model Output Format
When in training mode, the builtin models output a `dict[str->ScalarTensor]` with all the losses.
When in inference mode, the builtin models output a `list[dict]`, one dict for each image.
Based on the tasks the model is doing, each dict may contain the following fields:
* "instances": [Instances](../modules/structures.html#detectron2.structures.Instances)
object with the following fields:
* "pred_boxes": [Boxes](../modules/structures.html#detectron2.structures.Boxes) object storing N boxes, one for each detected instance.
* "scores": `Tensor`, a vector of N scores.
* "pred_classes": `Tensor`, a vector of N labels in range [0, num_categories).
+ "pred_masks": a `Tensor` of shape (N, H, W), masks for each detected instance.
+ "pred_keypoints": a `Tensor` of shape (N, num_keypoint, 3).
Each row in the last dimension is (x, y, score). Scores are larger than 0.
* "sem_seg": `Tensor` of (num_categories, H, W), the semantic segmentation prediction.
* "proposals": [Instances](../modules/structures.html#detectron2.structures.Instances)
object with the following fields:
* "proposal_boxes": [Boxes](../modules/structures.html#detectron2.structures.Boxes)
object storing N boxes.
* "objectness_logits": a torch vector of N scores.
* "panoptic_seg": A tuple of `(Tensor, list[dict])`. The tensor has shape (H, W), where each element
represents the segment id of the pixel. Each dict describes one segment id and has the following fields:
* "id": the segment id
* "isthing": whether the segment is a thing or stuff
* "category_id": the category id of this segment. It represents the thing
class id when `isthing==True`, and the stuff class id otherwise.
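For illustration, reading the detection results out of `outputs` from an inference call might look like this (a sketch, assuming an instance-detection model):
```python
instances = outputs[0]["instances"].to("cpu")
boxes = instances.pred_boxes.tensor   # (N, 4) tensor in XYXY format
scores = instances.scores             # (N,) tensor
classes = instances.pred_classes      # (N,) tensor of class ids
```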
### Partially execute a model:
Sometimes you may want to obtain an intermediate tensor inside a model.
Since there are typically hundreds of intermediate tensors, there isn't an API that provides you with
the intermediate result you need.
You have the following options:
1. Write a (sub)model. Following the [tutorial](./write-models.md), you can
rewrite a model component (e.g. a head of a model), such that it
does the same thing as the existing component, but returns the output
you need.
2. Partially execute a model. You can create the model as usual,
but use custom code to execute it instead of its `forward()`. For example,
the following code obtains mask features before mask head.
```python
images = ImageList.from_tensors(...) # preprocessed input tensor
model = build_model(cfg)
features = model.backbone(images.tensor)
proposals, _ = model.proposal_generator(images, features)
instances = model.roi_heads._forward_box(features, proposals)
mask_features = [features[f] for f in model.roi_heads.in_features]
mask_features = model.roi_heads.mask_pooler(mask_features, [x.pred_boxes for x in instances])
```
Note that both options require you to read the existing forward code to understand
how to write code to obtain the outputs you need.

View File

@@ -0,0 +1,50 @@
# Training
From the previous tutorials, you may now have a custom model and data loader.
You are free to create your own optimizer, and write the training logic: it's
usually easy with PyTorch, and allows researchers to see the entire training
logic more clearly and have full control.
One such example is provided in [tools/plain_train_net.py](../../tools/plain_train_net.py).
We also provide a standardized "trainer" abstraction with a
[minimal hook system](../modules/engine.html#detectron2.engine.HookBase)
that helps simplify the standard types of training.
You can use
[SimpleTrainer().train()](../modules/engine.html#detectron2.engine.SimpleTrainer)
which provides minimal abstraction for single-cost single-optimizer single-data-source training.
The builtin `train_net.py` script uses
[DefaultTrainer().train()](../modules/engine.html#detectron2.engine.defaults.DefaultTrainer),
which includes more standard default behavior that one might want to opt in to,
including default configurations for the learning rate schedule,
logging, evaluation, checkpointing, etc.
This also means that it's less likely to support some non-standard behavior
you might want during research.
To customize the training loops, you can:
1. If your customization is similar to what `DefaultTrainer` is already doing,
you can change behavior of `DefaultTrainer` by overwriting [its methods](../modules/engine.html#detectron2.engine.defaults.DefaultTrainer)
in a subclass, like what [tools/train_net.py](../../tools/train_net.py) does (see the sketch after this list).
2. If you need something very novel, you can start from [tools/plain_train_net.py](../../tools/plain_train_net.py) to implement them yourself.
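As an illustration of the first option, the sketch below overrides `build_evaluator`; the evaluator choice and output directory are illustrative, and `cfg` is assumed to exist:
```python
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator

class MyTrainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name):
        # use COCO-style evaluation for every test dataset
        return COCOEvaluator(dataset_name, cfg, distributed=True, output_dir=cfg.OUTPUT_DIR)

trainer = MyTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
```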
### Logging of Metrics
During training, metrics are saved to a centralized [EventStorage](../modules/utils.html#detectron2.utils.events.EventStorage).
You can use the following code to access it and log metrics to it:
```python
from detectron2.utils.events import get_event_storage

# inside the model:
if self.training:
    value = ...  # compute the value from inputs
    storage = get_event_storage()
    storage.put_scalar("some_accuracy", value)
```
Refer to its documentation for more details.
Metrics are then saved to various destinations with [EventWriter](../modules/utils.html#module-detectron2.utils.events).
DefaultTrainer enables a few `EventWriter`s with default configurations.
See above for how to customize them.

View File

@@ -0,0 +1,39 @@
# Write Models
If you are trying to do something completely new, you may wish to implement
a model entirely from scratch within detectron2. However, in many situations you may
be interested in modifying or extending some components of an existing model.
Therefore, we also provide a registration mechanism that lets you override the
behavior of certain internal components of standard models.
For example, to add a new backbone, import this code in your code:
```python
import torch.nn as nn

from detectron2.modeling import BACKBONE_REGISTRY, Backbone, ShapeSpec

@BACKBONE_REGISTRY.register()
class ToyBackBone(Backbone):
    def __init__(self, cfg, input_shape):
        super().__init__()
        # create your own backbone
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=16, padding=3)

    def forward(self, image):
        return {"conv1": self.conv1(image)}

    def output_shape(self):
        return {"conv1": ShapeSpec(channels=64, stride=16)}
```
Then, you can use `cfg.MODEL.BACKBONE.NAME = 'ToyBackBone'` in your config object.
`build_model(cfg)` will then call your `ToyBackBone` instead.
As another example, to add new abilities to the ROI heads in the Generalized R-CNN meta-architecture,
you can implement a new
[ROIHeads](../modules/modeling.html#detectron2.modeling.ROIHeads) subclass and put it in the `ROI_HEADS_REGISTRY`.
See [densepose in detectron2](../../projects/DensePose)
and [meshrcnn](https://github.com/facebookresearch/meshrcnn)
for examples that implement new ROIHeads to perform new tasks.
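For illustration, a minimal registration might look like this (a sketch; the class is hypothetical and simply wraps the standard behavior):
```python
from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads

@ROI_HEADS_REGISTRY.register()
class MyROIHeads(StandardROIHeads):
    def forward(self, images, features, proposals, targets=None):
        # insert custom logic before/after the standard ROI heads here
        return super().forward(images, features, proposals, targets)
```
With this registered, setting `cfg.MODEL.ROI_HEADS.NAME = "MyROIHeads"` makes the Generalized R-CNN meta-architecture use it.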
And [projects/](../../projects/)
contains more examples that implement different architectures.
A complete list of registries can be found in [API documentation](../modules/modeling.html#model-registries).
You can register components in these registries to customize different parts of a model, or the
entire model.