Add at new repo again

2025-01-28 21:48:35 +00:00
commit 6e660ddb3c
564 changed files with 75575 additions and 0 deletions
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/README.md
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/README.md
@@ -0,0 +1,9 @@
+## Unit Tests
+
+To run the unit tests, run:
+```
+cd detectron2
+python -m unittest discover -v -s ./tests
+```
+
+There are also end-to-end inference & training tests, in [dev/run_*_tests.sh](../dev).
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/init.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/init.py
@@ -0,0 +1 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/data/init.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/data/init.py
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_coco.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_coco.py
@@ -0,0 +1,77 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import json
+import numpy as np
+import os
+import tempfile
+import unittest
+import pycocotools
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.data.datasets.coco import convert_to_coco_dict, load_coco_json
+from detectron2.structures import BoxMode
+
+
+def make_mask():
+    """
+    Makes a donut shaped binary mask.
+    """
+    H = 100
+    W = 100
+    mask = np.zeros([H, W], dtype=np.uint8)
+    for x in range(W):
+        for y in range(H):
+            d = np.linalg.norm(np.array([W, H]) / 2 - np.array([x, y]))
+            if d > 10 and d < 20:
+                mask[y, x] = 1
+    return mask
+
+
+def make_dataset_dicts(mask):
+    """
+    Returns a list of dicts that represents a single COCO data point for
+    object detection. The single instance given by `mask` is represented by
+    RLE.
+    """
+    record = {}
+    record["file_name"] = "test"
+    record["image_id"] = 0
+    record["height"] = mask.shape[0]
+    record["width"] = mask.shape[1]
+
+    y, x = np.nonzero(mask)
+    segmentation = pycocotools.mask.encode(np.asarray(mask, order="F"))
+    min_x = np.min(x)
+    max_x = np.max(x)
+    min_y = np.min(y)
+    max_y = np.max(y)
+    obj = {
+        "bbox": [min_x, min_y, max_x, max_y],
+        "bbox_mode": BoxMode.XYXY_ABS,
+        "category_id": 0,
+        "iscrowd": 0,
+        "segmentation": segmentation,
+    }
+    record["annotations"] = [obj]
+    return [record]
+
+
+class TestRLEToJson(unittest.TestCase):
+    def test(self):
+        # Make a dummy dataset.
+        mask = make_mask()
+        DatasetCatalog.register("test_dataset", lambda: make_dataset_dicts(mask))
+        MetadataCatalog.get("test_dataset").set(thing_classes=["test_label"])
+
+        # Dump to json.
+        json_dict = convert_to_coco_dict("test_dataset")
+        with tempfile.TemporaryDirectory() as tmpdir:
+            json_file_name = os.path.join(tmpdir, "test.json")
+            with open(json_file_name, "w") as f:
+                json.dump(json_dict, f)
+            # Load from json.
+            dicts = load_coco_json(json_file_name, "")
+
+        # Check the loaded mask matches the original.
+        anno = dicts[0]["annotations"][0]
+        loaded_mask = pycocotools.mask.decode(anno["segmentation"])
+        self.assertTrue(np.array_equal(loaded_mask, mask))
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_detection_utils.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_detection_utils.py
@@ -0,0 +1,116 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+import copy
+import numpy as np
+import unittest
+import pycocotools.mask as mask_util
+
+from detectron2.data import detection_utils
+from detectron2.data import transforms as T
+from detectron2.structures import BitMasks, BoxMode
+
+
+class TestTransformAnnotations(unittest.TestCase):
+    def test_transform_simple_annotation(self):
+        transforms = T.TransformList([T.HFlipTransform(400)])
+        anno = {
+            "bbox": np.asarray([10, 10, 200, 300]),
+            "bbox_mode": BoxMode.XYXY_ABS,
+            "category_id": 3,
+            "segmentation": [[10, 10, 100, 100, 100, 10], [150, 150, 200, 150, 200, 200]],
+        }
+
+        output = detection_utils.transform_instance_annotations(anno, transforms, (400, 400))
+        self.assertTrue(np.allclose(output["bbox"], [200, 10, 390, 300]))
+        self.assertEqual(len(output["segmentation"]), len(anno["segmentation"]))
+        self.assertTrue(np.allclose(output["segmentation"][0], [390, 10, 300, 100, 300, 10]))
+
+        detection_utils.annotations_to_instances([output, output], (400, 400))
+
+    def test_flip_keypoints(self):
+        transforms = T.TransformList([T.HFlipTransform(400)])
+        anno = {
+            "bbox": np.asarray([10, 10, 200, 300]),
+            "bbox_mode": BoxMode.XYXY_ABS,
+            "keypoints": np.random.rand(17, 3) * 50 + 15,
+        }
+
+        output = detection_utils.transform_instance_annotations(
+            copy.deepcopy(anno),
+            transforms,
+            (400, 400),
+            keypoint_hflip_indices=detection_utils.create_keypoint_hflip_indices(
+                ["keypoints_coco_2017_train"]
+            ),
+        )
+        # The first keypoint is nose
+        self.assertTrue(np.allclose(output["keypoints"][0, 0], 400 - anno["keypoints"][0, 0]))
+        # The last 16 keypoints are 8 left-right pairs
+        self.assertTrue(
+            np.allclose(
+                output["keypoints"][1:, 0].reshape(-1, 2)[:, ::-1],
+                400 - anno["keypoints"][1:, 0].reshape(-1, 2),
+            )
+        )
+        self.assertTrue(
+            np.allclose(
+                output["keypoints"][1:, 1:].reshape(-1, 2, 2)[:, ::-1, :],
+                anno["keypoints"][1:, 1:].reshape(-1, 2, 2),
+            )
+        )
+
+    def test_transform_RLE(self):
+        transforms = T.TransformList([T.HFlipTransform(400)])
+        mask = np.zeros((300, 400), order="F").astype("uint8")
+        mask[:, :200] = 1
+
+        anno = {
+            "bbox": np.asarray([10, 10, 200, 300]),
+            "bbox_mode": BoxMode.XYXY_ABS,
+            "segmentation": mask_util.encode(mask[:, :, None])[0],
+            "category_id": 3,
+        }
+        output = detection_utils.transform_instance_annotations(
+            copy.deepcopy(anno), transforms, (300, 400)
+        )
+        mask = output["segmentation"]
+        self.assertTrue((mask[:, 200:] == 1).all())
+        self.assertTrue((mask[:, :200] == 0).all())
+
+        inst = detection_utils.annotations_to_instances(
+            [output, output], (400, 400), mask_format="bitmask"
+        )
+        self.assertTrue(isinstance(inst.gt_masks, BitMasks))
+
+    def test_transform_RLE_resize(self):
+        transforms = T.TransformList(
+            [T.HFlipTransform(400), T.ScaleTransform(300, 400, 400, 400, "bilinear")]
+        )
+        mask = np.zeros((300, 400), order="F").astype("uint8")
+        mask[:, :200] = 1
+
+        anno = {
+            "bbox": np.asarray([10, 10, 200, 300]),
+            "bbox_mode": BoxMode.XYXY_ABS,
+            "segmentation": mask_util.encode(mask[:, :, None])[0],
+            "category_id": 3,
+        }
+        output = detection_utils.transform_instance_annotations(
+            copy.deepcopy(anno), transforms, (400, 400)
+        )
+
+        inst = detection_utils.annotations_to_instances(
+            [output, output], (400, 400), mask_format="bitmask"
+        )
+        self.assertTrue(isinstance(inst.gt_masks, BitMasks))
+
+    def test_gen_crop(self):
+        instance = {"bbox": [10, 10, 100, 100], "bbox_mode": BoxMode.XYXY_ABS}
+        t = detection_utils.gen_crop_transform_with_instance((10, 10), (150, 150), instance)
+        # the box center must fall into the cropped region
+        self.assertTrue(t.x0 <= 55 <= t.x0 + t.w)
+
+    def test_gen_crop_outside_boxes(self):
+        instance = {"bbox": [10, 10, 100, 100], "bbox_mode": BoxMode.XYXY_ABS}
+        with self.assertRaises(AssertionError):
+            detection_utils.gen_crop_transform_with_instance((10, 10), (15, 15), instance)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_rotation_transform.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_rotation_transform.py
@@ -0,0 +1,62 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import numpy as np
+import unittest
+
+from detectron2.data.transforms.transform import RotationTransform
+
+
+class TestRotationTransform(unittest.TestCase):
+    # Checks RotationTransform.apply_image / apply_coords against expected
+    # values written out by hand for a few angles.
+
+    def assertEqualsArrays(self, a1, a2):
+        # Passes when the two array-likes are elementwise close (np.allclose).
+        self.assertTrue(np.allclose(a1, a2))
+
+    def randomData(self, h=5, w=5):
+        # Returns a random h x w image, the (w + 1) * (h + 1) grid points
+        # [i, j] with 0 <= i <= w and 0 <= j <= h as floats, and h and w.
+        image = np.random.rand(h, w)
+        coords = np.array([[i, j] for j in range(h + 1) for i in range(w + 1)], dtype=float)
+        return image, coords, h, w
+
+    def test180(self):
+        # Expected: the image reversed along both axes, and each point
+        # (x, y) mapped to (w - x, h - y).
+        image, coords, h, w = self.randomData(6, 6)
+        rot = RotationTransform(h, w, 180, expand=False, center=None)
+        self.assertEqualsArrays(rot.apply_image(image), image[::-1, ::-1])
+        rotated_coords = [[w - c[0], h - c[1]] for c in coords]
+        self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords)
+
+    def test45_coords(self):
+        # Only coordinates are checked here; the image is not rotated.
+        _, coords, h, w = self.randomData(4, 6)
+        rot = RotationTransform(h, w, 45, expand=False, center=None)
+        rotated_coords = [
+            [(x + y - (h + w) / 2) / np.sqrt(2) + w / 2, h / 2 + (y + (w - h) / 2 - x) / np.sqrt(2)]
+            for (x, y) in coords
+        ]
+        self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords)
+
+    def test90(self):
+        # Square 5x5 input. Expected: the transposed image with rows reversed,
+        # and each point (x, y) mapped to (y, w - x).
+        image, coords, h, w = self.randomData()
+        rot = RotationTransform(h, w, 90, expand=False, center=None)
+        self.assertEqualsArrays(rot.apply_image(image), image.T[::-1])
+        rotated_coords = [[c[1], w - c[0]] for c in coords]
+        self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords)
+
+    def test90_expand(self):  # non-square image
+        # Same expectations as test90, with expand=True on a 5x8 image.
+        image, coords, h, w = self.randomData(h=5, w=8)
+        rot = RotationTransform(h, w, 90, expand=True, center=None)
+        self.assertEqualsArrays(rot.apply_image(image), image.T[::-1])
+        rotated_coords = [[c[1], w - c[0]] for c in coords]
+        self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords)
+
+    def test_center_expand(self):
+        # center has no effect if expand=True because it only affects shifting
+        image, coords, h, w = self.randomData(h=5, w=8)
+        # The angle is drawn from an unseeded RNG, so it differs between runs.
+        angle = np.random.randint(360)
+        rot1 = RotationTransform(h, w, angle, expand=True, center=None)
+        rot2 = RotationTransform(h, w, angle, expand=True, center=(0, 0))
+        rot3 = RotationTransform(h, w, angle, expand=True, center=(h, w))
+        rot4 = RotationTransform(h, w, angle, expand=True, center=(2, 5))
+        # All pairs of transforms must agree on both image and coordinates.
+        for r1 in [rot1, rot2, rot3, rot4]:
+            for r2 in [rot1, rot2, rot3, rot4]:
+                self.assertEqualsArrays(r1.apply_image(image), r2.apply_image(image))
+                self.assertEqualsArrays(r1.apply_coords(coords), r2.apply_coords(coords))
+
+
+# Run the tests in this module when the file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_sampler.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_sampler.py
@@ -0,0 +1,23 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+import unittest
+from torch.utils.data.sampler import SequentialSampler
+
+from detectron2.data.samplers import GroupedBatchSampler
+
+
+class TestGroupedBatchSampler(unittest.TestCase):
+    def test_missing_group_id(self):
+        sampler = SequentialSampler(list(range(100)))
+        group_ids = [1] * 100
+        samples = GroupedBatchSampler(sampler, group_ids, 2)
+
+        for mini_batch in samples:
+            self.assertEqual(len(mini_batch), 2)
+
+    def test_groups(self):
+        sampler = SequentialSampler(list(range(100)))
+        group_ids = [1, 0] * 50
+        samples = GroupedBatchSampler(sampler, group_ids, 2)
+
+        for mini_batch in samples:
+            self.assertEqual((mini_batch[0] + mini_batch[1]) % 2, 0)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_transforms.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_transforms.py
@@ -0,0 +1,134 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import logging
+import numpy as np
+import unittest
+from unittest import mock
+
+from detectron2.config import get_cfg
+from detectron2.data import detection_utils
+from detectron2.data import transforms as T
+from detectron2.utils.logger import setup_logger
+
+logger = logging.getLogger(__name__)
+
+
+class TestTransforms(unittest.TestCase):
+    def setUp(self):
+        setup_logger()
+
+    def test_apply_rotated_boxes(self):
+        np.random.seed(125)
+        cfg = get_cfg()
+        is_train = True
+        transform_gen = detection_utils.build_transform_gen(cfg, is_train)
+        image = np.random.rand(200, 300)
+        image, transforms = T.apply_transform_gens(transform_gen, image)
+        image_shape = image.shape[:2]  # h, w
+        assert image_shape == (800, 1200)
+        annotation = {"bbox": [179, 97, 62, 40, -56]}
+
+        boxes = np.array([annotation["bbox"]], dtype=np.float64)  # boxes.shape = (1, 5)
+        transformed_bbox = transforms.apply_rotated_box(boxes)[0]
+
+        expected_bbox = np.array([484, 388, 248, 160, 56], dtype=np.float64)
+        err_msg = "transformed_bbox = {}, expected {}".format(transformed_bbox, expected_bbox)
+        assert np.allclose(transformed_bbox, expected_bbox), err_msg
+
+    def test_apply_rotated_boxes_unequal_scaling_factor(self):
+        np.random.seed(125)
+        h, w = 400, 200
+        newh, neww = 800, 800
+        image = np.random.rand(h, w)
+        transform_gen = []
+        transform_gen.append(T.Resize(shape=(newh, neww)))
+        image, transforms = T.apply_transform_gens(transform_gen, image)
+        image_shape = image.shape[:2]  # h, w
+        assert image_shape == (newh, neww)
+
+        boxes = np.array(
+            [
+                [150, 100, 40, 20, 0],
+                [150, 100, 40, 20, 30],
+                [150, 100, 40, 20, 90],
+                [150, 100, 40, 20, -90],
+            ],
+            dtype=np.float64,
+        )
+        transformed_boxes = transforms.apply_rotated_box(boxes)
+
+        expected_bboxes = np.array(
+            [
+                [600, 200, 160, 40, 0],
+                [600, 200, 144.22205102, 52.91502622, 49.10660535],
+                [600, 200, 80, 80, 90],
+                [600, 200, 80, 80, -90],
+            ],
+            dtype=np.float64,
+        )
+        err_msg = "transformed_boxes = {}, expected {}".format(transformed_boxes, expected_bboxes)
+        assert np.allclose(transformed_boxes, expected_bboxes), err_msg
+
+    def test_print_transform_gen(self):
+        t = T.RandomCrop("relative", (100, 100))
+        self.assertTrue(str(t) == "RandomCrop(crop_type='relative', crop_size=(100, 100))")
+
+        t = T.RandomFlip(prob=0.5)
+        self.assertTrue(str(t) == "RandomFlip(prob=0.5)")
+
+        t = T.RandomFlip()
+        self.assertTrue(str(t) == "RandomFlip()")
+
+    def test_random_apply_prob_out_of_range_check(self):
+        # GIVEN
+        test_probabilities = {0.0: True, 0.5: True, 1.0: True, -0.01: False, 1.01: False}
+
+        # WHEN
+        for given_probability, is_valid in test_probabilities.items():
+            # THEN
+            if not is_valid:
+                self.assertRaises(AssertionError, T.RandomApply, None, prob=given_probability)
+            else:
+                T.RandomApply(T.NoOpTransform(), prob=given_probability)
+
+    def test_random_apply_wrapping_transform_gen_probability_occured_evaluation(self):
+        # GIVEN
+        transform_mock = mock.MagicMock(name="MockTransform", spec=T.TransformGen)
+        image_mock = mock.MagicMock(name="MockImage")
+        random_apply = T.RandomApply(transform_mock, prob=0.001)
+
+        # WHEN
+        with mock.patch.object(random_apply, "_rand_range", return_value=0.0001):
+            transform = random_apply.get_transform(image_mock)
+
+        # THEN
+        transform_mock.get_transform.assert_called_once_with(image_mock)
+        self.assertIsNot(transform, transform_mock)
+
+    def test_random_apply_wrapping_std_transform_probability_occured_evaluation(self):
+        # GIVEN
+        transform_mock = mock.MagicMock(name="MockTransform", spec=T.Transform)
+        image_mock = mock.MagicMock(name="MockImage")
+        random_apply = T.RandomApply(transform_mock, prob=0.001)
+
+        # WHEN
+        with mock.patch.object(random_apply, "_rand_range", return_value=0.0001):
+            transform = random_apply.get_transform(image_mock)
+
+        # THEN
+        self.assertIs(transform, transform_mock)
+
+    def test_random_apply_probability_not_occured_evaluation(self):
+        # GIVEN
+        transform_mock = mock.MagicMock(name="MockTransform", spec=T.TransformGen)
+        image_mock = mock.MagicMock(name="MockImage")
+        random_apply = T.RandomApply(transform_mock, prob=0.001)
+
+        # WHEN
+        with mock.patch.object(random_apply, "_rand_range", return_value=0.9):
+            transform = random_apply.get_transform(image_mock)
+
+        # THEN
+        transform_mock.get_transform.assert_not_called()
+        self.assertIsInstance(transform, T.NoOpTransform)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/init.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/init.py
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_mask_ops.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_mask_ops.py
@@ -0,0 +1,190 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import contextlib
+import io
+import numpy as np
+import unittest
+from collections import defaultdict
+import torch
+import tqdm
+from fvcore.common.benchmark import benchmark
+from fvcore.common.file_io import PathManager
+from pycocotools.coco import COCO
+from tabulate import tabulate
+from torch.nn import functional as F
+
+from detectron2.data import MetadataCatalog
+from detectron2.layers.mask_ops import (
+    pad_masks,
+    paste_mask_in_image_old,
+    paste_masks_in_image,
+    scale_boxes,
+)
+from detectron2.structures import BitMasks, Boxes, BoxMode, PolygonMasks
+from detectron2.structures.masks import polygons_to_bitmask
+
+
+def iou_between_full_image_bit_masks(a, b):
+    intersect = (a & b).sum()
+    union = (a | b).sum()
+    return intersect / union
+
+
+def rasterize_polygons_with_grid_sample(full_image_bit_mask, box, mask_size, threshold=0.5):
+    x0, y0, x1, y1 = box[0], box[1], box[2], box[3]
+
+    img_h, img_w = full_image_bit_mask.shape
+
+    mask_y = np.arange(0.0, mask_size) + 0.5  # mask y sample coords in [0.5, mask_size - 0.5]
+    mask_x = np.arange(0.0, mask_size) + 0.5  # mask x sample coords in [0.5, mask_size - 0.5]
+    mask_y = mask_y / mask_size * (y1 - y0) + y0
+    mask_x = mask_x / mask_size * (x1 - x0) + x0
+
+    mask_x = (mask_x - 0.5) / (img_w - 1) * 2 + -1
+    mask_y = (mask_y - 0.5) / (img_h - 1) * 2 + -1
+    gy, gx = torch.meshgrid(torch.from_numpy(mask_y), torch.from_numpy(mask_x))
+    ind = torch.stack([gx, gy], dim=-1).to(dtype=torch.float32)
+
+    full_image_bit_mask = torch.from_numpy(full_image_bit_mask)
+    mask = F.grid_sample(
+        full_image_bit_mask[None, None, :, :].to(dtype=torch.float32),
+        ind[None, :, :, :],
+        align_corners=True,
+    )
+
+    return mask[0, 0] >= threshold
+
+
+class TestMaskCropPaste(unittest.TestCase):
+    def setUp(self):
+        json_file = MetadataCatalog.get("coco_2017_val_100").json_file
+        if not PathManager.isfile(json_file):
+            raise unittest.SkipTest("{} not found".format(json_file))
+        with contextlib.redirect_stdout(io.StringIO()):
+            json_file = PathManager.get_local_path(json_file)
+            self.coco = COCO(json_file)
+
+    def test_crop_paste_consistency(self):
+        """
+        rasterize_polygons_within_box (used in training)
+        and
+        paste_masks_in_image (used in inference)
+        should be inverse operations to each other.
+
+        This function runs several implementation of the above two operations and prints
+        the reconstruction error.
+        """
+
+        anns = self.coco.loadAnns(self.coco.getAnnIds(iscrowd=False))  # avoid crowd annotations
+
+        selected_anns = anns[:100]
+
+        ious = []
+        for ann in tqdm.tqdm(selected_anns):
+            results = self.process_annotation(ann)
+            ious.append([k[2] for k in results])
+
+        ious = np.array(ious)
+        mean_ious = ious.mean(axis=0)
+        table = []
+        res_dic = defaultdict(dict)
+        for row, iou in zip(results, mean_ious):
+            table.append((row[0], row[1], iou))
+            res_dic[row[0]][row[1]] = iou
+        print(tabulate(table, headers=["rasterize", "paste", "iou"], tablefmt="simple"))
+        # assert that the reconstruction is good:
+        self.assertTrue(res_dic["polygon"]["aligned"] > 0.94)
+        self.assertTrue(res_dic["roialign"]["aligned"] > 0.95)
+
+    def process_annotation(self, ann, mask_side_len=28):
+        # Parse annotation data
+        img_info = self.coco.loadImgs(ids=[ann["image_id"]])[0]
+        height, width = img_info["height"], img_info["width"]
+        gt_polygons = [np.array(p, dtype=np.float64) for p in ann["segmentation"]]
+        gt_bbox = BoxMode.convert(ann["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
+        gt_bit_mask = polygons_to_bitmask(gt_polygons, height, width)
+
+        # Run rasterize ..
+        torch_gt_bbox = torch.tensor(gt_bbox).to(dtype=torch.float32).reshape(-1, 4)
+        box_bitmasks = {
+            "polygon": PolygonMasks([gt_polygons]).crop_and_resize(torch_gt_bbox, mask_side_len)[0],
+            "gridsample": rasterize_polygons_with_grid_sample(gt_bit_mask, gt_bbox, mask_side_len),
+            "roialign": BitMasks(torch.from_numpy(gt_bit_mask[None, :, :])).crop_and_resize(
+                torch_gt_bbox, mask_side_len
+            )[0],
+        }
+
+        # Run paste ..
+        results = defaultdict(dict)
+        for k, box_bitmask in box_bitmasks.items():
+            padded_bitmask, scale = pad_masks(box_bitmask[None, :, :], 1)
+            scaled_boxes = scale_boxes(torch_gt_bbox, scale)
+
+            r = results[k]
+            r["old"] = paste_mask_in_image_old(
+                padded_bitmask[0], scaled_boxes[0], height, width, threshold=0.5
+            )
+            r["aligned"] = paste_masks_in_image(
+                box_bitmask[None, :, :], Boxes(torch_gt_bbox), (height, width)
+            )[0]
+
+        table = []
+        for rasterize_method, r in results.items():
+            for paste_method, mask in r.items():
+                mask = np.asarray(mask)
+                iou = iou_between_full_image_bit_masks(gt_bit_mask.astype("uint8"), mask)
+                table.append((rasterize_method, paste_method, iou))
+        return table
+
+    def test_polygon_area(self):
+        # Draw polygon boxes
+        for d in [5.0, 10.0, 1000.0]:
+            polygon = PolygonMasks([[[0, 0, 0, d, d, d, d, 0]]])
+            area = polygon.area()[0]
+            target = d ** 2
+            self.assertEqual(area, target)
+
+        # Draw polygon triangles
+        for d in [5.0, 10.0, 1000.0]:
+            polygon = PolygonMasks([[[0, 0, 0, d, d, d]]])
+            area = polygon.area()[0]
+            target = d ** 2 / 2
+            self.assertEqual(area, target)
+
+
+def benchmark_paste():
+    S = 800
+    H, W = image_shape = (S, S)
+    N = 64
+    torch.manual_seed(42)
+    masks = torch.rand(N, 28, 28)
+
+    center = torch.rand(N, 2) * 600 + 100
+    wh = torch.clamp(torch.randn(N, 2) * 40 + 200, min=50)
+    x0y0 = torch.clamp(center - wh * 0.5, min=0.0)
+    x1y1 = torch.clamp(center + wh * 0.5, max=S)
+    boxes = Boxes(torch.cat([x0y0, x1y1], axis=1))
+
+    def func(device, n=3):
+        m = masks.to(device=device)
+        b = boxes.to(device=device)
+
+        def bench():
+            for _ in range(n):
+                paste_masks_in_image(m, b, image_shape)
+            if device.type == "cuda":
+                torch.cuda.synchronize()
+
+        return bench
+
+    specs = [{"device": torch.device("cpu"), "n": 3}]
+    if torch.cuda.is_available():
+        specs.append({"device": torch.device("cuda"), "n": 3})
+
+    benchmark(func, "paste_masks", specs, num_iters=10, warmup_iters=2)
+
+
+# When executed as a script, run the paste benchmark first and then the tests.
+if __name__ == "__main__":
+    benchmark_paste()
+    unittest.main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_nms_rotated.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_nms_rotated.py
@@ -0,0 +1,188 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+from __future__ import absolute_import, division, print_function, unicode_literals
+import numpy as np
+import unittest
+import torch
+from torchvision import ops
+
+from detectron2.layers import batched_nms, batched_nms_rotated, nms_rotated
+
+
+def nms_edit_distance(keep1, keep2):
+    """
+    Compare the "keep" result of two nms call.
+    They are allowed to be different in terms of edit distance
+    due to floating point precision issues, e.g.,
+    if a box happen to have an IoU of 0.5 with another box,
+    one implentation may choose to keep it while another may discard it.
+    """
+    if torch.equal(keep1, keep2):
+        # they should be equal most of the time
+        return 0
+    keep1, keep2 = tuple(keep1.cpu()), tuple(keep2.cpu())
+    m, n = len(keep1), len(keep2)
+
+    # edit distance with DP
+    f = [np.arange(n + 1), np.arange(n + 1)]
+    for i in range(m):
+        cur_row = i % 2
+        other_row = (i + 1) % 2
+        f[other_row][0] = i + 1
+        for j in range(n):
+            f[other_row][j + 1] = (
+                f[cur_row][j]
+                if keep1[i] == keep2[j]
+                else min(min(f[cur_row][j], f[cur_row][j + 1]), f[other_row][j]) + 1
+            )
+    return f[m % 2][n]
+
+
+class TestNMSRotated(unittest.TestCase):
+    def reference_horizontal_nms(self, boxes, scores, iou_threshold):
+        """
+        Args:
+            box_scores (N, 5): boxes in corner-form and probabilities.
+                (Note here 5 == 4 + 1, i.e., 4-dim horizontal box + 1-dim prob)
+            iou_threshold: intersection over union threshold.
+        Returns:
+             picked: a list of indexes of the kept boxes
+        """
+        picked = []
+        _, indexes = scores.sort(descending=True)
+        while len(indexes) > 0:
+            current = indexes[0]
+            picked.append(current.item())
+            if len(indexes) == 1:
+                break
+            current_box = boxes[current, :]
+            indexes = indexes[1:]
+            rest_boxes = boxes[indexes, :]
+            iou = ops.box_iou(rest_boxes, current_box.unsqueeze(0)).squeeze(1)
+            indexes = indexes[iou <= iou_threshold]
+
+        return torch.as_tensor(picked)
+
+    def _create_tensors(self, N):
+        boxes = torch.rand(N, 4) * 100
+        # Note: the implementation of this function in torchvision is:
+        # boxes[:, 2:] += torch.rand(N, 2) * 100
+        # but it does not guarantee non-negative widths/heights constraints:
+        # boxes[:, 2] >= boxes[:, 0] and boxes[:, 3] >= boxes[:, 1]:
+        boxes[:, 2:] += boxes[:, :2]
+        scores = torch.rand(N)
+        return boxes, scores
+
+    def test_batched_nms_rotated_0_degree_cpu(self):
+        N = 2000
+        num_classes = 50
+        boxes, scores = self._create_tensors(N)
+        idxs = torch.randint(0, num_classes, (N,))
+        rotated_boxes = torch.zeros(N, 5)
+        rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
+        rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
+        rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
+        rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
+        err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}"
+        for iou in [0.2, 0.5, 0.8]:
+            backup = boxes.clone()
+            keep_ref = batched_nms(boxes, scores, idxs, iou)
+            assert torch.allclose(boxes, backup), "boxes modified by batched_nms"
+            backup = rotated_boxes.clone()
+            keep = batched_nms_rotated(rotated_boxes, scores, idxs, iou)
+            assert torch.allclose(
+                rotated_boxes, backup
+            ), "rotated_boxes modified by batched_nms_rotated"
+            self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou))
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_batched_nms_rotated_0_degree_cuda(self):
+        N = 2000
+        num_classes = 50
+        boxes, scores = self._create_tensors(N)
+        idxs = torch.randint(0, num_classes, (N,))
+        rotated_boxes = torch.zeros(N, 5)
+        rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
+        rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
+        rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
+        rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
+        err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}"
+        for iou in [0.2, 0.5, 0.8]:
+            backup = boxes.clone()
+            keep_ref = batched_nms(boxes.cuda(), scores.cuda(), idxs, iou)
+            self.assertTrue(torch.allclose(boxes, backup), "boxes modified by batched_nms")
+            backup = rotated_boxes.clone()
+            keep = batched_nms_rotated(rotated_boxes.cuda(), scores.cuda(), idxs, iou)
+            self.assertTrue(
+                torch.allclose(rotated_boxes, backup),
+                "rotated_boxes modified by batched_nms_rotated",
+            )
+            self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou))
+
+    def test_nms_rotated_0_degree_cpu(self):
+        N = 1000
+        boxes, scores = self._create_tensors(N)
+        rotated_boxes = torch.zeros(N, 5)
+        rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
+        rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
+        rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
+        rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
+        err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}"
+        for iou in [0.5]:
+            keep_ref = self.reference_horizontal_nms(boxes, scores, iou)
+            keep = nms_rotated(rotated_boxes, scores, iou)
+            self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou))
+
+    def test_nms_rotated_90_degrees_cpu(self):
+        N = 1000
+        boxes, scores = self._create_tensors(N)
+        rotated_boxes = torch.zeros(N, 5)
+        rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
+        rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
+        # Note for rotated_boxes[:, 2] and rotated_boxes[:, 3]:
+        # widths and heights are intentionally swapped here for 90 degrees case
+        # so that the reference horizontal nms could be used
+        rotated_boxes[:, 2] = boxes[:, 3] - boxes[:, 1]
+        rotated_boxes[:, 3] = boxes[:, 2] - boxes[:, 0]
+
+        rotated_boxes[:, 4] = torch.ones(N) * 90
+        err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}"
+        for iou in [0.2, 0.5, 0.8]:
+            keep_ref = self.reference_horizontal_nms(boxes, scores, iou)
+            keep = nms_rotated(rotated_boxes, scores, iou)
+            assert torch.equal(keep, keep_ref), err_msg.format(iou)
+
+    def test_nms_rotated_180_degrees_cpu(self):
+        N = 1000
+        boxes, scores = self._create_tensors(N)
+        rotated_boxes = torch.zeros(N, 5)
+        rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
+        rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
+        rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
+        rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
+        rotated_boxes[:, 4] = torch.ones(N) * 180
+        err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}"
+        for iou in [0.2, 0.5, 0.8]:
+            keep_ref = self.reference_horizontal_nms(boxes, scores, iou)
+            keep = nms_rotated(rotated_boxes, scores, iou)
+            assert torch.equal(keep, keep_ref), err_msg.format(iou)
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_nms_rotated_0_degree_cuda(self):
+        N = 1000
+        boxes, scores = self._create_tensors(N)
+        rotated_boxes = torch.zeros(N, 5)
+        rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
+        rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
+        rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
+        rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
+        err_msg = "Rotated NMS incompatible between CPU and CUDA for IoU={}"
+
+        for iou in [0.2, 0.5, 0.8]:
+            r_cpu = nms_rotated(rotated_boxes, scores, iou)
+            r_cuda = nms_rotated(rotated_boxes.cuda(), scores.cuda(), iou)
+
+            assert torch.equal(r_cpu, r_cuda.cpu()), err_msg.format(iou)
+
+
+# Run the tests in this module when the file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_roi_align.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_roi_align.py
@@ -0,0 +1,152 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import numpy as np
+import unittest
+import cv2
+import torch
+from fvcore.common.benchmark import benchmark
+
+from detectron2.layers.roi_align import ROIAlign
+
+
+class ROIAlignTest(unittest.TestCase):
+    """
+    Tests for the ROIAlign layer: forward values, consistency across image
+    scales, and degenerate inputs (an empty box and an empty batch).
+    """
+
+    def test_forward_output(self):
+        """
+        Check the pooled values on a 5x5 ramp image, both with and without
+        the 0.5 (``aligned``) correction.
+        """
+        image = np.arange(25).reshape(5, 5).astype("float32")
+        # The image is:
+        #  0  1  2  3  4
+        #  5  6  7  8  9
+        # 10 11 12 13 14
+        # 15 16 17 18 19
+        # 20 21 22 23 24
+
+        output = self._simple_roialign(image, [1, 1, 3, 3], (4, 4), aligned=False)
+        output_correct = self._simple_roialign(image, [1, 1, 3, 3], (4, 4), aligned=True)
+
+        # without correction:
+        old_results = [
+            [7.5, 8, 8.5, 9],
+            [10, 10.5, 11, 11.5],
+            [12.5, 13, 13.5, 14],
+            [15, 15.5, 16, 16.5],
+        ]
+
+        # with 0.5 correction:
+        correct_results = [
+            [4.5, 5.0, 5.5, 6.0],
+            [7.0, 7.5, 8.0, 8.5],
+            [9.5, 10.0, 10.5, 11.0],
+            [12.0, 12.5, 13.0, 13.5],
+        ]
+        # This is an upsampled version of [[6, 7], [11, 12]]
+
+        self.assertTrue(np.allclose(output.flatten(), np.asarray(old_results).flatten()))
+        self.assertTrue(
+            np.allclose(output_correct.flatten(), np.asarray(correct_results).flatten())
+        )
+
+        # Also see similar issues in tensorflow at
+        # https://github.com/tensorflow/tensorflow/issues/26278
+
+    def test_resize(self):
+        """
+        Pooling a box from an image must give (almost) the same result as
+        pooling the half-size box from the image downsampled by 2.
+        """
+        H, W = 30, 30
+        image = np.random.rand(H, W).astype("float32") * 100
+        box = [10, 10, 20, 20]
+        output = self._simple_roialign(image, box, (5, 5), aligned=True)
+
+        image2x = cv2.resize(image, (W // 2, H // 2), interpolation=cv2.INTER_LINEAR)
+        box2x = [x / 2 for x in box]
+        output2x = self._simple_roialign(image2x, box2x, (5, 5), aligned=True)
+        diff = np.abs(output2x - output)
+        self.assertTrue(diff.max() < 1e-4)
+
+    def _simple_roialign(self, img, box, resolution, aligned=True):
+        """
+        RoiAlign with scale 1.0 and 0 sample ratio.
+
+        Args:
+            img: 2D numpy array, used as a single-channel, single-image batch.
+            box: the 4 box coordinates; a batch index of 0 is prepended.
+            resolution: output size, either an int or a pair.
+            aligned: forwarded to ROIAlign.
+
+        Returns:
+            The 2D CPU output for the single box. When CUDA is available the
+            CUDA output is also computed and asserted to be close to it.
+        """
+        if isinstance(resolution, int):
+            resolution = (resolution, resolution)
+        op = ROIAlign(resolution, 1.0, 0, aligned=aligned)
+        features = torch.from_numpy(img[None, None, :, :].astype("float32"))
+
+        rois = [0] + list(box)
+        rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32"))
+        output = op.forward(features, rois)
+        if torch.cuda.is_available():
+            output_cuda = op.forward(features.cuda(), rois.cuda()).cpu()
+            self.assertTrue(torch.allclose(output, output_cuda))
+        return output[0, 0]
+
+    def _simple_roialign_with_grad(self, img, box, resolution, device):
+        """
+        Same setup as `_simple_roialign` (always ``aligned=True``), but run on
+        `device` with gradients enabled on the input.
+
+        Returns:
+            (input tensor, output tensor); the input has ``requires_grad=True``
+            so callers can backpropagate through the output and read its grad.
+        """
+        if isinstance(resolution, int):
+            resolution = (resolution, resolution)
+
+        op = ROIAlign(resolution, 1.0, 0, aligned=True)
+        features = torch.from_numpy(img[None, None, :, :].astype("float32"))
+
+        rois = [0] + list(box)
+        rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32"))
+        features = features.to(device=device)
+        rois = rois.to(device=device)
+        features.requires_grad = True
+        output = op.forward(features, rois)
+        return features, output
+
+    def test_empty_box(self):
+        """
+        A box with zero height must produce an all-zero output and an
+        all-zero input gradient.
+        """
+        img = np.random.rand(5, 5)
+        box = [3, 4, 5, 4]
+        o = self._simple_roialign(img, box, 7)
+        self.assertTrue(o.shape == (7, 7))
+        self.assertTrue((o == 0).all())
+
+        # The parentheses matter: a conditional expression binds more loosely
+        # than "+", so without them the whole list would become [] (and the
+        # gradient check would be skipped) whenever CUDA is unavailable.
+        devices = ["cpu"] + (["cuda"] if torch.cuda.is_available() else [])
+        for dev in devices:
+            features, output = self._simple_roialign_with_grad(img, box, 7, torch.device(dev))
+            output.sum().backward()
+            self.assertTrue(torch.allclose(features.grad, torch.zeros_like(features)))
+
+    def test_empty_batch(self):
+        """An input with zero images and zero rois must yield a (0, C, 7, 7) output."""
+        features = torch.zeros(0, 3, 10, 10, dtype=torch.float32)
+        rois = torch.zeros(0, 5, dtype=torch.float32)
+        op = ROIAlign((7, 7), 1.0, 0, aligned=True)
+        output = op.forward(features, rois)
+        self.assertTrue(output.shape == (0, 3, 7, 7))
+
+
+def benchmark_roi_align():
+    """
+    Benchmark the compiled ``roi_align_forward`` op on random CUDA inputs.
+
+    The inputs are moved to the GPU, so this can only run where CUDA is available.
+    """
+    from detectron2 import _C
+
+    def random_boxes(mean_box, stdev, N, maxsize):
+        # Uniform noise in [0, stdev) added to `mean_box`, clamped to [0, maxsize].
+        jittered = torch.rand(N, 4) * stdev + torch.tensor(mean_box, dtype=torch.float)
+        jittered.clamp_(min=0, max=maxsize)
+        return jittered
+
+    def func(N, C, H, W, nboxes_per_img):
+        input = torch.rand(N, C, H, W)
+        rois_per_image = []
+        for k in range(N):
+            b = random_boxes([80, 80, 130, 130], 24, nboxes_per_img, H)
+            # try smaller boxes:
+            # b = random_boxes([100, 100, 110, 110], 4, nboxes_per_img, H)
+            # Prepend the image index k as the first column of every box row.
+            index_column = torch.zeros(nboxes_per_img, 1, dtype=torch.float32) + k
+            rois_per_image.append(torch.cat([index_column, b], dim=1))
+        rois = torch.cat(rois_per_image, dim=0)
+
+        input = input.cuda()
+        rois = rois.cuda()
+
+        def bench():
+            _C.roi_align_forward(input, rois, 1.0, 7, 7, 0, True)
+            # Wait for the GPU work to finish before returning to the timer.
+            torch.cuda.synchronize()
+
+        return bench
+
+    args = [{"N": 2, "C": 512, "H": 256, "W": 256, "nboxes_per_img": 500}]
+    benchmark(func, "cuda_roialign", args, num_iters=20, warmup_iters=1)
+
+
+if __name__ == "__main__":
+    # When run as a script: benchmark the CUDA op first (only if a GPU is
+    # available), then run the unit tests.
+    if torch.cuda.is_available():
+        benchmark_roi_align()
+    unittest.main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_roi_align_rotated.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_roi_align_rotated.py
@@ -0,0 +1,176 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import unittest
+import cv2
+import torch
+from torch.autograd import Variable, gradcheck
+
+from detectron2.layers.roi_align import ROIAlign
+from detectron2.layers.roi_align_rotated import ROIAlignRotated
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+
+class ROIAlignRotatedTest(unittest.TestCase):
+    """
+    Tests for ROIAlignRotated: forward values at multiples of 90 degrees,
+    consistency across image scales, empty boxes, and gradients (numerically
+    on CPU, and against ROIAlign on CUDA).
+    """
+
+    def _box_to_rotated_box(self, box, angle):
+        """
+        Convert an ``[x1, y1, x2, y2]`` box into
+        ``[x_center, y_center, width, height, angle]``.
+        """
+        return [
+            (box[0] + box[2]) / 2.0,
+            (box[1] + box[3]) / 2.0,
+            box[2] - box[0],
+            box[3] - box[1],
+            angle,
+        ]
+
+    def _rot90(self, img, num):
+        """
+        Rotate a 2D tensor by `num` quarter turns; each turn is a transpose
+        followed by a flip of the rows. Negative `num` is allowed.
+        """
+        num = num % 4  # note: -1 % 4 == 3
+        for _ in range(num):
+            img = img.transpose(0, 1).flip(0)
+        return img
+
+    def test_forward_output_0_90_180_270(self):
+        """
+        Check the pooled values of a 5x5 ramp image for a box rotated by
+        0, 90, 180 and 270 degrees.
+        """
+        # Neither tensor is modified below (`_rot90` returns new tensors or
+        # the input itself), so both can be built once outside the loop.
+        img = torch.arange(25, dtype=torch.float32).reshape(5, 5)
+        # The image is:
+        #  0  1  2  3  4
+        #  5  6  7  8  9
+        # 10 11 12 13 14
+        # 15 16 17 18 19
+        # 20 21 22 23 24
+
+        # Here's an explanation for 0 degree case:
+        # point 0 in the original input lies at [0.5, 0.5]
+        # (the center of bin [0, 1] x [0, 1])
+        # point 1 in the original input lies at [1.5, 0.5], etc.
+        # since the resolution is (4, 4) that divides [1, 3] x [1, 3]
+        # into 4 x 4 equal bins,
+        # the top-left bin is [1, 1.5] x [1, 1.5], and its center
+        # (1.25, 1.25) lies at the 3/4 position
+        # between point 0 and point 1, point 5 and point 6,
+        # point 0 and point 5, point 1 and point 6, so it can be calculated as
+        # 0.25*(0*0.25+1*0.75)+(5*0.25+6*0.75)*0.75 = 4.5
+        result_unrotated = torch.tensor(
+            [
+                [4.5, 5.0, 5.5, 6.0],
+                [7.0, 7.5, 8.0, 8.5],
+                [9.5, 10.0, 10.5, 11.0],
+                [12.0, 12.5, 13.0, 13.5],
+            ]
+        )
+        # This is also an upsampled version of [[6, 7], [11, 12]]
+
+        box = [1, 1, 3, 3]
+        for i in range(4):
+            # i = 0, 1, 2, 3 corresponding to 0, 90, 180, 270 degrees
+            rotated_box = self._box_to_rotated_box(box=box, angle=90 * i)
+
+            result = self._simple_roi_align_rotated(img=img, box=rotated_box, resolution=(4, 4))
+
+            # When the box is rotated by 90 degrees CCW,
+            # the result would be rotated by 90 degrees CW, thus it's -i here
+            result_expected = self._rot90(result_unrotated, -i)
+
+            assert torch.allclose(result, result_expected)
+
+    def test_resize(self):
+        """
+        Pooling a box from an image must give the same result as pooling the
+        half-size box from the image downsampled by 2.
+        """
+        H, W = 30, 30
+        image = torch.rand(H, W) * 100
+        box = [10, 10, 20, 20]
+        rotated_box = self._box_to_rotated_box(box, angle=0)
+        output = self._simple_roi_align_rotated(img=image, box=rotated_box, resolution=(5, 5))
+
+        image2x = cv2.resize(image.numpy(), (W // 2, H // 2), interpolation=cv2.INTER_LINEAR)
+        image2x = torch.from_numpy(image2x)
+        box2x = [x / 2 for x in box]
+        rotated_box2x = self._box_to_rotated_box(box2x, angle=0)
+        output2x = self._simple_roi_align_rotated(img=image2x, box=rotated_box2x, resolution=(5, 5))
+        assert torch.allclose(output2x, output)
+
+    def _simple_roi_align_rotated(self, img, box, resolution):
+        """
+        RoiAlignRotated with scale 1.0 and 0 sample ratio.
+
+        Args:
+            img: 2D tensor, used as a single-channel, single-image batch.
+            box: the 5 rotated-box values; a batch index of 0 is prepended.
+            resolution: output size passed to ROIAlignRotated.
+
+        Returns:
+            The 2D CPU output for the single box. When CUDA is available the
+            CUDA output is also computed and asserted to be close to it.
+        """
+        op = ROIAlignRotated(output_size=resolution, spatial_scale=1.0, sampling_ratio=0)
+        features = img[None, None, :, :]
+
+        rois = [0] + list(box)
+        rois = torch.tensor(rois, dtype=torch.float32)[None, :]
+        result_cpu = op.forward(features, rois)
+        if torch.cuda.is_available():
+            result_cuda = op.forward(features.cuda(), rois.cuda())
+            assert torch.allclose(result_cpu, result_cuda.cpu())
+        return result_cpu[0, 0]
+
+    def test_empty_box(self):
+        """A box with zero width and height must produce an all-zero output."""
+        img = torch.rand(5, 5)
+        out = self._simple_roi_align_rotated(img, [2, 3, 0, 0, 0], (7, 7))
+        self.assertTrue((out == 0).all())
+
+    def test_roi_align_rotated_gradcheck_cpu(self):
+        """Numerically verify the CPU gradients with ``torch.autograd.gradcheck``."""
+        dtype = torch.float64
+        device = torch.device("cpu")
+        roi_align_rotated_op = ROIAlignRotated(
+            output_size=(5, 5), spatial_scale=0.5, sampling_ratio=1
+        ).to(dtype=dtype, device=device)
+        x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True)
+        # roi format is (batch index, x_center, y_center, width, height, angle)
+        rois = torch.tensor(
+            [[0, 4.5, 4.5, 9, 9, 0], [0, 2, 7, 4, 4, 0], [0, 7, 7, 4, 4, 0]],
+            dtype=dtype,
+            device=device,
+        )
+
+        def func(input):
+            return roi_align_rotated_op(input, rois)
+
+        assert gradcheck(func, (x,)), "gradcheck failed for RoIAlignRotated CPU"
+        # The transposed view additionally exercises a non-contiguous input.
+        assert gradcheck(func, (x.transpose(2, 3),)), "gradcheck failed for RoIAlignRotated CPU"
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_roi_align_rotated_gradient_cuda(self):
+        """
+        Compute gradients for ROIAlignRotated with multiple bounding boxes on the GPU,
+        and compare the result with ROIAlign
+        """
+        # torch.manual_seed(123)
+        dtype = torch.float64
+        device = torch.device("cuda")
+        pool_h, pool_w = (5, 5)
+
+        roi_align = ROIAlign(output_size=(pool_h, pool_w), spatial_scale=1, sampling_ratio=2).to(
+            device=device
+        )
+
+        roi_align_rotated = ROIAlignRotated(
+            output_size=(pool_h, pool_w), spatial_scale=1, sampling_ratio=2
+        ).to(device=device)
+
+        x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True)
+        # x_rotated must be an independent leaf tensor so that it accumulates
+        # its own .grad; a plain x.clone() would stay attached to x's graph.
+        # detach().clone() replaces the deprecated Variable(x.data.clone(), ...).
+        x_rotated = x.detach().clone().requires_grad_(True)
+
+        # roi_rotated format is (batch index, x_center, y_center, width, height, angle)
+        rois_rotated = torch.tensor(
+            [[0, 4.5, 4.5, 9, 9, 0], [0, 2, 7, 4, 4, 0], [0, 7, 7, 4, 4, 0]],
+            dtype=dtype,
+            device=device,
+        )
+
+        y_rotated = roi_align_rotated(x_rotated, rois_rotated)
+        s_rotated = y_rotated.sum()
+        s_rotated.backward()
+
+        # roi format is (batch index, x1, y1, x2, y2)
+        rois = torch.tensor(
+            [[0, 0, 0, 9, 9], [0, 0, 5, 4, 9], [0, 5, 5, 9, 9]], dtype=dtype, device=device
+        )
+
+        y = roi_align(x, rois)
+        s = y.sum()
+        s.backward()
+
+        assert torch.allclose(
+            x.grad, x_rotated.grad
+        ), "gradients for ROIAlign and ROIAlignRotated mismatch on CUDA"
+
+
+# Run this module's tests when the file is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/__init__.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/__init__.py
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_anchor_generator.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_anchor_generator.py
@@ -0,0 +1,121 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import unittest
+import torch
+
+from detectron2.config import get_cfg
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.anchor_generator import DefaultAnchorGenerator, RotatedAnchorGenerator
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+
+class TestAnchorGenerator(unittest.TestCase):
+    """
+    Compares the anchors produced for a 1x2 feature map (stride 4) against
+    hand-written expected tensors, for the default and the rotated generator.
+    """
+
+    def test_default_anchor_generator(self):
+        """DefaultAnchorGenerator built from a config node."""
+        cfg = get_cfg()
+        cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]
+        cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1, 4]]
+
+        generator = DefaultAnchorGenerator(cfg, [ShapeSpec(stride=4)])
+
+        # only the last two dimensions of features matter here
+        num_images = 2
+        feature_map = torch.rand(num_images, 96, 1, 2)
+        anchors = generator([feature_map])
+        # Rows 0-5 belong to the first grid cell, rows 6-11 to the second one.
+        expected = torch.tensor(
+            [
+                [-32.0, -8.0, 32.0, 8.0],
+                [-16.0, -16.0, 16.0, 16.0],
+                [-8.0, -32.0, 8.0, 32.0],
+                [-64.0, -16.0, 64.0, 16.0],
+                [-32.0, -32.0, 32.0, 32.0],
+                [-16.0, -64.0, 16.0, 64.0],
+                [-28.0, -8.0, 36.0, 8.0],  # -28.0 == -32.0 + STRIDE (4)
+                [-12.0, -16.0, 20.0, 16.0],
+                [-4.0, -32.0, 12.0, 32.0],
+                [-60.0, -16.0, 68.0, 16.0],
+                [-28.0, -32.0, 36.0, 32.0],
+                [-12.0, -64.0, 20.0, 64.0],
+            ]
+        )
+
+        assert torch.allclose(anchors[0].tensor, expected)
+
+    def test_default_anchor_generator_centered(self):
+        """
+        DefaultAnchorGenerator built from explicit keyword arguments.
+
+        Every expected coordinate is 2 (half the stride) larger than in the
+        config-based test above; presumably this comes from a different
+        default offset in the two construction paths -- TODO confirm.
+        """
+        # test explicit args
+        generator = DefaultAnchorGenerator(
+            sizes=[32, 64], aspect_ratios=[0.25, 1, 4], strides=[4]
+        )
+
+        # only the last two dimensions of features matter here
+        num_images = 2
+        feature_map = torch.rand(num_images, 96, 1, 2)
+        expected = torch.tensor(
+            [
+                [-30.0, -6.0, 34.0, 10.0],
+                [-14.0, -14.0, 18.0, 18.0],
+                [-6.0, -30.0, 10.0, 34.0],
+                [-62.0, -14.0, 66.0, 18.0],
+                [-30.0, -30.0, 34.0, 34.0],
+                [-14.0, -62.0, 18.0, 66.0],
+                [-26.0, -6.0, 38.0, 10.0],
+                [-10.0, -14.0, 22.0, 18.0],
+                [-2.0, -30.0, 14.0, 34.0],
+                [-58.0, -14.0, 70.0, 18.0],
+                [-26.0, -30.0, 38.0, 34.0],
+                [-10.0, -62.0, 22.0, 66.0],
+            ]
+        )
+
+        anchors = generator([feature_map])
+        assert torch.allclose(anchors[0].tensor, expected)
+
+        # doesn't work yet
+        # anchors = torch.jit.script(anchor_generator)([features["stage3"]])
+        # assert torch.allclose(anchors[0].tensor, expected_anchor_tensor)
+
+    def test_rrpn_anchor_generator(self):
+        """RotatedAnchorGenerator built from a config node, with angles 0 and 45."""
+        cfg = get_cfg()
+        cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]
+        cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1, 4]]
+        cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [0, 45]  # test single list[float]
+        generator = RotatedAnchorGenerator(cfg, [ShapeSpec(stride=4)])
+
+        # only the last two dimensions of features matter here
+        num_images = 2
+        feature_map = torch.rand(num_images, 96, 1, 2)
+        anchors = generator([feature_map])
+        # Rows 0-11 belong to the first grid cell, rows 12-23 to the second one.
+        expected = torch.tensor(
+            [
+                [0.0, 0.0, 64.0, 16.0, 0.0],
+                [0.0, 0.0, 64.0, 16.0, 45.0],
+                [0.0, 0.0, 32.0, 32.0, 0.0],
+                [0.0, 0.0, 32.0, 32.0, 45.0],
+                [0.0, 0.0, 16.0, 64.0, 0.0],
+                [0.0, 0.0, 16.0, 64.0, 45.0],
+                [0.0, 0.0, 128.0, 32.0, 0.0],
+                [0.0, 0.0, 128.0, 32.0, 45.0],
+                [0.0, 0.0, 64.0, 64.0, 0.0],
+                [0.0, 0.0, 64.0, 64.0, 45.0],
+                [0.0, 0.0, 32.0, 128.0, 0.0],
+                [0.0, 0.0, 32.0, 128.0, 45.0],
+                [4.0, 0.0, 64.0, 16.0, 0.0],  # 4.0 == 0.0 + STRIDE (4)
+                [4.0, 0.0, 64.0, 16.0, 45.0],
+                [4.0, 0.0, 32.0, 32.0, 0.0],
+                [4.0, 0.0, 32.0, 32.0, 45.0],
+                [4.0, 0.0, 16.0, 64.0, 0.0],
+                [4.0, 0.0, 16.0, 64.0, 45.0],
+                [4.0, 0.0, 128.0, 32.0, 0.0],
+                [4.0, 0.0, 128.0, 32.0, 45.0],
+                [4.0, 0.0, 64.0, 64.0, 0.0],
+                [4.0, 0.0, 64.0, 64.0, 45.0],
+                [4.0, 0.0, 32.0, 128.0, 0.0],
+                [4.0, 0.0, 32.0, 128.0, 45.0],
+            ]
+        )
+
+        assert torch.allclose(anchors[0].tensor, expected)
+
+
+# Run this module's tests when the file is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_box2box_transform.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_box2box_transform.py
@@ -0,0 +1,64 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import unittest
+import torch
+
+from detectron2.modeling.box_regression import Box2BoxTransform, Box2BoxTransformRotated
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+
+def random_boxes(mean_box, stdev, N):
+    """
+    Return an (N, 4) float tensor: `mean_box` plus uniform noise drawn
+    from [0, stdev) independently for every coordinate.
+    """
+    noise = torch.rand(N, 4) * stdev
+    offset = torch.tensor(mean_box, dtype=torch.float)
+    return noise + offset
+
+
+class TestBox2BoxTransform(unittest.TestCase):
+    """Round-trip test for Box2BoxTransform."""
+
+    def test_reconstruction(self):
+        """
+        Applying the deltas computed from (src, dst) back to src must
+        reproduce dst, on CPU and (when available) on CUDA.
+        """
+        transform = Box2BoxTransform(weights=(5, 5, 10, 10))
+        src_cpu = random_boxes([10, 10, 20, 20], 1, 10)
+        dst_cpu = random_boxes([10, 10, 20, 20], 1, 10)
+
+        device_names = ["cpu", "cuda"] if torch.cuda.is_available() else ["cpu"]
+        for device_name in device_names:
+            device = torch.device(device_name)
+            src = src_cpu.to(device=device)
+            dst = dst_cpu.to(device=device)
+            deltas = transform.get_deltas(src, dst)
+            reconstructed = transform.apply_deltas(deltas, src)
+            assert torch.allclose(dst, reconstructed)
+
+
+def random_rotated_boxes(mean_box, std_length, std_angle, N):
+    """
+    Return an (N, 5) float tensor: `mean_box` plus uniform noise, drawn from
+    [0, std_length) for the first four columns and [0, std_angle) for the last.
+    """
+    length_noise = torch.rand(N, 4) * std_length
+    angle_noise = torch.rand(N, 1) * std_angle
+    offset = torch.tensor(mean_box, dtype=torch.float)
+    return torch.cat([length_noise, angle_noise], dim=1) + offset
+
+
+class TestBox2BoxTransformRotated(unittest.TestCase):
+    """Round-trip test for Box2BoxTransformRotated."""
+
+    def test_reconstruction(self):
+        """
+        Applying the deltas computed from (src, dst) back to src must
+        reproduce dst, on CPU and (when available) on CUDA. Angles are
+        compared modulo 360 degrees.
+        """
+        transform = Box2BoxTransformRotated(weights=(5, 5, 10, 10, 1))
+        src_cpu = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10)
+        dst_cpu = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10)
+
+        device_names = ["cpu", "cuda"] if torch.cuda.is_available() else ["cpu"]
+        for device_name in device_names:
+            device = torch.device(device_name)
+            src = src_cpu.to(device=device)
+            dst = dst_cpu.to(device=device)
+            deltas = transform.get_deltas(src, dst)
+            reconstructed = transform.apply_deltas(deltas, src)
+            assert torch.allclose(dst[:, :4], reconstructed[:, :4], atol=1e-5)
+            # angle difference has to be normalized
+            # (wrapped into [-180, 180) before comparing it with zero)
+            angle_error = (dst[:, 4] - reconstructed[:, 4] + 180.0) % 360.0 - 180.0
+            assert torch.allclose(
+                angle_error,
+                torch.zeros_like(dst[:, 4]),
+                atol=1e-4,
+            )
+
+
+# Run this module's tests when the file is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_fast_rcnn.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_fast_rcnn.py
@@ -0,0 +1,106 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import unittest
+import torch
+
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.box_regression import Box2BoxTransform, Box2BoxTransformRotated
+from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers
+from detectron2.modeling.roi_heads.rotated_fast_rcnn import RotatedFastRCNNOutputLayers
+from detectron2.structures import Boxes, Instances, RotatedBoxes
+from detectron2.utils.events import EventStorage
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+
+class FastRCNNTest(unittest.TestCase):
+    """Loss and inference checks for the (rotated) Fast R-CNN output layers."""
+
+    def test_fast_rcnn(self):
+        """Losses for two fixed proposals must match the recorded reference values."""
+        torch.manual_seed(132)
+
+        feature_dim = 8
+
+        # NOTE: the predictor is built before the features are sampled, as in
+        # the recorded run; presumably its initialization also draws from the
+        # seeded RNG, so this order should not be changed -- confirm.
+        predictor = FastRCNNOutputLayers(
+            ShapeSpec(channels=feature_dim),
+            box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)),
+            num_classes=5,
+        )
+        predictions = predictor(torch.rand(2, feature_dim))
+
+        proposal = Instances((10, 10))
+        proposal.proposal_boxes = Boxes(
+            torch.tensor([[0.8, 1.1, 3.2, 2.8], [2.3, 2.5, 7, 8]], dtype=torch.float32)
+        )
+        proposal.gt_boxes = Boxes(torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32))
+        proposal.gt_classes = torch.tensor([1, 2])
+
+        with EventStorage():  # capture events in a new storage to discard them
+            losses = predictor.losses(predictions, [proposal])
+
+        expected_losses = {
+            "loss_cls": torch.tensor(1.7951188087),
+            "loss_box_reg": torch.tensor(4.0357131958),
+        }
+        for name, expected in expected_losses.items():
+            assert torch.allclose(losses[name], expected)
+
+    def test_fast_rcnn_empty_batch(self, device="cpu"):
+        """
+        With zero proposals, all losses must be zero, backward must still
+        populate the gradients, and inference must return no predictions.
+
+        `device` defaults to CPU; the CUDA variant below re-runs this test
+        on the GPU.
+        """
+        predictor = FastRCNNOutputLayers(
+            ShapeSpec(channels=10),
+            box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)),
+            num_classes=8,
+        ).to(device=device)
+
+        logits = torch.randn(0, 100, requires_grad=True, device=device)
+        deltas = torch.randn(0, 4, requires_grad=True, device=device)
+        losses = predictor.losses([logits, deltas], [])
+        for loss in losses.values():
+            self.assertTrue(torch.allclose(loss, torch.zeros_like(loss)))
+        total_loss = sum(losses.values())
+        total_loss.backward()
+        self.assertTrue(logits.grad is not None)
+        self.assertTrue(deltas.grad is not None)
+
+        predictions, _ = predictor.inference([logits, deltas], [])
+        self.assertEqual(len(predictions), 0)
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_fast_rcnn_empty_batch_cuda(self):
+        """Run the empty-batch test on the GPU."""
+        self.test_fast_rcnn_empty_batch(device=torch.device("cuda"))
+
+    def test_fast_rcnn_rotated(self):
+        """Rotated counterpart of `test_fast_rcnn`, with 5-value boxes."""
+        torch.manual_seed(132)
+        feature_dim = 8
+
+        # NOTE: same construction order as in the recorded run (see above).
+        predictor = RotatedFastRCNNOutputLayers(
+            ShapeSpec(channels=feature_dim),
+            box2box_transform=Box2BoxTransformRotated(weights=(10, 10, 5, 5, 1)),
+            num_classes=5,
+        )
+        predictions = predictor(torch.rand(2, feature_dim))
+
+        proposal = Instances((10, 10))
+        proposal.proposal_boxes = RotatedBoxes(
+            torch.tensor([[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]], dtype=torch.float32)
+        )
+        proposal.gt_boxes = RotatedBoxes(
+            torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32)
+        )
+        proposal.gt_classes = torch.tensor([1, 2])
+
+        with EventStorage():  # capture events in a new storage to discard them
+            losses = predictor.losses(predictions, [proposal])
+
+        # Note: the expected losses are slightly different even if
+        # the boxes are essentially the same as in the FastRCNNOutput test, because
+        # bbox_pred in FastRCNNOutputLayers have different Linear layers/initialization
+        # between the two cases.
+        expected_losses = {
+            "loss_cls": torch.tensor(1.7920907736),
+            "loss_box_reg": torch.tensor(4.0410838127),
+        }
+        for name, expected in expected_losses.items():
+            assert torch.allclose(losses[name], expected)
+
+
+# Run this module's tests when the file is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_model_e2e.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_model_e2e.py
@@ -0,0 +1,154 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+
+import unittest
+import torch
+
+import detectron2.model_zoo as model_zoo
+from detectron2.config import get_cfg
+from detectron2.modeling import build_model
+from detectron2.structures import BitMasks, Boxes, ImageList, Instances
+from detectron2.utils.events import EventStorage
+
+
+def get_model_zoo(config_path):
+    """
+    Build the model described by a model-zoo config file.
+
+    Like model_zoo.get, but do not load any weights (even pretrained).
+    The model is configured for CPU when CUDA is not available.
+    """
+    cfg = get_cfg()
+    cfg.merge_from_file(model_zoo.get_config_file(config_path))
+    if not torch.cuda.is_available():
+        cfg.MODEL.DEVICE = "cpu"
+    return build_model(cfg)
+
+
+def create_model_input(img, inst=None):
+    """
+    Wrap an image (and optionally its instances) into a model input dict.
+
+    The "instances" key is only present when `inst` is not None.
+    """
+    sample = {"image": img}
+    if inst is not None:
+        sample["instances"] = inst
+    return sample
+
+
+def get_empty_instance(h, w):
+    """
+    Return ground-truth Instances for an (h, w) image that contain no
+    objects: zero boxes, zero classes and zero masks.
+    """
+    empty = Instances((h, w))
+    empty.gt_boxes = Boxes(torch.rand(0, 4))
+    empty.gt_classes = torch.tensor([]).to(dtype=torch.int64)
+    empty.gt_masks = BitMasks(torch.rand(0, h, w))
+    return empty
+
+
+def get_regular_bitmask_instances(h, w):
+    """
+    Return ground-truth Instances for an (h, w) image with three random
+    boxes, the classes 3, 4 and 5, and three random binary masks.
+    """
+    sample = Instances((h, w))
+    gt_boxes = Boxes(torch.rand(3, 4))
+    sample.gt_boxes = gt_boxes
+    # Add the first coordinate pair to the second one in place, so the
+    # second pair is never smaller than the first.
+    gt_boxes.tensor[:, 2:] += gt_boxes.tensor[:, :2]
+    sample.gt_classes = torch.tensor([3, 4, 5], dtype=torch.int64)
+    random_masks = torch.rand(3, h, w) > 0.5
+    sample.gt_masks = BitMasks(random_masks)
+    return sample
+
+
+class ModelE2ETest:
+    """
+    Mixin with end-to-end smoke tests shared by the model-specific test cases.
+
+    Subclasses define ``CONFIG_PATH`` and also inherit from ``unittest.TestCase``.
+    """
+
+    def setUp(self):
+        """Build a fresh model from ``CONFIG_PATH`` with a fixed seed."""
+        torch.manual_seed(43)
+        self.model = get_model_zoo(self.CONFIG_PATH)
+
+    def _test_eval(self, input_sizes):
+        """Run the model in eval mode on random images of the given (h, w) sizes."""
+        inputs = []
+        for height, width in input_sizes:
+            inputs.append(create_model_input(torch.rand(3, height, width)))
+        self.model.eval()
+        self.model(inputs)
+
+    def _test_train(self, input_sizes, instances):
+        """
+        Run one forward/backward pass in training mode on random images of
+        the given (h, w) sizes, paired with the given ground-truth instances.
+        """
+        assert len(input_sizes) == len(instances)
+        inputs = []
+        for (height, width), inst in zip(input_sizes, instances):
+            inputs.append(create_model_input(torch.rand(3, height, width), inst))
+        self.model.train()
+        with EventStorage():
+            losses = self.model(inputs)
+            total_loss = sum(losses.values())
+            total_loss.backward()
+            del losses
+
+    def _inf_tensor(self, *shape):
+        """Return a tensor of the given shape on the model's device, filled with +inf."""
+        zeros = torch.zeros(*shape, device=self.model.device)
+        return 1.0 / zeros
+
+    def _nan_tensor(self, *shape):
+        """Return a tensor of the given shape on the model's device, filled with NaN."""
+        filled = torch.zeros(*shape, device=self.model.device)
+        return filled.fill_(float("nan"))
+
+    def test_empty_data(self):
+        """Eval and train must both work when no image has any ground truth."""
+        sizes = [(200, 250), (200, 249)]
+        instances = [get_empty_instance(200, 250), get_empty_instance(200, 249)]
+        self._test_eval(sizes)
+        self._test_train(sizes, instances)
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
+    def test_eval_tocpu(self):
+        """A freshly built model that is moved to the CPU must still run in eval mode."""
+        model = get_model_zoo(self.CONFIG_PATH).cpu()
+        model.eval()
+        inputs = []
+        for height, width in [(200, 250), (200, 249)]:
+            inputs.append(create_model_input(torch.rand(3, height, width)))
+        model(inputs)
+
+
+class MaskRCNNE2ETest(ModelE2ETest, unittest.TestCase):
+    """End-to-end tests for the Mask R-CNN R50-FPN model-zoo config."""
+
+    CONFIG_PATH = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml"
+
+    def test_half_empty_data(self):
+        """Training must work when only one of the two images has ground truth."""
+        sizes = [(200, 250), (200, 249)]
+        instances = [get_empty_instance(200, 250), get_regular_bitmask_instances(200, 249)]
+        self._test_train(sizes, instances)
+
+    # This test is flaky because in some environment the output features are zero due to relu
+    # def test_rpn_inf_nan_data(self):
+    #     self.model.eval()
+    #     for tensor in [self._inf_tensor, self._nan_tensor]:
+    #         images = ImageList(tensor(1, 3, 512, 512), [(510, 510)])
+    #         features = {
+    #             "p2": tensor(1, 256, 256, 256),
+    #             "p3": tensor(1, 256, 128, 128),
+    #             "p4": tensor(1, 256, 64, 64),
+    #             "p5": tensor(1, 256, 32, 32),
+    #             "p6": tensor(1, 256, 16, 16),
+    #         }
+    #         props, _ = self.model.proposal_generator(images, features)
+    #         self.assertEqual(len(props[0]), 0)
+
+    def test_roiheads_inf_nan_data(self):
+        """The ROI heads must return no detections for all-inf or all-NaN inputs."""
+        self.model.eval()
+        # Spatial size of the square feature map fed in at each level.
+        level_sizes = {"p2": 256, "p3": 128, "p4": 64, "p5": 32, "p6": 16}
+        for make_tensor in (self._inf_tensor, self._nan_tensor):
+            images = ImageList(make_tensor(1, 3, 512, 512), [(510, 510)])
+            features = {
+                level: make_tensor(1, 256, size, size) for level, size in level_sizes.items()
+            }
+            proposal = Instances((510, 510))
+            proposal.proposal_boxes = Boxes([[10, 10, 20, 20]]).to(device=self.model.device)
+            proposal.objectness_logits = torch.tensor([1.0]).reshape(1, 1)
+            det, _ = self.model.roi_heads(images, features, [proposal])
+            self.assertEqual(len(det[0]), 0)
+
+
+class RetinaNetE2ETest(ModelE2ETest, unittest.TestCase):
+    """End-to-end tests for the RetinaNet R50-FPN model-zoo config."""
+
+    CONFIG_PATH = "COCO-Detection/retinanet_R_50_FPN_1x.yaml"
+
+    def test_inf_nan_data(self):
+        """
+        With all-inf or all-NaN head outputs, inference must not return any
+        finite box coordinate.
+        """
+        self.model.eval()
+        self.model.score_threshold = -999999999
+        # Spatial size of the square feature map fed in at each level.
+        level_sizes = (128, 64, 32, 16, 8)
+        for make_tensor in (self._inf_tensor, self._nan_tensor):
+            images = ImageList(make_tensor(1, 3, 512, 512), [(510, 510)])
+            features = [make_tensor(1, 256, size, size) for size in level_sizes]
+            anchors = self.model.anchor_generator(features)
+            box_cls, box_delta = self.model.head(features)
+            # Only the shapes of the head outputs are kept; the values are
+            # replaced by inf/NaN tensors of the same shape.
+            box_cls = [make_tensor(*k.shape) for k in box_cls]
+            box_delta = [make_tensor(*k.shape) for k in box_delta]
+            det = self.model.inference(box_cls, box_delta, anchors, images.image_sizes)
+            # all predictions (if any) are infinite or nan
+            if len(det[0]) > 0:
+                self.assertTrue(torch.isfinite(det[0].pred_boxes.tensor).sum() == 0)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_roi_heads.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_roi_heads.py
@@ -0,0 +1,136 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import unittest
+import torch
+
+from detectron2.config import get_cfg
+from detectron2.modeling.backbone import build_backbone
+from detectron2.modeling.proposal_generator.build import build_proposal_generator
+from detectron2.modeling.roi_heads import build_roi_heads
+from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes
+from detectron2.utils.events import EventStorage
+
+logger = logging.getLogger(__name__)
+
+
+class ROIHeadsTest(unittest.TestCase):
+    """
+    Regression tests that pin the training losses returned by the ROI heads.
+
+    Each test seeds the RNG, pushes random images/features and hand-written
+    ground truth through a proposal generator and the ROI heads, and compares
+    the resulting losses with previously recorded values.
+    """
+
+    def _assert_losses_close(self, detector_losses, expected_losses):
+        """
+        Check every entry of `expected_losses` against `detector_losses`.
+
+        Args:
+            detector_losses (dict): loss name -> tensor computed by the ROI heads.
+            expected_losses (dict): loss name -> recorded reference tensor.
+        """
+        for name, expected in expected_losses.items():
+            # Format the message up front so a failure reports both values.
+            err_msg = "detector_losses[{}] = {}, expected losses = {}".format(
+                name, detector_losses[name], expected
+            )
+            self.assertTrue(torch.allclose(detector_losses[name], expected), err_msg)
+
+    def test_roi_heads(self):
+        """StandardROIHeads fed 4-column `Boxes` ground truth reproduces the recorded losses."""
+        # Seed first: the random inputs below (and presumably the weights of the
+        # modules built here) come from the global RNG, so the recorded losses
+        # only hold while the statement order stays the same.
+        torch.manual_seed(121)
+        cfg = get_cfg()
+        cfg.MODEL.ROI_HEADS.NAME = "StandardROIHeads"
+        cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
+        cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
+        cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2"
+        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5)
+        backbone = build_backbone(cfg)
+        num_images = 2
+        images_tensor = torch.rand(num_images, 20, 30)
+        image_sizes = [(10, 10), (20, 30)]
+        images = ImageList(images_tensor, image_sizes)
+        num_channels = 1024
+        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
+
+        # One `Instances` per image, each carrying two boxes and their classes.
+        image_shape = (15, 15)
+        gt_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32)
+        gt_instance0 = Instances(image_shape)
+        gt_instance0.gt_boxes = Boxes(gt_boxes0)
+        gt_instance0.gt_classes = torch.tensor([2, 1])
+        gt_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]], dtype=torch.float32)
+        gt_instance1 = Instances(image_shape)
+        gt_instance1.gt_boxes = Boxes(gt_boxes1)
+        gt_instance1.gt_classes = torch.tensor([1, 2])
+        gt_instances = [gt_instance0, gt_instance1]
+
+        proposal_generator = build_proposal_generator(cfg, backbone.output_shape())
+        roi_heads = build_roi_heads(cfg, backbone.output_shape())
+
+        with EventStorage():  # capture events in a new storage to discard them
+            # Only the proposals are needed here; the proposal losses are not checked.
+            proposals, _ = proposal_generator(images, features, gt_instances)
+            _, detector_losses = roi_heads(images, features, proposals, gt_instances)
+
+        expected_losses = {
+            "loss_cls": torch.tensor(4.4236516953),
+            "loss_box_reg": torch.tensor(0.0091214813),
+        }
+        self._assert_losses_close(detector_losses, expected_losses)
+
+    def test_rroi_heads(self):
+        """RROIHeads fed 5-column `RotatedBoxes` ground truth reproduces the recorded losses."""
+        # Seed first; the recorded losses depend on the order of the RNG-consuming steps.
+        torch.manual_seed(121)
+        cfg = get_cfg()
+        cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
+        cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
+        cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"
+        cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
+        cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
+        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
+        cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
+        cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
+        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
+        backbone = build_backbone(cfg)
+        num_images = 2
+        images_tensor = torch.rand(num_images, 20, 30)
+        image_sizes = [(10, 10), (20, 30)]
+        images = ImageList(images_tensor, image_sizes)
+        num_channels = 1024
+        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
+
+        # Rotated ground truth: five numbers per box (the fifth is presumably the
+        # rotation angle in degrees -- TODO confirm against RotatedBoxes).
+        image_shape = (15, 15)
+        gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]], dtype=torch.float32)
+        gt_instance0 = Instances(image_shape)
+        gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
+        gt_instance0.gt_classes = torch.tensor([2, 1])
+        gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]], dtype=torch.float32)
+        gt_instance1 = Instances(image_shape)
+        gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
+        gt_instance1.gt_classes = torch.tensor([1, 2])
+        gt_instances = [gt_instance0, gt_instance1]
+
+        proposal_generator = build_proposal_generator(cfg, backbone.output_shape())
+        roi_heads = build_roi_heads(cfg, backbone.output_shape())
+
+        with EventStorage():  # capture events in a new storage to discard them
+            proposals, _ = proposal_generator(images, features, gt_instances)
+            _, detector_losses = roi_heads(images, features, proposals, gt_instances)
+
+        expected_losses = {
+            "loss_cls": torch.tensor(4.381618499755859),
+            "loss_box_reg": torch.tensor(0.0011829272843897343),
+        }
+        self._assert_losses_close(detector_losses, expected_losses)
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_roi_pooler.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_roi_pooler.py
@@ -0,0 +1,94 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import unittest
+import torch
+
+from detectron2.modeling.poolers import ROIPooler
+from detectron2.structures import Boxes, RotatedBoxes
+
+logger = logging.getLogger(__name__)
+
+
+class TestROIPooler(unittest.TestCase):
+    """Checks that ROIAlignV2 and ROIAlignRotated agree on boxes with zero rotation."""
+
+    def _rand_boxes(self, num_boxes, x_max, y_max):
+        """
+        Draw `num_boxes` random boxes as (x0, y0, x1, y1) rows with x0 <= x1 and y0 <= y1.
+
+        Args:
+            num_boxes (int): number of rows to generate.
+            x_max (float): upper bound for the x coordinates.
+            y_max (float): upper bound for the y coordinates.
+
+        Returns:
+            Tensor: float tensor of shape (num_boxes, 4).
+        """
+        # Two random corner points per box, stretched from [0, 1) to the given extent.
+        corners = torch.rand(num_boxes, 4)
+        corners *= corners.new_tensor([x_max, y_max, x_max, y_max])
+        # Order the corners so the first point holds the per-axis minimum.
+        lower = torch.min(corners[:, :2], corners[:, 2:])
+        upper = torch.max(corners[:, :2], corners[:, 2:])
+        return torch.cat([lower, upper], dim=1)
+
+    def _test_roialignv2_roialignrotated_match(self, device):
+        """
+        Pool the same regions with both pooler types and require matching outputs.
+
+        Args:
+            device (str): torch device on which features and boxes are placed.
+        """
+        pooler_resolution = 14
+        canonical_level = 4
+        canonical_scale_factor = 2 ** canonical_level
+        sampling_ratio = 0
+        # Settings shared by both poolers; only `pooler_type` differs between them.
+        shared_kwargs = {
+            "output_size": pooler_resolution,
+            "scales": (1.0 / canonical_scale_factor,),
+            "sampling_ratio": sampling_ratio,
+        }
+
+        N, C, H, W = 2, 4, 10, 8
+        N_rois = 10
+        std = 11
+        mean = 0
+        # Uniform noise in [mean - std, mean + std).
+        feature = (torch.rand(N, C, H, W) - 0.5) * 2 * std + mean
+        features = [feature.to(device)]
+
+        rois = []
+        rois_rotated = []
+        for _ in range(N):
+            boxes = self._rand_boxes(
+                num_boxes=N_rois, x_max=W * canonical_scale_factor, y_max=H * canonical_scale_factor
+            )
+            # Same regions as (center x, center y, width, height, angle) rows, angle 0.
+            centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
+            sizes = boxes[:, 2:] - boxes[:, :2]
+            angles = torch.zeros(N_rois, 1)
+            rotated_boxes = torch.cat([centers, sizes, angles], dim=1)
+            rois.append(Boxes(boxes).to(device))
+            rois_rotated.append(RotatedBoxes(rotated_boxes).to(device))
+
+        roialignv2_pooler = ROIPooler(pooler_type="ROIAlignV2", **shared_kwargs)
+        roialignv2_out = roialignv2_pooler(features, rois)
+
+        roialignrotated_pooler = ROIPooler(pooler_type="ROIAlignRotated", **shared_kwargs)
+        roialignrotated_out = roialignrotated_pooler(features, rois_rotated)
+
+        self.assertTrue(torch.allclose(roialignv2_out, roialignrotated_out, atol=1e-4))
+
+    def test_roialignv2_roialignrotated_match_cpu(self):
+        """Run the comparison on the CPU."""
+        self._test_roialignv2_roialignrotated_match(device="cpu")
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_roialignv2_roialignrotated_match_cuda(self):
+        """Run the comparison on a CUDA device when one is available."""
+        self._test_roialignv2_roialignrotated_match(device="cuda")
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_rpn.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_rpn.py
@@ -0,0 +1,266 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import unittest
+import torch
+
+from detectron2.config import get_cfg
+from detectron2.modeling.backbone import build_backbone
+from detectron2.modeling.proposal_generator.build import build_proposal_generator
+from detectron2.modeling.proposal_generator.rpn_outputs import find_top_rpn_proposals
+from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes
+from detectron2.utils.events import EventStorage
+
+logger = logging.getLogger(__name__)
+
+
+class RPNTest(unittest.TestCase):
+    """
+    Regression tests for the proposal generators.
+
+    `test_rpn` and `test_rrpn` seed the RNG, run a proposal generator on random
+    features and compare its losses, proposal boxes and objectness logits with
+    previously recorded values.
+    """
+
+    def test_rpn(self):
+        """The `RPN` generator with `DefaultAnchorGenerator` reproduces the recorded outputs."""
+        # Seed first: the recorded values only hold for this exact order of
+        # RNG-consuming steps.
+        torch.manual_seed(121)
+        cfg = get_cfg()
+        cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RPN"
+        cfg.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator"
+        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1)
+        backbone = build_backbone(cfg)
+        proposal_generator = build_proposal_generator(cfg, backbone.output_shape())
+        num_images = 2
+        images_tensor = torch.rand(num_images, 20, 30)
+        image_sizes = [(10, 10), (20, 30)]
+        images = ImageList(images_tensor, image_sizes)
+        image_shape = (15, 15)
+        num_channels = 1024
+        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
+        gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32)
+        gt_instances = Instances(image_shape)
+        gt_instances.gt_boxes = Boxes(gt_boxes)
+        with EventStorage():  # capture events in a new storage to discard them
+            # Indexing hands each of the two images one of the two ground-truth boxes.
+            proposals, proposal_losses = proposal_generator(
+                images, features, [gt_instances[0], gt_instances[1]]
+            )
+
+        expected_losses = {
+            "loss_rpn_cls": torch.tensor(0.0804563984),
+            "loss_rpn_loc": torch.tensor(0.0990132466),
+        }
+        for name in expected_losses.keys():
+            err_msg = "proposal_losses[{}] = {}, expected losses = {}".format(
+                name, proposal_losses[name], expected_losses[name]
+            )
+            self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg)
+
+        expected_proposal_boxes = [
+            Boxes(torch.tensor([[0, 0, 10, 10], [7.3365392685, 0, 10, 10]])),
+            Boxes(
+                torch.tensor(
+                    [
+                        [0, 0, 30, 20],
+                        [0, 0, 16.7862777710, 13.1362524033],
+                        [0, 0, 30, 13.3173446655],
+                        [0, 0, 10.8602609634, 20],
+                        [7.7165775299, 0, 27.3875980377, 20],
+                    ]
+                )
+            ),
+        ]
+
+        expected_objectness_logits = [
+            torch.tensor([0.1225359365, -0.0133192837]),
+            torch.tensor([0.1415634006, 0.0989848152, 0.0565387346, -0.0072308783, -0.0428492837]),
+        ]
+
+        for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip(
+            proposals, expected_proposal_boxes, image_sizes, expected_objectness_logits
+        ):
+            self.assertEqual(len(proposal), len(expected_proposal_box))
+            self.assertEqual(proposal.image_size, im_size)
+            # Attach the compared tensors so a mismatch can be diagnosed from the log.
+            err_msg = "computed proposal boxes = {}, expected {}".format(
+                proposal.proposal_boxes.tensor, expected_proposal_box.tensor
+            )
+            self.assertTrue(
+                torch.allclose(proposal.proposal_boxes.tensor, expected_proposal_box.tensor),
+                err_msg,
+            )
+
+            err_msg = "computed objectness logits = {}, expected {}".format(
+                proposal.objectness_logits, expected_objectness_logit
+            )
+            self.assertTrue(
+                torch.allclose(proposal.objectness_logits, expected_objectness_logit), err_msg
+            )
+
+    def test_rrpn(self):
+        """The `RRPN` generator with `RotatedAnchorGenerator` reproduces the recorded outputs."""
+        # Seed first: the recorded values only hold for this exact order of
+        # RNG-consuming steps.
+        torch.manual_seed(121)
+        cfg = get_cfg()
+        cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
+        cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
+        cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]
+        cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1]]
+        cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [[0, 60]]
+        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
+        cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
+        backbone = build_backbone(cfg)
+        proposal_generator = build_proposal_generator(cfg, backbone.output_shape())
+        num_images = 2
+        images_tensor = torch.rand(num_images, 20, 30)
+        image_sizes = [(10, 10), (20, 30)]
+        images = ImageList(images_tensor, image_sizes)
+        image_shape = (15, 15)
+        num_channels = 1024
+        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
+        gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32)
+        gt_instances = Instances(image_shape)
+        gt_instances.gt_boxes = RotatedBoxes(gt_boxes)
+        with EventStorage():  # capture events in a new storage to discard them
+            # Indexing hands each of the two images one of the two ground-truth boxes.
+            proposals, proposal_losses = proposal_generator(
+                images, features, [gt_instances[0], gt_instances[1]]
+            )
+
+        expected_losses = {
+            "loss_rpn_cls": torch.tensor(0.043263837695121765),
+            "loss_rpn_loc": torch.tensor(0.14432406425476074),
+        }
+        for name in expected_losses.keys():
+            err_msg = "proposal_losses[{}] = {}, expected losses = {}".format(
+                name, proposal_losses[name], expected_losses[name]
+            )
+            self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg)
+
+        expected_proposal_boxes = [
+            RotatedBoxes(
+                torch.tensor(
+                    [
+                        [0.60189795, 1.24095452, 61.98131943, 18.03621292, -4.07244873],
+                        [15.64940453, 1.69624567, 59.59749603, 16.34339333, 2.62692475],
+                        [-3.02982378, -2.69752932, 67.90952301, 59.62455750, 59.97010040],
+                        [16.71863365, 1.98309708, 35.61507797, 32.81484985, 62.92267227],
+                        [0.49432933, -7.92979717, 67.77606201, 62.93098450, -1.85656738],
+                        [8.00880814, 1.36017394, 121.81007385, 32.74150467, 50.44297409],
+                        [16.44299889, -4.82221127, 63.39775848, 61.22503662, 54.12270737],
+                        [5.00000000, 5.00000000, 10.00000000, 10.00000000, -0.76943970],
+                        [17.64130402, -0.98095351, 61.40377808, 16.28918839, 55.53118134],
+                        [0.13016054, 4.60568953, 35.80157471, 32.30180359, 62.52872086],
+                        [-4.26460743, 0.39604485, 124.30079651, 31.84611320, -1.58203125],
+                        [7.52815342, -0.91636634, 62.39784622, 15.45565224, 60.79549789],
+                    ]
+                )
+            ),
+            RotatedBoxes(
+                torch.tensor(
+                    [
+                        [0.07734215, 0.81635046, 65.33510590, 17.34688377, -1.51821899],
+                        [-3.41833067, -3.11320257, 64.17595673, 60.55617905, 58.27033234],
+                        [20.67383385, -6.16561556, 63.60531998, 62.52315903, 54.85546494],
+                        [15.00000000, 10.00000000, 30.00000000, 20.00000000, -0.18218994],
+                        [9.22646523, -6.84775209, 62.09895706, 65.46472931, -2.74307251],
+                        [15.00000000, 4.93451595, 30.00000000, 9.86903191, -0.60272217],
+                        [8.88342094, 2.65560246, 120.95362854, 32.45022202, 55.75970078],
+                        [16.39088631, 2.33887148, 34.78761292, 35.61492920, 60.81977463],
+                        [9.78298569, 10.00000000, 19.56597137, 20.00000000, -0.86660767],
+                        [1.28576660, 5.49873352, 34.93610382, 33.22600174, 60.51599884],
+                        [17.58912468, -1.63270092, 62.96052551, 16.45713997, 52.91245270],
+                        [5.64749718, -1.90428460, 62.37649155, 16.19474792, 61.09543991],
+                        [0.82255805, 2.34931135, 118.83985901, 32.83671188, 56.50753784],
+                        [-5.33874989, 1.64404404, 125.28501892, 33.35424042, -2.80731201],
+                    ]
+                )
+            ),
+        ]
+
+        expected_objectness_logits = [
+            torch.tensor(
+                [
+                    0.10111768,
+                    0.09112845,
+                    0.08466332,
+                    0.07589971,
+                    0.06650183,
+                    0.06350251,
+                    0.04299347,
+                    0.01864817,
+                    0.00986163,
+                    0.00078543,
+                    -0.04573630,
+                    -0.04799230,
+                ]
+            ),
+            torch.tensor(
+                [
+                    0.11373727,
+                    0.09377633,
+                    0.05281663,
+                    0.05143715,
+                    0.04040275,
+                    0.03250912,
+                    0.01307789,
+                    0.01177734,
+                    0.00038105,
+                    -0.00540255,
+                    -0.01194804,
+                    -0.01461012,
+                    -0.03061717,
+                    -0.03599222,
+                ]
+            ),
+        ]
+
+        # Print enough digits for the failure messages below to be useful. The
+        # setting is process-wide, so restore torch's defaults once this test ends.
+        torch.set_printoptions(precision=8, sci_mode=False)
+        self.addCleanup(torch.set_printoptions, profile="default")
+
+        for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip(
+            proposals, expected_proposal_boxes, image_sizes, expected_objectness_logits
+        ):
+            self.assertEqual(len(proposal), len(expected_proposal_box))
+            self.assertEqual(proposal.image_size, im_size)
+            # The result appears to vary slightly between machines: repeated runs on one
+            # machine give exactly the same numbers, but a different machine may produce
+            # slightly different ones, hence the atol here.
+            err_msg = "computed proposal boxes = {}, expected {}".format(
+                proposal.proposal_boxes.tensor, expected_proposal_box.tensor
+            )
+            self.assertTrue(
+                torch.allclose(
+                    proposal.proposal_boxes.tensor, expected_proposal_box.tensor, atol=1e-5
+                ),
+                err_msg,
+            )
+
+            err_msg = "computed objectness logits = {}, expected {}".format(
+                proposal.objectness_logits, expected_objectness_logit
+            )
+            self.assertTrue(
+                torch.allclose(proposal.objectness_logits, expected_objectness_logit, atol=1e-5),
+                err_msg,
+            )
+
+    def test_rpn_proposals_inf(self):
+        """Smoke test: `find_top_rpn_proposals` must not raise when some logits are +inf."""
+        N, Hi, Wi, A = 3, 3, 3, 3
+        proposals = [torch.rand(N, Hi * Wi * A, 4)]
+        pred_logits = [torch.rand(N, Hi * Wi * A)]
+        # Poison two logits of batch entry 1 with infinity.
+        pred_logits[0][1][3:5].fill_(float("inf"))
+        images = ImageList.from_tensors([torch.rand(3, 10, 10)] * 3)
+        # No assertion on the result: completing without an exception is the check.
+        find_top_rpn_proposals(proposals, pred_logits, images, 0.5, 1000, 1000, 0, False)
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/init.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/init.py
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_boxes.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_boxes.py
@@ -0,0 +1,207 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import json
+import math
+import numpy as np
+import unittest
+import torch
+
+from detectron2.structures import Boxes, BoxMode, pairwise_iou
+
+
+class TestBoxMode(unittest.TestCase):
+    """Tests for `BoxMode.convert` on several container types and for JSON round-trips."""
+
+    def _convert_xy_to_wh(self, x):
+        """Convert `x` from XYXY_ABS to XYWH_ABS."""
+        return BoxMode.convert(x, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+
+    def _convert_xywha_to_xyxy(self, x):
+        """Convert `x` from XYWHA_ABS to XYXY_ABS."""
+        return BoxMode.convert(x, BoxMode.XYWHA_ABS, BoxMode.XYXY_ABS)
+
+    def _convert_xywh_to_xywha(self, x):
+        """Convert `x` from XYWH_ABS to XYWHA_ABS."""
+        return BoxMode.convert(x, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS)
+
+    def test_box_convert_list(self):
+        """A flat list/tuple box keeps its container type; a nested list is rejected."""
+        for tp in [list, tuple]:
+            box = tp([5.0, 5.0, 10.0, 10.0])
+            output = self._convert_xy_to_wh(box)
+            self.assertIsInstance(output, tp)
+            self.assertIsInstance(output[0], float)
+            self.assertEqual(output, tp([5.0, 5.0, 5.0, 5.0]))
+
+            with self.assertRaises(Exception):
+                self._convert_xy_to_wh([box])
+
+    def test_box_convert_array(self):
+        """A numpy array of boxes keeps its dtype and shape."""
+        box = np.asarray([[5, 5, 10, 10], [1, 1, 2, 3]])
+        output = self._convert_xy_to_wh(box)
+        self.assertEqual(output.dtype, box.dtype)
+        self.assertEqual(output.shape, box.shape)
+        self.assertTrue((output[0] == [5, 5, 5, 5]).all())
+        self.assertTrue((output[1] == [1, 1, 1, 2]).all())
+
+    def test_box_convert_cpu_tensor(self):
+        """A CPU tensor of boxes keeps its dtype and shape."""
+        box = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]])
+        output = self._convert_xy_to_wh(box)
+        self.assertEqual(output.dtype, box.dtype)
+        self.assertEqual(output.shape, box.shape)
+        output = output.numpy()
+        self.assertTrue((output[0] == [5, 5, 5, 5]).all())
+        self.assertTrue((output[1] == [1, 1, 1, 2]).all())
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_box_convert_cuda_tensor(self):
+        """A CUDA tensor of boxes keeps its dtype, shape and device."""
+        box = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]).cuda()
+        output = self._convert_xy_to_wh(box)
+        self.assertEqual(output.dtype, box.dtype)
+        self.assertEqual(output.shape, box.shape)
+        self.assertEqual(output.device, box.device)
+        output = output.cpu().numpy()
+        self.assertTrue((output[0] == [5, 5, 5, 5]).all())
+        self.assertTrue((output[1] == [1, 1, 1, 2]).all())
+
+    def test_box_convert_xywha_to_xyxy_list(self):
+        """An angle-0 XYWHA list/tuple converts to XYXY; a nested list is rejected."""
+        for tp in [list, tuple]:
+            box = tp([50, 50, 30, 20, 0])
+            output = self._convert_xywha_to_xyxy(box)
+            self.assertIsInstance(output, tp)
+            self.assertEqual(output, tp([35, 40, 65, 60]))
+
+            with self.assertRaises(Exception):
+                self._convert_xywha_to_xyxy([box])
+
+    def test_box_convert_xywha_to_xyxy_array(self):
+        """XYWHA numpy arrays with angles 0, 90 and -45 convert to the expected XYXY boxes."""
+        for dtype in [np.float64, np.float32]:
+            box = np.asarray(
+                [
+                    [50, 50, 30, 20, 0],
+                    [50, 50, 30, 20, 90],
+                    [1, 1, math.sqrt(2), math.sqrt(2), -45],
+                ],
+                dtype=dtype,
+            )
+            output = self._convert_xywha_to_xyxy(box)
+            self.assertEqual(output.dtype, box.dtype)
+            expected = np.asarray([[35, 40, 65, 60], [40, 35, 60, 65], [0, 0, 2, 2]], dtype=dtype)
+            self.assertTrue(np.allclose(output, expected, atol=1e-6), "output={}".format(output))
+
+    def test_box_convert_xywha_to_xyxy_tensor(self):
+        """XYWHA tensors with angles 0, 90 and -45 convert to the expected XYXY boxes."""
+        for dtype in [torch.float32, torch.float64]:
+            box = torch.tensor(
+                [
+                    [50, 50, 30, 20, 0],
+                    [50, 50, 30, 20, 90],
+                    [1, 1, math.sqrt(2), math.sqrt(2), -45],
+                ],
+                dtype=dtype,
+            )
+            output = self._convert_xywha_to_xyxy(box)
+            self.assertEqual(output.dtype, box.dtype)
+            expected = torch.tensor([[35, 40, 65, 60], [40, 35, 60, 65], [0, 0, 2, 2]], dtype=dtype)
+
+            self.assertTrue(torch.allclose(output, expected, atol=1e-6), "output={}".format(output))
+
+    def test_box_convert_xywh_to_xywha_list(self):
+        """XYWH list/tuple -> center-based XYWHA with angle 0; nested lists are rejected."""
+        for tp in [list, tuple]:
+            box = tp([50, 50, 30, 20])
+            output = self._convert_xywh_to_xywha(box)
+            self.assertIsInstance(output, tp)
+            self.assertEqual(output, tp([65, 60, 30, 20, 0]))
+
+            with self.assertRaises(Exception):
+                self._convert_xywh_to_xywha([box])
+
+    def test_box_convert_xywh_to_xywha_array(self):
+        """XYWH numpy arrays convert to center-based XYWHA rows with angle 0, keeping dtype."""
+        for dtype in [np.float64, np.float32]:
+            box = np.asarray([[30, 40, 70, 60], [30, 40, 60, 70], [-1, -1, 2, 2]], dtype=dtype)
+            output = self._convert_xywh_to_xywha(box)
+            self.assertEqual(output.dtype, box.dtype)
+            expected = np.asarray(
+                [[65, 70, 70, 60, 0], [60, 75, 60, 70, 0], [0, 0, 2, 2, 0]], dtype=dtype
+            )
+            self.assertTrue(np.allclose(output, expected, atol=1e-6), "output={}".format(output))
+
+    def test_box_convert_xywh_to_xywha_tensor(self):
+        """XYWH tensors convert to center-based XYWHA rows with angle 0, keeping dtype."""
+        for dtype in [torch.float32, torch.float64]:
+            box = torch.tensor([[30, 40, 70, 60], [30, 40, 60, 70], [-1, -1, 2, 2]], dtype=dtype)
+            output = self._convert_xywh_to_xywha(box)
+            self.assertEqual(output.dtype, box.dtype)
+            expected = torch.tensor(
+                [[65, 70, 70, 60, 0], [60, 75, 60, 70, 0], [0, 0, 2, 2, 0]], dtype=dtype
+            )
+
+            self.assertTrue(torch.allclose(output, expected, atol=1e-6), "output={}".format(output))
+
+    def test_json_serializable(self):
+        """A dict holding a `BoxMode` member can be dumped with `json.dumps`."""
+        payload = {"box_mode": BoxMode.XYWH_REL}
+        try:
+            json.dumps(payload)
+        except Exception:
+            self.fail("JSON serialization failed")
+
+    def test_json_deserializable(self):
+        """An integer loaded from JSON can be turned back into a `BoxMode`."""
+        payload = '{"box_mode": 2}'
+        obj = json.loads(payload)
+        try:
+            obj["box_mode"] = BoxMode(obj["box_mode"])
+        except Exception:
+            self.fail("JSON deserialization failed")
+
+
+class TestBoxIOU(unittest.TestCase):
+    """Tests for `pairwise_iou` on `Boxes`."""
+
+    def test_pairwise_iou(self):
+        """IoU of the unit square against six boxes matches the hand-computed values."""
+        boxes1 = torch.tensor([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]])
+
+        boxes2 = torch.tensor(
+            [
+                [0.0, 0.0, 1.0, 1.0],
+                [0.0, 0.0, 0.5, 1.0],
+                [0.0, 0.0, 1.0, 0.5],
+                [0.0, 0.0, 0.5, 0.5],
+                [0.5, 0.5, 1.0, 1.0],
+                [0.5, 0.5, 1.5, 1.5],
+            ]
+        )
+
+        expected_ious = torch.tensor(
+            [
+                [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)],
+                [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)],
+            ]
+        )
+
+        ious = pairwise_iou(Boxes(boxes1), Boxes(boxes2))
+
+        self.assertTrue(torch.allclose(ious, expected_ious))
+
+
+class TestBoxes(unittest.TestCase):
+    """Tests for the `Boxes` container."""
+
+    def test_empty_cat(self):
+        """Concatenating an empty list yields a `Boxes` whose tensor has shape (0, 4)."""
+        x = Boxes.cat([])
+        # assertEqual, not assertTrue: assertTrue would take (0, 4) as the failure
+        # message and pass for any tensor with at least one dimension.
+        self.assertEqual(tuple(x.tensor.shape), (0, 4))
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_imagelist.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_imagelist.py
@@ -0,0 +1,34 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import unittest
+from typing import Sequence
+import torch
+
+from detectron2.structures import ImageList
+
+
+class TestImageList(unittest.TestCase):
+    """Tests for the padded shape produced by `ImageList.from_tensors` under tracing."""
+
+    def test_imagelist_padding_shape(self):
+        """A traced `from_tensors` must pad according to the inputs given at call time."""
+
+        class TensorToImageList(torch.nn.Module):
+            def forward(self, tensors: Sequence[torch.Tensor]):
+                return ImageList.from_tensors(tensors, 4).tensor
+
+        def image(height, width):
+            # 3-channel float32 image filled with ones.
+            return torch.ones((3, height, width), dtype=torch.float32)
+
+        # Trace with one image size, then call with another: the output shape has
+        # to follow the new input (height 15 is padded to 16), not the traced one.
+        single = torch.jit.trace(TensorToImageList(), ([image(10, 10)],))
+        padded = single([image(15, 20)])
+        self.assertEqual(list(padded.shape), [1, 3, 16, 20], str(padded.shape))
+
+        # Same check with two images per call. The call uses as many images as the
+        # trace did: calling with a different number of images is not supported.
+        pair = torch.jit.trace(TensorToImageList(), ([image(16, 10), image(13, 11)],))
+        padded = pair([image(25, 20), image(10, 10)])
+        self.assertEqual(list(padded.shape), [2, 3, 28, 20], str(padded.shape))
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_instances.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_instances.py
@@ -0,0 +1,34 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import unittest
+import torch
+
+from detectron2.structures import Instances
+
+
+class TestInstancesIndexing(unittest.TestCase):
+    """Tests for indexing an `Instances` object."""
+
+    def test_int_indexing(self):
+        """Integer indices, negative ones included, select the matching entry of every field."""
+        attr1 = torch.tensor([[0.0, 0.0, 1.0], [0.0, 0.0, 0.5], [0.0, 0.0, 1.0], [0.0, 0.5, 0.5]])
+        attr2 = torch.tensor([0.1, 0.2, 0.3, 0.4])
+        instances = Instances((100, 100))
+        instances.attr1 = attr1
+        instances.attr2 = attr2
+        count = len(instances)
+
+        # Every valid index in [-count, count) must agree with plain tensor indexing.
+        for idx in range(-count, count):
+            picked = instances[idx]
+            for field, reference in (("attr1", attr1), ("attr2", attr2)):
+                matches = (getattr(picked, field) == reference[idx]).all()
+                self.assertEqual(matches, True)
+
+        # One step past either end is out of range.
+        for bad_idx in (count, -count - 1):
+            with self.assertRaises(IndexError):
+                instances[bad_idx]
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_rotated_boxes.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_rotated_boxes.py
@@ -0,0 +1,357 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+from __future__ import absolute_import, division, print_function, unicode_literals
+import logging
+import math
+import random
+import unittest
+import torch
+from fvcore.common.benchmark import benchmark
+
+from detectron2.layers.rotated_boxes import pairwise_iou_rotated
+from detectron2.structures.boxes import Boxes
+from detectron2.structures.rotated_boxes import RotatedBoxes, pairwise_iou
+
+logger = logging.getLogger(__name__)
+
+
+class TestRotatedBoxesLayer(unittest.TestCase):
+    def test_iou_0_dim_cpu(self):
+        boxes1 = torch.rand(0, 5, dtype=torch.float32)
+        boxes2 = torch.rand(10, 5, dtype=torch.float32)
+        expected_ious = torch.zeros(0, 10, dtype=torch.float32)
+        ious = pairwise_iou_rotated(boxes1, boxes2)
+        self.assertTrue(torch.allclose(ious, expected_ious))
+
+        boxes1 = torch.rand(10, 5, dtype=torch.float32)
+        boxes2 = torch.rand(0, 5, dtype=torch.float32)
+        expected_ious = torch.zeros(10, 0, dtype=torch.float32)
+        ious = pairwise_iou_rotated(boxes1, boxes2)
+        self.assertTrue(torch.allclose(ious, expected_ious))
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_iou_0_dim_cuda(self):
+        boxes1 = torch.rand(0, 5, dtype=torch.float32)
+        boxes2 = torch.rand(10, 5, dtype=torch.float32)
+        expected_ious = torch.zeros(0, 10, dtype=torch.float32)
+        ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda())
+        self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious))
+
+        boxes1 = torch.rand(10, 5, dtype=torch.float32)
+        boxes2 = torch.rand(0, 5, dtype=torch.float32)
+        expected_ious = torch.zeros(10, 0, dtype=torch.float32)
+        ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda())
+        self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious))
+
+    def test_iou_half_overlap_cpu(self):
+        boxes1 = torch.tensor([[0.5, 0.5, 1.0, 1.0, 0.0]], dtype=torch.float32)
+        boxes2 = torch.tensor([[0.25, 0.5, 0.5, 1.0, 0.0]], dtype=torch.float32)
+        expected_ious = torch.tensor([[0.5]], dtype=torch.float32)
+        ious = pairwise_iou_rotated(boxes1, boxes2)
+        self.assertTrue(torch.allclose(ious, expected_ious))
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_iou_half_overlap_cuda(self):
+        boxes1 = torch.tensor([[0.5, 0.5, 1.0, 1.0, 0.0]], dtype=torch.float32)
+        boxes2 = torch.tensor([[0.25, 0.5, 0.5, 1.0, 0.0]], dtype=torch.float32)
+        expected_ious = torch.tensor([[0.5]], dtype=torch.float32)
+        ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda())
+        self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious))
+
+    def test_iou_precision(self):
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # CPU always runs
+            boxes1 = torch.tensor([[565, 565, 10, 10.0, 0]], dtype=torch.float32, device=device)
+            boxes2 = torch.tensor([[565, 565, 10, 8.3, 0]], dtype=torch.float32, device=device)
+            iou = 8.3 / 10.0
+            expected_ious = torch.tensor([[iou]], dtype=torch.float32)
+            ious = pairwise_iou_rotated(boxes1, boxes2)
+            self.assertTrue(torch.allclose(ious.cpu(), expected_ious))
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_iou_too_many_boxes_cuda(self):
+        s1, s2 = 5, 1289035
+        boxes1 = torch.zeros(s1, 5)
+        boxes2 = torch.zeros(s2, 5)
+        ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda())
+        self.assertTupleEqual(tuple(ious_cuda.shape), (s1, s2))
+
+    def test_iou_extreme(self):
+        # Cause floating point issues in cuda kernels (#1266)
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # CPU always runs
+            boxes1 = torch.tensor([[160.0, 153.0, 230.0, 23.0, -37.0]], device=device)
+            boxes2 = torch.tensor(
+                [
+                    [
+                        -1.117407639806935e17,
+                        1.3858420478349148e18,
+                        1000.0000610351562,
+                        1000.0000610351562,
+                        1612.0,
+                    ]
+                ],
+                device=device,
+            )
+            ious = pairwise_iou_rotated(boxes1, boxes2)
+            self.assertTrue(ious.min() >= 0, ious)
+
+
+class TestRotatedBoxesStructure(unittest.TestCase):
+    def test_clip_area_0_degree(self):
+        for _ in range(50):
+            num_boxes = 100
+            boxes_5d = torch.zeros(num_boxes, 5)
+            boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500)
+            boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500)
+            boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500)
+            boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500)
+            # Convert from (x_ctr, y_ctr, w, h, 0) to  (x1, y1, x2, y2)
+            boxes_4d = torch.zeros(num_boxes, 4)
+            boxes_4d[:, 0] = boxes_5d[:, 0] - boxes_5d[:, 2] / 2.0
+            boxes_4d[:, 1] = boxes_5d[:, 1] - boxes_5d[:, 3] / 2.0
+            boxes_4d[:, 2] = boxes_5d[:, 0] + boxes_5d[:, 2] / 2.0
+            boxes_4d[:, 3] = boxes_5d[:, 1] + boxes_5d[:, 3] / 2.0
+
+            image_size = (500, 600)
+            test_boxes_4d = Boxes(boxes_4d)
+            test_boxes_5d = RotatedBoxes(boxes_5d)
+            # Before clip
+            areas_4d = test_boxes_4d.area()
+            areas_5d = test_boxes_5d.area()
+            self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5))
+            # After clip
+            test_boxes_4d.clip(image_size)
+            test_boxes_5d.clip(image_size)
+            areas_4d = test_boxes_4d.area()
+            areas_5d = test_boxes_5d.area()
+            self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5))
+
+    def test_clip_area_arbitrary_angle(self):
+        num_boxes = 100
+        boxes_5d = torch.zeros(num_boxes, 5)
+        boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500)
+        boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500)
+        boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500)
+        boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500)
+        boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800)
+        clip_angle_threshold = random.uniform(0, 180)
+
+        image_size = (500, 600)
+        test_boxes_5d = RotatedBoxes(boxes_5d)
+        # Before clip
+        areas_before = test_boxes_5d.area()
+        # After clip
+        test_boxes_5d.clip(image_size, clip_angle_threshold)
+        areas_diff = test_boxes_5d.area() - areas_before
+
+        # the areas should only decrease after clipping
+        self.assertTrue(torch.all(areas_diff <= 0))
+        # whenever the box is clipped (thus the area shrinks),
+        # the angle for the box must be within the clip_angle_threshold
+        # Note that the clip function will normalize the angle range
+        # to be within (-180, 180]
+        self.assertTrue(
+            torch.all(torch.abs(boxes_5d[:, 4][torch.where(areas_diff < 0)]) < clip_angle_threshold)
+        )
+
+    def test_normalize_angles(self):
+        # torch.manual_seed(0)
+        for _ in range(50):
+            num_boxes = 100
+            boxes_5d = torch.zeros(num_boxes, 5)
+            boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500)
+            boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500)
+            boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500)
+            boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500)
+            boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800)
+            rotated_boxes = RotatedBoxes(boxes_5d)
+            normalized_boxes = rotated_boxes.clone()
+            normalized_boxes.normalize_angles()
+            self.assertTrue(torch.all(normalized_boxes.tensor[:, 4] >= -180))
+            self.assertTrue(torch.all(normalized_boxes.tensor[:, 4] < 180))
+            # x, y, w, h should not change
+            self.assertTrue(torch.allclose(boxes_5d[:, :4], normalized_boxes.tensor[:, :4]))
+            # the cos/sin values of the angles should stay the same
+
+            self.assertTrue(
+                torch.allclose(
+                    torch.cos(boxes_5d[:, 4] * math.pi / 180),
+                    torch.cos(normalized_boxes.tensor[:, 4] * math.pi / 180),
+                    atol=1e-5,
+                )
+            )
+
+            self.assertTrue(
+                torch.allclose(
+                    torch.sin(boxes_5d[:, 4] * math.pi / 180),
+                    torch.sin(normalized_boxes.tensor[:, 4] * math.pi / 180),
+                    atol=1e-5,
+                )
+            )
+
+    def test_pairwise_iou_0_degree(self):
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # CPU always runs
+            boxes1 = torch.tensor(
+                [[0.5, 0.5, 1.0, 1.0, 0.0], [0.5, 0.5, 1.0, 1.0, 0.0]],
+                dtype=torch.float32,
+                device=device,
+            )
+            boxes2 = torch.tensor(
+                [
+                    [0.5, 0.5, 1.0, 1.0, 0.0],
+                    [0.25, 0.5, 0.5, 1.0, 0.0],
+                    [0.5, 0.25, 1.0, 0.5, 0.0],
+                    [0.25, 0.25, 0.5, 0.5, 0.0],
+                    [0.75, 0.75, 0.5, 0.5, 0.0],
+                    [1.0, 1.0, 1.0, 1.0, 0.0],
+                ],
+                dtype=torch.float32,
+                device=device,
+            )
+            expected_ious = torch.tensor(
+                [
+                    [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)],
+                    [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)],
+                ],
+                dtype=torch.float32,
+                device=device,
+            )
+            ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2))
+            self.assertTrue(torch.allclose(ious, expected_ious))
+
+    def test_pairwise_iou_45_degrees(self):
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # CPU always runs
+            boxes1 = torch.tensor(
+                [
+                    [1, 1, math.sqrt(2), math.sqrt(2), 45],
+                    [1, 1, 2 * math.sqrt(2), 2 * math.sqrt(2), -45],
+                ],
+                dtype=torch.float32,
+                device=device,
+            )
+            boxes2 = torch.tensor([[1, 1, 2, 2, 0]], dtype=torch.float32, device=device)
+            expected_ious = torch.tensor([[0.5], [0.5]], dtype=torch.float32, device=device)
+            ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2))
+            self.assertTrue(torch.allclose(ious, expected_ious))
+
+    def test_pairwise_iou_orthogonal(self):
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # CPU always runs
+            boxes1 = torch.tensor([[5, 5, 10, 6, 55]], dtype=torch.float32, device=device)
+            boxes2 = torch.tensor([[5, 5, 10, 6, -35]], dtype=torch.float32, device=device)
+            iou = (6.0 * 6.0) / (6.0 * 6.0 + 4.0 * 6.0 + 4.0 * 6.0)
+            expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device)
+            ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2))
+            self.assertTrue(torch.allclose(ious, expected_ious))
+
+    def test_pairwise_iou_large_close_boxes(self):
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # CPU always runs
+            boxes1 = torch.tensor(
+                [[299.500000, 417.370422, 600.000000, 364.259186, 27.1828]],
+                dtype=torch.float32,
+                device=device,
+            )
+            boxes2 = torch.tensor(
+                [[299.500000, 417.370422, 600.000000, 364.259155, 27.1828]],
+                dtype=torch.float32,
+                device=device,
+            )
+            iou = 364.259155 / 364.259186
+            expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device)
+            ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2))
+            self.assertTrue(torch.allclose(ious, expected_ious))
+
+    def test_pairwise_iou_many_boxes(self):
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # CPU always runs
+            num_boxes1 = 100
+            num_boxes2 = 200
+            boxes1 = torch.stack(
+                [
+                    torch.tensor(
+                        [5 + 20 * i, 5 + 20 * i, 10, 10, 0], dtype=torch.float32, device=device
+                    )
+                    for i in range(num_boxes1)
+                ]
+            )
+            boxes2 = torch.stack(
+                [
+                    torch.tensor(
+                        [5 + 20 * i, 5 + 20 * i, 10, 1 + 9 * i / num_boxes2, 0],
+                        dtype=torch.float32,
+                        device=device,
+                    )
+                    for i in range(num_boxes2)
+                ]
+            )
+            expected_ious = torch.zeros(num_boxes1, num_boxes2, dtype=torch.float32, device=device)
+            for i in range(min(num_boxes1, num_boxes2)):
+                expected_ious[i][i] = (1 + 9 * i / num_boxes2) / 10.0
+            ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2))
+            self.assertTrue(torch.allclose(ious, expected_ious))
+
+    def test_pairwise_iou_issue1207_simplified(self):
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # CPU always runs
+            # Simplified test case of D2-issue-1207
+            boxes1 = torch.tensor([[3, 3, 8, 2, -45.0]], device=device)
+            boxes2 = torch.tensor([[6, 0, 8, 2, -45.0]], device=device)
+            iou = 0.0
+            expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device)
+
+            ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2))
+            self.assertTrue(torch.allclose(ious, expected_ious))
+
+    def test_pairwise_iou_issue1207(self):
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # CPU always runs
+            # The original test case in D2-issue-1207
+            boxes1 = torch.tensor([[160.0, 153.0, 230.0, 23.0, -37.0]], device=device)
+            boxes2 = torch.tensor([[190.0, 127.0, 80.0, 21.0, -46.0]], device=device)
+
+            iou = 0.0
+            expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device)
+
+            ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2))
+            self.assertTrue(torch.allclose(ious, expected_ious))
+
+    def test_empty_cat(self):
+        x = RotatedBoxes.cat([])  # concatenating nothing must still yield an (0, 5) tensor
+        self.assertEqual(tuple(x.tensor.shape), (0, 5))
+
+
+def benchmark_rotated_iou():
+    num_boxes1 = 200
+    num_boxes2 = 500
+    boxes1 = torch.stack(
+        [
+            torch.tensor([5 + 20 * i, 5 + 20 * i, 10, 10, 0], dtype=torch.float32)
+            for i in range(num_boxes1)
+        ]
+    )
+    boxes2 = torch.stack(
+        [
+            torch.tensor(
+                [5 + 20 * i, 5 + 20 * i, 10, 1 + 9 * i / num_boxes2, 0], dtype=torch.float32
+            )
+            for i in range(num_boxes2)
+        ]
+    )
+
+    def func(dev, n=1):
+        b1 = boxes1.to(device=dev)
+        b2 = boxes2.to(device=dev)
+
+        def bench():
+            for _ in range(n):
+                pairwise_iou_rotated(b1, b2)
+            if dev.type == "cuda":
+                torch.cuda.synchronize()
+
+        return bench
+
+    # only run it once per timed loop, since it's slow
+    args = [{"dev": torch.device("cpu"), "n": 1}]
+    if torch.cuda.is_available():
+        args.append({"dev": torch.device("cuda"), "n": 10})
+
+    benchmark(func, "rotated_iou", args, warmup_iters=3)
+
+
+if __name__ == "__main__":
+    benchmark_rotated_iou()  # must run first: unittest.main() ends with sys.exit()
+    unittest.main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/test_checkpoint.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/test_checkpoint.py
@@ -0,0 +1,48 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import unittest
+from collections import OrderedDict
+import torch
+from torch import nn
+
+from detectron2.checkpoint.c2_model_loading import align_and_update_state_dicts
+from detectron2.utils.logger import setup_logger
+
+
+class TestCheckpointer(unittest.TestCase):
+    def setUp(self):
+        setup_logger()
+
+    def create_complex_model(self):
+        m = nn.Module()
+        m.block1 = nn.Module()
+        m.block1.layer1 = nn.Linear(2, 3)
+        m.layer2 = nn.Linear(3, 2)
+        m.res = nn.Module()
+        m.res.layer2 = nn.Linear(3, 2)
+
+        state_dict = OrderedDict()
+        state_dict["layer1.weight"] = torch.rand(3, 2)
+        state_dict["layer1.bias"] = torch.rand(3)
+        state_dict["layer2.weight"] = torch.rand(2, 3)
+        state_dict["layer2.bias"] = torch.rand(2)
+        state_dict["res.layer2.weight"] = torch.rand(2, 3)
+        state_dict["res.layer2.bias"] = torch.rand(2)
+        return m, state_dict
+
+    def test_complex_model_loaded(self):
+        for add_data_parallel in [False, True]:
+            model, state_dict = self.create_complex_model()
+            if add_data_parallel:
+                model = nn.DataParallel(model)
+            model_sd = model.state_dict()
+
+            align_and_update_state_dicts(model_sd, state_dict)
+            for loaded, stored in zip(model_sd.values(), state_dict.values()):
+                # different tensor references
+                self.assertFalse(id(loaded) == id(stored))
+                # same content
+                self.assertTrue(loaded.equal(stored))
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/test_config.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/test_config.py
@@ -0,0 +1,240 @@
+#!/usr/bin/env python
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+
+import os
+import tempfile
+import unittest
+import torch
+
+from detectron2.config import configurable, downgrade_config, get_cfg, upgrade_config
+from detectron2.layers import ShapeSpec
+
+_V0_CFG = """
+MODEL:
+  RPN_HEAD:
+    NAME: "TEST"
+VERSION: 0
+"""
+
+_V1_CFG = """
+MODEL:
+  WEIGHT: "/path/to/weight"
+"""
+
+
+class TestConfigVersioning(unittest.TestCase):
+    def test_upgrade_downgrade_consistency(self):
+        cfg = get_cfg()
+        # check that custom is preserved
+        cfg.USER_CUSTOM = 1
+
+        down = downgrade_config(cfg, to_version=0)
+        up = upgrade_config(down)
+        self.assertTrue(up == cfg)
+
+    def _merge_cfg_str(self, cfg, merge_str):
+        f = tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False)
+        try:
+            f.write(merge_str)
+            f.close()
+            cfg.merge_from_file(f.name)
+        finally:
+            os.remove(f.name)
+        return cfg
+
+    def test_auto_upgrade(self):
+        cfg = get_cfg()
+        latest_ver = cfg.VERSION
+        cfg.USER_CUSTOM = 1
+
+        self._merge_cfg_str(cfg, _V0_CFG)
+
+        self.assertEqual(cfg.MODEL.RPN.HEAD_NAME, "TEST")
+        self.assertEqual(cfg.VERSION, latest_ver)
+
+    def test_guess_v1(self):
+        cfg = get_cfg()
+        latest_ver = cfg.VERSION
+        self._merge_cfg_str(cfg, _V1_CFG)
+        self.assertEqual(cfg.VERSION, latest_ver)
+
+
+class _TestClassA(torch.nn.Module):
+    @configurable
+    def __init__(self, arg1, arg2, arg3=3):
+        super().__init__()
+        self.arg1 = arg1
+        self.arg2 = arg2
+        self.arg3 = arg3
+        assert arg1 == 1
+        assert arg2 == 2
+        assert arg3 == 3
+
+    @classmethod
+    def from_config(cls, cfg):
+        args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2}
+        return args
+
+
+class _TestClassB(_TestClassA):
+    @configurable
+    def __init__(self, input_shape, arg1, arg2, arg3=3):
+        """
+        Doc of _TestClassB
+        """
+        assert input_shape == "shape"
+        super().__init__(arg1, arg2, arg3)
+
+    @classmethod
+    def from_config(cls, cfg, input_shape):  # test extra positional arg in from_config
+        args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2}
+        args["input_shape"] = input_shape
+        return args
+
+
+class _LegacySubClass(_TestClassB):
+    # an old subclass written in cfg style
+    def __init__(self, cfg, input_shape, arg4=4):
+        super().__init__(cfg, input_shape)
+        assert self.arg1 == 1
+        assert self.arg2 == 2
+        assert self.arg3 == 3
+
+
+class _NewSubClassNewInit(_TestClassB):
+    # test new subclass with a new __init__
+    @configurable
+    def __init__(self, input_shape, arg4=4, **kwargs):
+        super().__init__(input_shape, **kwargs)
+        assert self.arg1 == 1
+        assert self.arg2 == 2
+        assert self.arg3 == 3
+
+
+class _LegacySubClassNotCfg(_TestClassB):
+    # an old subclass written in cfg style, but argument is not called "cfg"
+    def __init__(self, config, input_shape):
+        super().__init__(config, input_shape)
+        assert self.arg1 == 1
+        assert self.arg2 == 2
+        assert self.arg3 == 3
+
+
+class _TestClassC(_TestClassB):
+    @classmethod
+    def from_config(cls, cfg, input_shape, **kwargs):  # test extra kwarg overwrite
+        args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2}
+        args["input_shape"] = input_shape
+        args.update(kwargs)
+        return args
+
+
+class _TestClassD(_TestClassA):
+    @configurable
+    def __init__(self, input_shape: ShapeSpec, arg1: int, arg2, arg3=3):
+        assert input_shape == "shape"
+        super().__init__(arg1, arg2, arg3)
+
+    # _TestClassA.from_config does not have input_shape args.
+    # Test whether input_shape will be forwarded to __init__
+
+
+class TestConfigurable(unittest.TestCase):
+    def testInitWithArgs(self):
+        _ = _TestClassA(arg1=1, arg2=2, arg3=3)
+        _ = _TestClassB("shape", arg1=1, arg2=2)
+        _ = _TestClassC("shape", arg1=1, arg2=2)
+        _ = _TestClassD("shape", arg1=1, arg2=2, arg3=3)
+
+    def testPatchedAttr(self):
+        self.assertTrue("Doc" in _TestClassB.__init__.__doc__)
+        self.assertEqual(_TestClassD.__init__.__annotations__["arg1"], int)
+
+    def testInitWithCfg(self):
+        cfg = get_cfg()
+        cfg.ARG1 = 1
+        cfg.ARG2 = 2
+        cfg.ARG3 = 3
+        _ = _TestClassA(cfg)
+        _ = _TestClassB(cfg, input_shape="shape")
+        _ = _TestClassC(cfg, input_shape="shape")
+        _ = _TestClassD(cfg, input_shape="shape")
+        _ = _LegacySubClass(cfg, input_shape="shape")
+        _ = _NewSubClassNewInit(cfg, input_shape="shape")
+        _ = _LegacySubClassNotCfg(cfg, input_shape="shape")
+        with self.assertRaises(TypeError):
+            # disallow forwarding positional args to __init__ since it's prone to errors
+            _ = _TestClassD(cfg, "shape")
+
+        # call with kwargs instead
+        _ = _TestClassA(cfg=cfg)
+        _ = _TestClassB(cfg=cfg, input_shape="shape")
+        _ = _TestClassC(cfg=cfg, input_shape="shape")
+        _ = _TestClassD(cfg=cfg, input_shape="shape")
+        _ = _LegacySubClass(cfg=cfg, input_shape="shape")
+        _ = _NewSubClassNewInit(cfg=cfg, input_shape="shape")
+        _ = _LegacySubClassNotCfg(config=cfg, input_shape="shape")
+
+    def testInitWithCfgOverwrite(self):
+        cfg = get_cfg()
+        cfg.ARG1 = 1
+        cfg.ARG2 = 999  # wrong config
+        with self.assertRaises(AssertionError):
+            _ = _TestClassA(cfg, arg3=3)
+
+        # overwrite arg2 with correct config later:
+        _ = _TestClassA(cfg, arg2=2, arg3=3)
+        _ = _TestClassB(cfg, input_shape="shape", arg2=2, arg3=3)
+        _ = _TestClassC(cfg, input_shape="shape", arg2=2, arg3=3)
+        _ = _TestClassD(cfg, input_shape="shape", arg2=2, arg3=3)
+
+        # call with kwargs cfg=cfg instead
+        _ = _TestClassA(cfg=cfg, arg2=2, arg3=3)
+        _ = _TestClassB(cfg=cfg, input_shape="shape", arg2=2, arg3=3)
+        _ = _TestClassC(cfg=cfg, input_shape="shape", arg2=2, arg3=3)
+        _ = _TestClassD(cfg=cfg, input_shape="shape", arg2=2, arg3=3)
+
+    def testInitWithCfgWrongArgs(self):
+        cfg = get_cfg()
+        cfg.ARG1 = 1
+        cfg.ARG2 = 2
+        with self.assertRaises(TypeError):
+            _ = _TestClassB(cfg, "shape", not_exist=1)
+        with self.assertRaises(TypeError):
+            _ = _TestClassC(cfg, "shape", not_exist=1)
+        with self.assertRaises(TypeError):
+            _ = _TestClassD(cfg, "shape", not_exist=1)
+
+    def testBadClass(self):
+        class _BadClass1:
+            @configurable
+            def __init__(self, a=1, b=2):
+                pass
+
+        class _BadClass2:
+            @configurable
+            def __init__(self, a=1, b=2):
+                pass
+
+            def from_config(self, cfg):  # noqa
+                pass
+
+        class _BadClass3:
+            @configurable
+            def __init__(self, a=1, b=2):
+                pass
+
+            # bad name: must be cfg
+            @classmethod
+            def from_config(cls, config):  # noqa
+                pass
+
+        with self.assertRaises(AttributeError):
+            _ = _BadClass1(a=1)
+
+        with self.assertRaises(TypeError):
+            _ = _BadClass2(a=1)
+
+        with self.assertRaises(TypeError):
+            _ = _BadClass3(get_cfg())
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/test_export_caffe2.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/test_export_caffe2.py
@@ -0,0 +1,71 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+# -*- coding: utf-8 -*-
+
+import copy
+import numpy as np
+import os
+import tempfile
+import unittest
+import cv2
+import torch
+from fvcore.common.file_io import PathManager
+
+from detectron2 import model_zoo
+from detectron2.checkpoint import DetectionCheckpointer
+from detectron2.config import get_cfg
+from detectron2.data import DatasetCatalog
+from detectron2.modeling import build_model
+from detectron2.utils.logger import setup_logger
+
+
+@unittest.skipIf(os.environ.get("CIRCLECI"), "Require COCO data and model zoo.")
+class TestCaffe2Export(unittest.TestCase):
+    def setUp(self):
+        setup_logger()
+
+    def _test_model(self, config_path, device="cpu"):
+        # requires extra dependencies
+        from detectron2.export import Caffe2Model, add_export_config, export_caffe2_model
+
+        cfg = get_cfg()
+        cfg.merge_from_file(model_zoo.get_config_file(config_path))
+        cfg = add_export_config(cfg)
+        cfg.MODEL.DEVICE = device
+
+        model = build_model(cfg)
+        DetectionCheckpointer(model).load(model_zoo.get_checkpoint_url(config_path))
+
+        inputs = [{"image": self._get_test_image()}]
+        c2_model = export_caffe2_model(cfg, model, copy.deepcopy(inputs))
+
+        with tempfile.TemporaryDirectory(prefix="detectron2_unittest") as d:
+            c2_model.save_protobuf(d)
+            c2_model.save_graph(os.path.join(d, "test.svg"), inputs=copy.deepcopy(inputs))
+            c2_model = Caffe2Model.load_protobuf(d)
+        c2_model(inputs)[0]["instances"]
+
+    def _get_test_image(self):
+        try:
+            file_name = DatasetCatalog.get("coco_2017_train")[0]["file_name"]
+            assert PathManager.exists(file_name)
+        except Exception:
+            self.skipTest("COCO dataset not available.")
+
+        with PathManager.open(file_name, "rb") as f:
+            buf = f.read()
+        img = cv2.imdecode(np.frombuffer(buf, dtype=np.uint8), cv2.IMREAD_COLOR)
+        assert img is not None, file_name
+        return torch.from_numpy(img.transpose(2, 0, 1))
+
+    def testMaskRCNN(self):
+        self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def testMaskRCNNGPU(self):
+        self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", device="cuda")
+
+    def testRetinaNet(self):
+        self._test_model("COCO-Detection/retinanet_R_50_FPN_3x.yaml")
+
+    def testPanopticFPN(self):
+        self._test_model("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml")
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/test_model_analysis.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/test_model_analysis.py
@@ -0,0 +1,58 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+
+import unittest
+import torch
+
+import detectron2.model_zoo as model_zoo
+from detectron2.config import get_cfg
+from detectron2.modeling import build_model
+from detectron2.utils.analysis import flop_count_operators, parameter_count
+
+
+def get_model_zoo(config_path):
+    """
+    Like model_zoo.get, but do not load any weights (even pretrained)
+    """
+    cfg_file = model_zoo.get_config_file(config_path)
+    cfg = get_cfg()
+    cfg.merge_from_file(cfg_file)
+    if not torch.cuda.is_available():
+        cfg.MODEL.DEVICE = "cpu"
+    return build_model(cfg)
+
+
+class RetinaNetTest(unittest.TestCase):
+    def setUp(self):
+        self.model = get_model_zoo("COCO-Detection/retinanet_R_50_FPN_1x.yaml")
+
+    def test_flop(self):
+        # RetinaNet supports flop-counting with random inputs
+        inputs = [{"image": torch.rand(3, 800, 800)}]
+        res = flop_count_operators(self.model, inputs)
+        self.assertEqual(int(res["conv"]), 146)  # 146B flops
+
+    def test_param_count(self):
+        res = parameter_count(self.model)
+        self.assertEqual(res[""], 37915572)  # assertTrue(a, b) would treat b as the message
+        self.assertEqual(res["backbone"], 31452352)
+
+
+class FasterRCNNTest(unittest.TestCase):
+    def setUp(self):
+        self.model = get_model_zoo("COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml")
+
+    def test_flop(self):
+        # Faster R-CNN supports flop-counting with random inputs
+        inputs = [{"image": torch.rand(3, 800, 800)}]
+        res = flop_count_operators(self.model, inputs)
+
+        # This only checks flops for backbone & proposal generator
+        # Flops for box head is not conv, and depends on #proposals, which is
+        # almost 0 for random inputs.
+        self.assertEqual(int(res["conv"]), 117)
+
+    def test_param_count(self):
+        res = parameter_count(self.model)
+        self.assertEqual(res[""], 41699936)  # assertTrue(a, b) would treat b as the message
+        self.assertEqual(res["backbone"], 26799296)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/test_model_zoo.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/test_model_zoo.py
@@ -0,0 +1,29 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import unittest
+
+from detectron2 import model_zoo
+from detectron2.modeling import FPN, GeneralizedRCNN
+
+logger = logging.getLogger(__name__)
+
+
+class TestModelZoo(unittest.TestCase):
+    def test_get_returns_model(self):
+        model = model_zoo.get("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml", trained=False)
+        self.assertIsInstance(model, GeneralizedRCNN)
+        self.assertIsInstance(model.backbone, FPN)
+
+    def test_get_invalid_model(self):
+        self.assertRaises(RuntimeError, model_zoo.get, "Invalid/config.yaml")
+
+    def test_get_url(self):
+        url = model_zoo.get_checkpoint_url("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml")
+        self.assertEqual(
+            url,
+            "https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn/138602908/model_final_01ca85.pkl",  # noqa
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/test_visualizer.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/tests/test_visualizer.py
@@ -0,0 +1,143 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+# File:
+
+import numpy as np
+import unittest
+import torch
+
+from detectron2.data import MetadataCatalog
+from detectron2.structures import BoxMode, Instances, RotatedBoxes
+from detectron2.utils.visualizer import Visualizer
+
+
+class TestVisualizer(unittest.TestCase):
+    def _random_data(self):
+        H, W = 100, 100
+        N = 10
+        img = np.random.rand(H, W, 3) * 255
+        boxxy = np.random.rand(N, 2) * (H // 2)
+        boxes = np.concatenate((boxxy, boxxy + H // 2), axis=1)
+
+        def _rand_poly():
+            return np.random.rand(3, 2).flatten() * H
+
+        polygons = [[_rand_poly() for _ in range(np.random.randint(1, 5))] for _ in range(N)]
+
+        mask = np.zeros_like(img[:, :, 0], dtype=np.bool)
+        mask[:10, 10:20] = 1
+
+        labels = [str(i) for i in range(N)]
+        return img, boxes, labels, polygons, [mask] * N
+
+    @property
+    def metadata(self):
+        return MetadataCatalog.get("coco_2017_train")
+
+    def test_draw_dataset_dict(self):
+        img = np.random.rand(512, 512, 3) * 255
+        dic = {
+            "annotations": [
+                {
+                    "bbox": [
+                        368.9946492271106,
+                        330.891438763377,
+                        13.148537455410235,
+                        13.644708680142685,
+                    ],
+                    "bbox_mode": BoxMode.XYWH_ABS,
+                    "category_id": 0,
+                    "iscrowd": 1,
+                    "segmentation": {
+                        "counts": "_jh52m?2N2N2N2O100O10O001N1O2MceP2",
+                        "size": [512, 512],
+                    },
+                }
+            ],
+            "height": 512,
+            "image_id": 1,
+            "width": 512,
+        }
+        v = Visualizer(img, self.metadata)
+        v.draw_dataset_dict(dic)
+
+    def test_overlay_instances(self):
+        img, boxes, labels, polygons, masks = self._random_data()
+
+        v = Visualizer(img, self.metadata)
+        output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image()
+        self.assertEqual(output.shape, img.shape)
+
+        # Test 2x scaling
+        v = Visualizer(img, self.metadata, scale=2.0)
+        output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image()
+        self.assertEqual(output.shape[0], img.shape[0] * 2)
+
+        # Test overlay masks
+        v = Visualizer(img, self.metadata)
+        output = v.overlay_instances(masks=masks, boxes=boxes, labels=labels).get_image()
+        self.assertEqual(output.shape, img.shape)
+
+    def test_overlay_instances_no_boxes(self):
+        img, boxes, labels, polygons, _ = self._random_data()
+        v = Visualizer(img, self.metadata)
+        v.overlay_instances(masks=polygons, boxes=None, labels=labels).get_image()
+
+    def test_draw_instance_predictions(self):
+        img, boxes, _, _, masks = self._random_data()
+        num_inst = len(boxes)
+        inst = Instances((img.shape[0], img.shape[1]))
+        inst.pred_classes = torch.randint(0, 80, size=(num_inst,))
+        inst.scores = torch.rand(num_inst)
+        inst.pred_boxes = torch.from_numpy(boxes)
+        inst.pred_masks = torch.from_numpy(np.asarray(masks))
+
+        v = Visualizer(img, self.metadata)
+        v.draw_instance_predictions(inst)
+
+    def test_draw_empty_mask_predictions(self):
+        img, boxes, _, _, masks = self._random_data()
+        num_inst = len(boxes)
+        inst = Instances((img.shape[0], img.shape[1]))
+        inst.pred_classes = torch.randint(0, 80, size=(num_inst,))
+        inst.scores = torch.rand(num_inst)
+        inst.pred_boxes = torch.from_numpy(boxes)
+        inst.pred_masks = torch.from_numpy(np.zeros_like(np.asarray(masks)))
+
+        v = Visualizer(img, self.metadata)
+        v.draw_instance_predictions(inst)
+
+    def test_correct_output_shape(self):
+        img = np.random.rand(928, 928, 3) * 255
+        v = Visualizer(img, self.metadata)
+        out = v.output.get_image()
+        self.assertEqual(out.shape, img.shape)
+
+    def test_overlay_rotated_instances(self):
+        H, W = 100, 150
+        img = np.random.rand(H, W, 3) * 255
+        num_boxes = 50
+        boxes_5d = torch.zeros(num_boxes, 5)
+        boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-0.1 * W, 1.1 * W)
+        boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-0.1 * H, 1.1 * H)
+        boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H))
+        boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H))
+        boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800)
+        rotated_boxes = RotatedBoxes(boxes_5d)
+        labels = [str(i) for i in range(num_boxes)]
+
+        v = Visualizer(img, self.metadata)
+        output = v.overlay_instances(boxes=rotated_boxes, labels=labels).get_image()
+        self.assertEqual(output.shape, img.shape)
+
+    def test_draw_no_metadata(self):
+        img, boxes, _, _, masks = self._random_data()
+        num_inst = len(boxes)
+        inst = Instances((img.shape[0], img.shape[1]))
+        inst.pred_classes = torch.randint(0, 80, size=(num_inst,))
+        inst.scores = torch.rand(num_inst)
+        inst.pred_boxes = torch.from_numpy(boxes)
+        inst.pred_masks = torch.from_numpy(np.asarray(masks))
+
+        v = Visualizer(img, MetadataCatalog.get("asdfasdf"))
+        v.draw_instance_predictions(inst)
				`@@ -0,0 +1 @@`
				`# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved`