Add to new repo again

Руслан Бундюк committed 2025-01-28 21:48:35 +00:00
commit 6e660ddb3c
564 changed files with 75575 additions and 0 deletions

requirements.txt (new file, 15 lines)

@@ -0,0 +1,15 @@
torch==2.4.0
torchvision==0.19.0
accelerate==0.31.0
diffusers==0.31.0
transformers==4.39.3
gradio==5.8.0
numpy==1.23.0
scikit-image==0.24.0
huggingface_hub==0.26.5
onnxruntime==1.20.1
opencv-python
matplotlib==3.8.3
einops==0.7.0
fastapi[all]

tests/imgs/garment.jpg (new binary file, 732 KiB; content not shown)

tests/imgs/person.jpg (new binary file, 26 KiB; content not shown)

Unnamed new binary file (672 KiB; content not shown)

tests/test.py (new file, 55 lines)

@@ -0,0 +1,55 @@
import requests
import base64
import os
from PIL import Image
import io
from datetime import datetime
def image_to_base64(image_path):
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode()
def base64_to_image(base64_str):
image_data = base64.b64decode(base64_str)
return Image.open(io.BytesIO(image_data))
def save_results(generated_images, output_dir="results"):
os.makedirs(output_dir, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
saved_paths = []
for idx, img_base64 in enumerate(generated_images):
img = base64_to_image(img_base64)
output_path = os.path.join(output_dir, f"result_{timestamp}_{idx}.png")
img.save(output_path)
saved_paths.append(output_path)
print(f"Saved image to {output_path}")
return saved_paths
data = {
"model_image": image_to_base64("imgs/person.jpg"),
"garment_image": image_to_base64("imgs/garment.jpg"),
"category": "Upper-body",
"resolution": "768x1024",
"n_steps": 30,
"image_scale": 2.0,
"num_images": 1
}
try:
response = requests.post("http://localhost:8001/try-on", json=data)
response.raise_for_status()
result = response.json()
if result["status"] == "success":
saved_files = save_results(result["generated_images"])
print(f"Successfully generated {len(saved_files)} images")
print(f"Seed used: {result['seed']}")
else:
print("Generation failed:", result.get("detail", "Unknown error"))
except requests.exceptions.RequestException as e:
print(f"Error making request: {e}")
except Exception as e:
print(f"Error processing results: {e}")

New file (DWpose detector module)

@@ -0,0 +1,68 @@
# Openpose
# Original from CMU https://github.com/CMU-Perceptual-Computing-Lab/openpose
# 2nd Edited by https://github.com/Hzzone/pytorch-openpose
# 3rd Edited by ControlNet
# 4th Edited by ControlNet (added face and correct hands)
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
import torch
import numpy as np
from . import util
from .wholebody import Wholebody
def draw_pose(pose, H, W):
bodies = pose['bodies']
faces = pose['faces']
hands = pose['hands']
candidate = bodies['candidate']
subset = bodies['subset']
canvas = np.zeros(shape=(H, W, 3), dtype=np.uint8)
canvas = util.draw_bodypose(canvas, candidate, subset)
canvas = util.draw_handpose(canvas, hands)
canvas = util.draw_facepose(canvas, faces)
return canvas
class DWposeDetector:
def __init__(self, model_root, device):
self.pose_estimation = Wholebody(model_root, device)
def __call__(self, oriImg):
oriImg = oriImg.copy()
H, W, C = oriImg.shape
with torch.no_grad():
candidate, subset = self.pose_estimation(oriImg)
nums, keys, locs = candidate.shape
candidate[..., 0] /= float(W)
candidate[..., 1] /= float(H)
body = candidate[:,:18].copy()
body = body.reshape(nums*18, locs)
ori_score = subset[:,:18].copy()
score = subset[:,:18].copy()
for i in range(len(score)):
for j in range(len(score[i])):
if score[i][j] > 0.3:
score[i][j] = int(18*i+j)
else:
score[i][j] = -1
un_visible = subset<0.3
candidate[un_visible] = -1
foot = candidate[:,18:24]
faces = candidate[:,24:92]
hands = candidate[:,92:113]
hands = np.vstack([hands, candidate[:,113:]])
bodies = dict(candidate=body, subset=score)
pose = dict(bodies=bodies, hands=hands, faces=faces)
return draw_pose(pose, H, W), body, ori_score, candidate
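Note (illustrative, not part of the committed file): a minimal usage sketch for the detector defined above. The package import path, checkpoint directory and image path are placeholder assumptions.

import cv2
from dwpose import DWposeDetector  # assumed import path for this module

detector = DWposeDetector(model_root="./ckpts", device="cuda")  # expects dwpose/*.onnx under model_root
img = cv2.imread("person.jpg")                                  # HxWx3 BGR array
canvas, body, scores, candidate = detector(img)                 # rendered pose map plus raw keypoints
cv2.imwrite("pose.png", canvas)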

New file (YOLOX ONNX person detector)

@@ -0,0 +1,125 @@
import cv2
import numpy as np
import onnxruntime
def nms(boxes, scores, nms_thr):
"""Single class NMS implemented in Numpy."""
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
order = scores.argsort()[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
ovr = inter / (areas[i] + areas[order[1:]] - inter)
inds = np.where(ovr <= nms_thr)[0]
order = order[inds + 1]
return keep
def multiclass_nms(boxes, scores, nms_thr, score_thr):
"""Multiclass NMS implemented in Numpy. Class-aware version."""
final_dets = []
num_classes = scores.shape[1]
for cls_ind in range(num_classes):
cls_scores = scores[:, cls_ind]
valid_score_mask = cls_scores > score_thr
if valid_score_mask.sum() == 0:
continue
else:
valid_scores = cls_scores[valid_score_mask]
valid_boxes = boxes[valid_score_mask]
keep = nms(valid_boxes, valid_scores, nms_thr)
if len(keep) > 0:
cls_inds = np.ones((len(keep), 1)) * cls_ind
dets = np.concatenate(
[valid_boxes[keep], valid_scores[keep, None], cls_inds], 1
)
final_dets.append(dets)
if len(final_dets) == 0:
return None
return np.concatenate(final_dets, 0)
def demo_postprocess(outputs, img_size, p6=False):
grids = []
expanded_strides = []
strides = [8, 16, 32] if not p6 else [8, 16, 32, 64]
hsizes = [img_size[0] // stride for stride in strides]
wsizes = [img_size[1] // stride for stride in strides]
for hsize, wsize, stride in zip(hsizes, wsizes, strides):
xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
grids.append(grid)
shape = grid.shape[:2]
expanded_strides.append(np.full((*shape, 1), stride))
grids = np.concatenate(grids, 1)
expanded_strides = np.concatenate(expanded_strides, 1)
outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides
outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides
return outputs
def preprocess(img, input_size, swap=(2, 0, 1)):
if len(img.shape) == 3:
padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114
else:
padded_img = np.ones(input_size, dtype=np.uint8) * 114
r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
resized_img = cv2.resize(
img,
(int(img.shape[1] * r), int(img.shape[0] * r)),
interpolation=cv2.INTER_LINEAR,
).astype(np.uint8)
padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
padded_img = padded_img.transpose(swap)
padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
return padded_img, r
def inference_detector(session, oriImg):
input_shape = (640,640)
img, ratio = preprocess(oriImg, input_shape)
ort_inputs = {session.get_inputs()[0].name: img[None, :, :, :]}
output = session.run(None, ort_inputs)
predictions = demo_postprocess(output[0], input_shape)[0]
boxes = predictions[:, :4]
scores = predictions[:, 4:5] * predictions[:, 5:]
boxes_xyxy = np.ones_like(boxes)
boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2]/2.
boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3]/2.
boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2]/2.
boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3]/2.
boxes_xyxy /= ratio
dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1)
if dets is not None:
final_boxes, final_scores, final_cls_inds = dets[:, :4], dets[:, 4], dets[:, 5]
isscore = final_scores>0.3
iscat = final_cls_inds == 0
isbbox = [ i and j for (i, j) in zip(isscore, iscat)]
final_boxes = final_boxes[isbbox]
else:
final_boxes = np.array([])
return final_boxes
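Note (illustrative, not part of the committed file): the detector can be exercised on its own as sketched below; the ONNX path, execution provider and image are placeholders, and in this repo the session is normally created by the Wholebody wrapper.

import cv2
import onnxruntime as ort

session = ort.InferenceSession("dwpose/yolox_l.onnx", providers=["CPUExecutionProvider"])
img = cv2.imread("person.jpg")            # BGR, HxWx3
boxes = inference_detector(session, img)  # (N, 4) person boxes in xyxy order, or an empty array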

New file (RTMPose ONNX pose estimator)

@@ -0,0 +1,360 @@
from typing import List, Tuple
import cv2
import numpy as np
import onnxruntime as ort
def preprocess(
img: np.ndarray, out_bbox, input_size: Tuple[int, int] = (192, 256)
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""Do preprocessing for RTMPose model inference.
Args:
img (np.ndarray): Input image in shape (H, W, C).
out_bbox (list | np.ndarray): Detected person boxes in (x1, y1, x2, y2) format.
input_size (tuple): Input image size in shape (w, h).
Returns:
tuple:
- resized_img (np.ndarray): Preprocessed image.
- center (np.ndarray): Center of image.
- scale (np.ndarray): Scale of image.
"""
# get shape of image
img_shape = img.shape[:2]
out_img, out_center, out_scale = [], [], []
if len(out_bbox) == 0:
out_bbox = [[0, 0, img_shape[1], img_shape[0]]]
for i in range(len(out_bbox)):
x0 = out_bbox[i][0]
y0 = out_bbox[i][1]
x1 = out_bbox[i][2]
y1 = out_bbox[i][3]
bbox = np.array([x0, y0, x1, y1])
# get center and scale
center, scale = bbox_xyxy2cs(bbox, padding=1.25)
# do affine transformation
resized_img, scale = top_down_affine(input_size, scale, center, img)
# normalize image
mean = np.array([123.675, 116.28, 103.53])
std = np.array([58.395, 57.12, 57.375])
resized_img = (resized_img - mean) / std
out_img.append(resized_img)
out_center.append(center)
out_scale.append(scale)
return out_img, out_center, out_scale
def inference(sess: ort.InferenceSession, img: np.ndarray) -> np.ndarray:
"""Inference RTMPose model.
Args:
sess (ort.InferenceSession): ONNXRuntime session.
img (list[np.ndarray]): Preprocessed image crops produced by ``preprocess``.
Returns:
outputs (np.ndarray): Output of RTMPose model.
"""
all_out = []
# build input
for i in range(len(img)):
input = [img[i].transpose(2, 0, 1)]
# build output
sess_input = {sess.get_inputs()[0].name: input}
sess_output = []
for out in sess.get_outputs():
sess_output.append(out.name)
# run model
outputs = sess.run(sess_output, sess_input)
all_out.append(outputs)
return all_out
def postprocess(outputs: List[np.ndarray],
model_input_size: Tuple[int, int],
center: Tuple[int, int],
scale: Tuple[int, int],
simcc_split_ratio: float = 2.0
) -> Tuple[np.ndarray, np.ndarray]:
"""Postprocess for RTMPose model output.
Args:
outputs (np.ndarray): Output of RTMPose model.
model_input_size (tuple): RTMPose model Input image size.
center (tuple): Center of bbox in shape (x, y).
scale (tuple): Scale of bbox in shape (w, h).
simcc_split_ratio (float): Split ratio of simcc.
Returns:
tuple:
- keypoints (np.ndarray): Rescaled keypoints.
- scores (np.ndarray): Model predict scores.
"""
all_key = []
all_score = []
for i in range(len(outputs)):
# use simcc to decode
simcc_x, simcc_y = outputs[i]
keypoints, scores = decode(simcc_x, simcc_y, simcc_split_ratio)
# rescale keypoints
keypoints = keypoints / model_input_size * scale[i] + center[i] - scale[i] / 2
all_key.append(keypoints[0])
all_score.append(scores[0])
return np.array(all_key), np.array(all_score)
def bbox_xyxy2cs(bbox: np.ndarray,
padding: float = 1.) -> Tuple[np.ndarray, np.ndarray]:
"""Transform the bbox format from (x,y,w,h) into (center, scale)
Args:
bbox (ndarray): Bounding box(es) in shape (4,) or (n, 4), formatted
as (left, top, right, bottom)
padding (float): BBox padding factor that will be multiplied to scale.
Default: 1.0
Returns:
tuple: A tuple containing center and scale.
- np.ndarray[float32]: Center (x, y) of the bbox in shape (2,) or
(n, 2)
- np.ndarray[float32]: Scale (w, h) of the bbox in shape (2,) or
(n, 2)
"""
# convert single bbox from (4, ) to (1, 4)
dim = bbox.ndim
if dim == 1:
bbox = bbox[None, :]
# get bbox center and scale
x1, y1, x2, y2 = np.hsplit(bbox, [1, 2, 3])
center = np.hstack([x1 + x2, y1 + y2]) * 0.5
scale = np.hstack([x2 - x1, y2 - y1]) * padding
if dim == 1:
center = center[0]
scale = scale[0]
return center, scale
def _fix_aspect_ratio(bbox_scale: np.ndarray,
aspect_ratio: float) -> np.ndarray:
"""Extend the scale to match the given aspect ratio.
Args:
bbox_scale (np.ndarray): The image scale (w, h) in shape (2, )
aspect_ratio (float): The ratio of ``w/h``
Returns:
np.ndarray: The reshaped image scale in (2, )
"""
w, h = np.hsplit(bbox_scale, [1])
bbox_scale = np.where(w > h * aspect_ratio,
np.hstack([w, w / aspect_ratio]),
np.hstack([h * aspect_ratio, h]))
return bbox_scale
def _rotate_point(pt: np.ndarray, angle_rad: float) -> np.ndarray:
"""Rotate a point by an angle.
Args:
pt (np.ndarray): 2D point coordinates (x, y) in shape (2, )
angle_rad (float): rotation angle in radian
Returns:
np.ndarray: Rotated point in shape (2, )
"""
sn, cs = np.sin(angle_rad), np.cos(angle_rad)
rot_mat = np.array([[cs, -sn], [sn, cs]])
return rot_mat @ pt
def _get_3rd_point(a: np.ndarray, b: np.ndarray) -> np.ndarray:
"""To calculate the affine matrix, three pairs of points are required. This
function is used to get the 3rd point, given 2D points a & b.
The 3rd point is defined by rotating vector `a - b` by 90 degrees
anticlockwise, using b as the rotation center.
Args:
a (np.ndarray): The 1st point (x,y) in shape (2, )
b (np.ndarray): The 2nd point (x,y) in shape (2, )
Returns:
np.ndarray: The 3rd point.
"""
direction = a - b
c = b + np.r_[-direction[1], direction[0]]
return c
def get_warp_matrix(center: np.ndarray,
scale: np.ndarray,
rot: float,
output_size: Tuple[int, int],
shift: Tuple[float, float] = (0., 0.),
inv: bool = False) -> np.ndarray:
"""Calculate the affine transformation matrix that can warp the bbox area
in the input image to the output size.
Args:
center (np.ndarray[2, ]): Center of the bounding box (x, y).
scale (np.ndarray[2, ]): Scale of the bounding box
wrt [width, height].
rot (float): Rotation angle (degree).
output_size (np.ndarray[2, ] | list(2,)): Size of the
destination heatmaps.
shift (0-100%): Shift translation ratio wrt the width/height.
Default (0., 0.).
inv (bool): Option to inverse the affine transform direction.
(inv=False: src->dst or inv=True: dst->src)
Returns:
np.ndarray: A 2x3 transformation matrix
"""
shift = np.array(shift)
src_w = scale[0]
dst_w = output_size[0]
dst_h = output_size[1]
# compute transformation matrix
rot_rad = np.deg2rad(rot)
src_dir = _rotate_point(np.array([0., src_w * -0.5]), rot_rad)
dst_dir = np.array([0., dst_w * -0.5])
# get four corners of the src rectangle in the original image
src = np.zeros((3, 2), dtype=np.float32)
src[0, :] = center + scale * shift
src[1, :] = center + src_dir + scale * shift
src[2, :] = _get_3rd_point(src[0, :], src[1, :])
# get four corners of the dst rectangle in the input image
dst = np.zeros((3, 2), dtype=np.float32)
dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])
if inv:
warp_mat = cv2.getAffineTransform(np.float32(dst), np.float32(src))
else:
warp_mat = cv2.getAffineTransform(np.float32(src), np.float32(dst))
return warp_mat
def top_down_affine(input_size: Tuple[int, int], bbox_scale: np.ndarray,
bbox_center: np.ndarray, img: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""Get the bbox image as the model input by affine transform.
Args:
input_size (tuple): The (w, h) input size of the model.
bbox_scale (np.ndarray): The bbox scale of the img.
bbox_center (np.ndarray): The bbox center of the img.
img (np.ndarray): The original image.
Returns:
tuple: A tuple containing the cropped image and the corrected bbox scale.
- np.ndarray[float32]: img after affine transform.
- np.ndarray[float32]: bbox scale after affine transform.
"""
w, h = input_size
warp_size = (int(w), int(h))
# reshape bbox to fixed aspect ratio
bbox_scale = _fix_aspect_ratio(bbox_scale, aspect_ratio=w / h)
# get the affine matrix
center = bbox_center
scale = bbox_scale
rot = 0
warp_mat = get_warp_matrix(center, scale, rot, output_size=(w, h))
# do affine transform
img = cv2.warpAffine(img, warp_mat, warp_size, flags=cv2.INTER_LINEAR)
return img, bbox_scale
def get_simcc_maximum(simcc_x: np.ndarray,
simcc_y: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""Get maximum response location and value from simcc representations.
Note:
instance number: N
num_keypoints: K
heatmap height: H
heatmap width: W
Args:
simcc_x (np.ndarray): x-axis SimCC in shape (K, Wx) or (N, K, Wx)
simcc_y (np.ndarray): y-axis SimCC in shape (K, Wy) or (N, K, Wy)
Returns:
tuple:
- locs (np.ndarray): locations of maximum heatmap responses in shape
(K, 2) or (N, K, 2)
- vals (np.ndarray): values of maximum heatmap responses in shape
(K,) or (N, K)
"""
N, K, Wx = simcc_x.shape
simcc_x = simcc_x.reshape(N * K, -1)
simcc_y = simcc_y.reshape(N * K, -1)
# get maximum value locations
x_locs = np.argmax(simcc_x, axis=1)
y_locs = np.argmax(simcc_y, axis=1)
locs = np.stack((x_locs, y_locs), axis=-1).astype(np.float32)
max_val_x = np.amax(simcc_x, axis=1)
max_val_y = np.amax(simcc_y, axis=1)
# get maximum value across x and y axis
mask = max_val_x > max_val_y
max_val_x[mask] = max_val_y[mask]
vals = max_val_x
locs[vals <= 0.] = -1
# reshape
locs = locs.reshape(N, K, 2)
vals = vals.reshape(N, K)
return locs, vals
def decode(simcc_x: np.ndarray, simcc_y: np.ndarray,
simcc_split_ratio) -> Tuple[np.ndarray, np.ndarray]:
"""Modulate simcc distribution with Gaussian.
Args:
simcc_x (np.ndarray[K, Wx]): model predicted simcc in x.
simcc_y (np.ndarray[K, Wy]): model predicted simcc in y.
simcc_split_ratio (int): The split ratio of simcc.
Returns:
tuple: A tuple containing keypoints and scores.
- np.ndarray[float32]: keypoints in shape (K, 2) or (n, K, 2)
- np.ndarray[float32]: scores in shape (K,) or (n, K)
"""
keypoints, scores = get_simcc_maximum(simcc_x, simcc_y)
keypoints /= simcc_split_ratio
return keypoints, scores
def inference_pose(session, out_bbox, oriImg):
h, w = session.get_inputs()[0].shape[2:]
model_input_size = (w, h)
resized_img, center, scale = preprocess(oriImg, out_bbox, model_input_size)
outputs = inference(session, resized_img)
keypoints, scores = postprocess(outputs, model_input_size, center, scale)
return keypoints, scores
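Note (illustrative, not part of the committed file): inference_pose consumes the person boxes produced by the detector module; as preprocess above shows, an empty box list falls back to a single full-image box. The model path and image are placeholders.

import cv2
import onnxruntime as ort

pose_session = ort.InferenceSession("dwpose/dw-ll_ucoco_384.onnx", providers=["CPUExecutionProvider"])
img = cv2.imread("person.jpg")
keypoints, scores = inference_pose(pose_session, [], img)  # [] -> the whole image is treated as one box
# keypoints: (num_person, K, 2) pixel coordinates, scores: (num_person, K) confidences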

New file (OpenPose drawing and hand/face detection utilities)

@@ -0,0 +1,297 @@
import math
import numpy as np
import matplotlib
import cv2
eps = 0.01
def smart_resize(x, s):
Ht, Wt = s
if x.ndim == 2:
Ho, Wo = x.shape
Co = 1
else:
Ho, Wo, Co = x.shape
if Co == 3 or Co == 1:
k = float(Ht + Wt) / float(Ho + Wo)
return cv2.resize(x, (int(Wt), int(Ht)), interpolation=cv2.INTER_AREA if k < 1 else cv2.INTER_LANCZOS4)
else:
return np.stack([smart_resize(x[:, :, i], s) for i in range(Co)], axis=2)
def smart_resize_k(x, fx, fy):
if x.ndim == 2:
Ho, Wo = x.shape
Co = 1
else:
Ho, Wo, Co = x.shape
Ht, Wt = Ho * fy, Wo * fx
if Co == 3 or Co == 1:
k = float(Ht + Wt) / float(Ho + Wo)
return cv2.resize(x, (int(Wt), int(Ht)), interpolation=cv2.INTER_AREA if k < 1 else cv2.INTER_LANCZOS4)
else:
return np.stack([smart_resize_k(x[:, :, i], fx, fy) for i in range(Co)], axis=2)
def padRightDownCorner(img, stride, padValue):
h = img.shape[0]
w = img.shape[1]
pad = 4 * [None]
pad[0] = 0 # up
pad[1] = 0 # left
pad[2] = 0 if (h % stride == 0) else stride - (h % stride) # down
pad[3] = 0 if (w % stride == 0) else stride - (w % stride) # right
img_padded = img
pad_up = np.tile(img_padded[0:1, :, :]*0 + padValue, (pad[0], 1, 1))
img_padded = np.concatenate((pad_up, img_padded), axis=0)
pad_left = np.tile(img_padded[:, 0:1, :]*0 + padValue, (1, pad[1], 1))
img_padded = np.concatenate((pad_left, img_padded), axis=1)
pad_down = np.tile(img_padded[-2:-1, :, :]*0 + padValue, (pad[2], 1, 1))
img_padded = np.concatenate((img_padded, pad_down), axis=0)
pad_right = np.tile(img_padded[:, -2:-1, :]*0 + padValue, (1, pad[3], 1))
img_padded = np.concatenate((img_padded, pad_right), axis=1)
return img_padded, pad
def transfer(model, model_weights):
transfered_model_weights = {}
for weights_name in model.state_dict().keys():
transfered_model_weights[weights_name] = model_weights['.'.join(weights_name.split('.')[1:])]
return transfered_model_weights
def draw_bodypose(canvas, candidate, subset):
H, W, C = canvas.shape
candidate = np.array(candidate)
subset = np.array(subset)
stickwidth = 4
limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
[10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
[1, 16], [16, 18], [3, 17], [6, 18]]
colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
[0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
[170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
for i in range(17):
for n in range(len(subset)):
index = subset[n][np.array(limbSeq[i]) - 1]
if -1 in index:
continue
Y = candidate[index.astype(int), 0] * float(W)
X = candidate[index.astype(int), 1] * float(H)
mX = np.mean(X)
mY = np.mean(Y)
length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
cv2.fillConvexPoly(canvas, polygon, colors[i])
canvas = (canvas * 0.6).astype(np.uint8)
for i in range(18):
for n in range(len(subset)):
index = int(subset[n][i])
if index == -1:
continue
x, y = candidate[index][0:2]
x = int(x * W)
y = int(y * H)
cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)
return canvas
def draw_handpose(canvas, all_hand_peaks):
H, W, C = canvas.shape
edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], \
[10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
for peaks in all_hand_peaks:
peaks = np.array(peaks)
for ie, e in enumerate(edges):
x1, y1 = peaks[e[0]]
x2, y2 = peaks[e[1]]
x1 = int(x1 * W)
y1 = int(y1 * H)
x2 = int(x2 * W)
y2 = int(y2 * H)
if x1 > eps and y1 > eps and x2 > eps and y2 > eps:
cv2.line(canvas, (x1, y1), (x2, y2), matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]) * 255, thickness=2)
for i, keypoint in enumerate(peaks):
x, y = keypoint
x = int(x * W)
y = int(y * H)
if x > eps and y > eps:
cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
return canvas
def draw_facepose(canvas, all_lmks):
H, W, C = canvas.shape
for lmks in all_lmks:
lmks = np.array(lmks)
for lmk in lmks:
x, y = lmk
x = int(x * W)
y = int(y * H)
if x > eps and y > eps:
cv2.circle(canvas, (x, y), 3, (255, 255, 255), thickness=-1)
return canvas
# detect hand according to body pose keypoints
# please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
def handDetect(candidate, subset, oriImg):
# right hand: wrist 4, elbow 3, shoulder 2
# left hand: wrist 7, elbow 6, shoulder 5
ratioWristElbow = 0.33
detect_result = []
image_height, image_width = oriImg.shape[0:2]
for person in subset.astype(int):
# if any of three not detected
has_left = np.sum(person[[5, 6, 7]] == -1) == 0
has_right = np.sum(person[[2, 3, 4]] == -1) == 0
if not (has_left or has_right):
continue
hands = []
#left hand
if has_left:
left_shoulder_index, left_elbow_index, left_wrist_index = person[[5, 6, 7]]
x1, y1 = candidate[left_shoulder_index][:2]
x2, y2 = candidate[left_elbow_index][:2]
x3, y3 = candidate[left_wrist_index][:2]
hands.append([x1, y1, x2, y2, x3, y3, True])
# right hand
if has_right:
right_shoulder_index, right_elbow_index, right_wrist_index = person[[2, 3, 4]]
x1, y1 = candidate[right_shoulder_index][:2]
x2, y2 = candidate[right_elbow_index][:2]
x3, y3 = candidate[right_wrist_index][:2]
hands.append([x1, y1, x2, y2, x3, y3, False])
for x1, y1, x2, y2, x3, y3, is_left in hands:
# pos_hand = pos_wrist + ratio * (pos_wrist - pos_elbox) = (1 + ratio) * pos_wrist - ratio * pos_elbox
# handRectangle.x = posePtr[wrist*3] + ratioWristElbow * (posePtr[wrist*3] - posePtr[elbow*3]);
# handRectangle.y = posePtr[wrist*3+1] + ratioWristElbow * (posePtr[wrist*3+1] - posePtr[elbow*3+1]);
# const auto distanceWristElbow = getDistance(poseKeypoints, person, wrist, elbow);
# const auto distanceElbowShoulder = getDistance(poseKeypoints, person, elbow, shoulder);
# handRectangle.width = 1.5f * fastMax(distanceWristElbow, 0.9f * distanceElbowShoulder);
x = x3 + ratioWristElbow * (x3 - x2)
y = y3 + ratioWristElbow * (y3 - y2)
distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)
# x-y refers to the center --> offset to topLeft point
# handRectangle.x -= handRectangle.width / 2.f;
# handRectangle.y -= handRectangle.height / 2.f;
x -= width / 2
y -= width / 2 # width = height
# overflow the image
if x < 0: x = 0
if y < 0: y = 0
width1 = width
width2 = width
if x + width > image_width: width1 = image_width - x
if y + width > image_height: width2 = image_height - y
width = min(width1, width2)
# keep only hand boxes of at least 20 pixels
if width >= 20:
detect_result.append([int(x), int(y), int(width), is_left])
'''
return value: [[x, y, w, True if left hand else False]].
width = height, since the network requires square input.
(x, y) is the coordinate of the top-left corner.
'''
return detect_result
# Written by Lvmin
def faceDetect(candidate, subset, oriImg):
# left right eye ear 14 15 16 17
detect_result = []
image_height, image_width = oriImg.shape[0:2]
for person in subset.astype(int):
has_head = person[0] > -1
if not has_head:
continue
has_left_eye = person[14] > -1
has_right_eye = person[15] > -1
has_left_ear = person[16] > -1
has_right_ear = person[17] > -1
if not (has_left_eye or has_right_eye or has_left_ear or has_right_ear):
continue
head, left_eye, right_eye, left_ear, right_ear = person[[0, 14, 15, 16, 17]]
width = 0.0
x0, y0 = candidate[head][:2]
if has_left_eye:
x1, y1 = candidate[left_eye][:2]
d = max(abs(x0 - x1), abs(y0 - y1))
width = max(width, d * 3.0)
if has_right_eye:
x1, y1 = candidate[right_eye][:2]
d = max(abs(x0 - x1), abs(y0 - y1))
width = max(width, d * 3.0)
if has_left_ear:
x1, y1 = candidate[left_ear][:2]
d = max(abs(x0 - x1), abs(y0 - y1))
width = max(width, d * 1.5)
if has_right_ear:
x1, y1 = candidate[right_ear][:2]
d = max(abs(x0 - x1), abs(y0 - y1))
width = max(width, d * 1.5)
x, y = x0, y0
x -= width
y -= width
if x < 0:
x = 0
if y < 0:
y = 0
width1 = width * 2
width2 = width * 2
if x + width > image_width:
width1 = image_width - x
if y + width > image_height:
width2 = image_height - y
width = min(width1, width2)
if width >= 20:
detect_result.append([int(x), int(y), int(width)])
return detect_result
# get max index of 2d array
def npmax(array):
arrayindex = array.argmax(1)
arrayvalue = array.max(1)
i = arrayvalue.argmax()
j = arrayindex[i]
return i, j
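Note (toy sketch, not part of the committed file): the drawing helpers expect keypoints normalized to [0, 1]; the snippet below renders one fabricated body pose onto an empty canvas.

import numpy as np

canvas = np.zeros((512, 512, 3), dtype=np.uint8)
candidate = np.random.rand(18, 2)                   # 18 body keypoints, normalized (x, y)
subset = np.arange(18, dtype=float).reshape(1, 18)  # one person, every joint assigned an index
canvas = draw_bodypose(canvas, candidate, subset)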

New file (Wholebody detector and pose wrapper)

@@ -0,0 +1,46 @@
import cv2
import numpy as np
import os
import onnxruntime as ort
from .onnxdet import inference_detector
from .onnxpose import inference_pose
class Wholebody:
def __init__(self, model_root, device):
providers = ['CPUExecutionProvider'] if device == 'cpu' else ['CUDAExecutionProvider']
onnx_det = os.path.join(model_root, 'dwpose/yolox_l.onnx')
onnx_pose = os.path.join(model_root, 'dwpose/dw-ll_ucoco_384.onnx')
self.session_det = ort.InferenceSession(path_or_bytes=onnx_det, providers=providers)
self.session_pose = ort.InferenceSession(path_or_bytes=onnx_pose, providers=providers)
def __call__(self, oriImg):
det_result = inference_detector(self.session_det, oriImg)
keypoints, scores = inference_pose(self.session_pose, det_result, oriImg)
keypoints_info = np.concatenate(
(keypoints, scores[..., None]), axis=-1)
# compute neck joint
neck = np.mean(keypoints_info[:, [5, 6]], axis=1)
# neck score when visualizing pred
neck[:, 2:4] = np.logical_and(
keypoints_info[:, 5, 2:4] > 0.3,
keypoints_info[:, 6, 2:4] > 0.3).astype(int)
new_keypoints_info = np.insert(
keypoints_info, 17, neck, axis=1)
mmpose_idx = [
17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3
]
openpose_idx = [
1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17
]
new_keypoints_info[:, openpose_idx] = \
new_keypoints_info[:, mmpose_idx]
keypoints_info = new_keypoints_info
keypoints, scores = keypoints_info[
..., :2], keypoints_info[..., 2]
return keypoints, scores

New file (LIP dataset definitions)

@@ -0,0 +1,201 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : datasets.py
@Time : 8/4/19 3:35 PM
@Desc :
@License : This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
import os
import numpy as np
import random
import torch
import cv2
from torch.utils import data
from utils.transforms import get_affine_transform
class LIPDataSet(data.Dataset):
def __init__(self, root, dataset, crop_size=[473, 473], scale_factor=0.25,
rotation_factor=30, ignore_label=255, transform=None):
self.root = root
self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0]
self.crop_size = np.asarray(crop_size)
self.ignore_label = ignore_label
self.scale_factor = scale_factor
self.rotation_factor = rotation_factor
self.flip_prob = 0.5
self.transform = transform
self.dataset = dataset
list_path = os.path.join(self.root, self.dataset + '_id.txt')
train_list = [i_id.strip() for i_id in open(list_path)]
self.train_list = train_list
self.number_samples = len(self.train_list)
def __len__(self):
return self.number_samples
def _box2cs(self, box):
x, y, w, h = box[:4]
return self._xywh2cs(x, y, w, h)
def _xywh2cs(self, x, y, w, h):
center = np.zeros((2), dtype=np.float32)
center[0] = x + w * 0.5
center[1] = y + h * 0.5
if w > self.aspect_ratio * h:
h = w * 1.0 / self.aspect_ratio
elif w < self.aspect_ratio * h:
w = h * self.aspect_ratio
scale = np.array([w * 1.0, h * 1.0], dtype=np.float32)
return center, scale
def __getitem__(self, index):
train_item = self.train_list[index]
im_path = os.path.join(self.root, self.dataset + '_images', train_item + '.jpg')
parsing_anno_path = os.path.join(self.root, self.dataset + '_segmentations', train_item + '.png')
im = cv2.imread(im_path, cv2.IMREAD_COLOR)
h, w, _ = im.shape
parsing_anno = np.zeros((h, w), dtype=np.int64)
# Get person center and scale
person_center, s = self._box2cs([0, 0, w - 1, h - 1])
r = 0
if self.dataset != 'test':
# Get pose annotation
parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE)
if self.dataset == 'train' or self.dataset == 'trainval':
sf = self.scale_factor
rf = self.rotation_factor
s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) if random.random() <= 0.6 else 0
if random.random() <= self.flip_prob:
im = im[:, ::-1, :]
parsing_anno = parsing_anno[:, ::-1]
person_center[0] = im.shape[1] - person_center[0] - 1
right_idx = [15, 17, 19]
left_idx = [14, 16, 18]
for i in range(0, 3):
right_pos = np.where(parsing_anno == right_idx[i])
left_pos = np.where(parsing_anno == left_idx[i])
parsing_anno[right_pos[0], right_pos[1]] = left_idx[i]
parsing_anno[left_pos[0], left_pos[1]] = right_idx[i]
trans = get_affine_transform(person_center, s, r, self.crop_size)
input = cv2.warpAffine(
im,
trans,
(int(self.crop_size[1]), int(self.crop_size[0])),
flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_CONSTANT,
borderValue=(0, 0, 0))
if self.transform:
input = self.transform(input)
meta = {
'name': train_item,
'center': person_center,
'height': h,
'width': w,
'scale': s,
'rotation': r
}
if self.dataset == 'val' or self.dataset == 'test':
return input, meta
else:
label_parsing = cv2.warpAffine(
parsing_anno,
trans,
(int(self.crop_size[1]), int(self.crop_size[0])),
flags=cv2.INTER_NEAREST,
borderMode=cv2.BORDER_CONSTANT,
borderValue=(255))
label_parsing = torch.from_numpy(label_parsing)
return input, label_parsing, meta
class LIPDataValSet(data.Dataset):
def __init__(self, root, dataset='val', crop_size=[473, 473], transform=None, flip=False):
self.root = root
self.crop_size = crop_size
self.transform = transform
self.flip = flip
self.dataset = dataset
self.root = root
self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0]
self.crop_size = np.asarray(crop_size)
list_path = os.path.join(self.root, self.dataset + '_id.txt')
val_list = [i_id.strip() for i_id in open(list_path)]
self.val_list = val_list
self.number_samples = len(self.val_list)
def __len__(self):
return len(self.val_list)
def _box2cs(self, box):
x, y, w, h = box[:4]
return self._xywh2cs(x, y, w, h)
def _xywh2cs(self, x, y, w, h):
center = np.zeros((2), dtype=np.float32)
center[0] = x + w * 0.5
center[1] = y + h * 0.5
if w > self.aspect_ratio * h:
h = w * 1.0 / self.aspect_ratio
elif w < self.aspect_ratio * h:
w = h * self.aspect_ratio
scale = np.array([w * 1.0, h * 1.0], dtype=np.float32)
return center, scale
def __getitem__(self, index):
val_item = self.val_list[index]
# Load training image
im_path = os.path.join(self.root, self.dataset + '_images', val_item + '.jpg')
im = cv2.imread(im_path, cv2.IMREAD_COLOR)
h, w, _ = im.shape
# Get person center and scale
person_center, s = self._box2cs([0, 0, w - 1, h - 1])
r = 0
trans = get_affine_transform(person_center, s, r, self.crop_size)
input = cv2.warpAffine(
im,
trans,
(int(self.crop_size[1]), int(self.crop_size[0])),
flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_CONSTANT,
borderValue=(0, 0, 0))
input = self.transform(input)
flip_input = input.flip(dims=[-1])
if self.flip:
batch_input_im = torch.stack([input, flip_input])
else:
batch_input_im = input
meta = {
'name': val_item,
'center': person_center,
'height': h,
'width': w,
'scale': s,
'rotation': r
}
return batch_input_im, meta
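Note (hedged usage sketch, not part of the committed file): LIPDataSet plugs into a standard PyTorch DataLoader; the data root, normalization constants and batch size below are placeholder assumptions.

import torchvision.transforms as T
from torch.utils.data import DataLoader

transform = T.Compose([T.ToTensor(),
                       T.Normalize(mean=[0.406, 0.456, 0.485], std=[0.225, 0.224, 0.229])])
train_set = LIPDataSet(root="./data/LIP", dataset="train", crop_size=[473, 473], transform=transform)
loader = DataLoader(train_set, batch_size=16, shuffle=True, num_workers=4)
images, labels, meta = next(iter(loader))  # images: 16x3x473x473, labels: 16x473x473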

New file (single-folder inference dataset)

@@ -0,0 +1,89 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : dataset.py
@Time : 8/30/19 9:12 PM
@Desc : Dataset Definition
@License : This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
import os
import pdb
import cv2
import numpy as np
from PIL import Image
from torch.utils import data
from utils.transforms import get_affine_transform
class SimpleFolderDataset(data.Dataset):
def __init__(self, root, input_size=[512, 512], transform=None):
self.root = root
self.input_size = input_size
self.transform = transform
self.aspect_ratio = input_size[1] * 1.0 / input_size[0]
self.input_size = np.asarray(input_size)
self.is_pil_image = False
if isinstance(root, Image.Image):
self.file_list = [root]
self.is_pil_image = True
elif os.path.isfile(root):
self.file_list = [os.path.basename(root)]
self.root = os.path.dirname(root)
else:
self.file_list = os.listdir(self.root)
def __len__(self):
return len(self.file_list)
def _box2cs(self, box):
x, y, w, h = box[:4]
return self._xywh2cs(x, y, w, h)
def _xywh2cs(self, x, y, w, h):
center = np.zeros((2), dtype=np.float32)
center[0] = x + w * 0.5
center[1] = y + h * 0.5
if w > self.aspect_ratio * h:
h = w * 1.0 / self.aspect_ratio
elif w < self.aspect_ratio * h:
w = h * self.aspect_ratio
scale = np.array([w, h], dtype=np.float32)
return center, scale
def __getitem__(self, index):
if self.is_pil_image:
img = np.asarray(self.file_list[index])[:, :, [2, 1, 0]]
else:
img_name = self.file_list[index]
img_path = os.path.join(self.root, img_name)
img = cv2.imread(img_path, cv2.IMREAD_COLOR)
h, w, _ = img.shape
# Get person center and scale
person_center, s = self._box2cs([0, 0, w - 1, h - 1])
r = 0
trans = get_affine_transform(person_center, s, r, self.input_size)
input = cv2.warpAffine(
img,
trans,
(int(self.input_size[1]), int(self.input_size[0])),
flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_CONSTANT,
borderValue=(0, 0, 0))
input = self.transform(input)
meta = {
'center': person_center,
'height': h,
'width': w,
'scale': s,
'rotation': r
}
return input, meta
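Note (hedged usage sketch, not part of the committed file): SimpleFolderDataset also accepts a single PIL image, which is convenient for one-off inference; the image path and transform are placeholders.

from PIL import Image
import torchvision.transforms as T

transform = T.Compose([T.ToTensor()])
dataset = SimpleFolderDataset(root=Image.open("person.jpg"), input_size=[512, 512], transform=transform)
image, meta = dataset[0]  # 3x512x512 tensor; meta carries center/scale for undoing the affine warp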

New file (edge-map generation utility)

@@ -0,0 +1,40 @@
import torch
from torch.nn import functional as F
def generate_edge_tensor(label, edge_width=3):
label = label.type(torch.cuda.FloatTensor)
if len(label.shape) == 2:
label = label.unsqueeze(0)
n, h, w = label.shape
edge = torch.zeros(label.shape, dtype=torch.float).cuda()
# right
edge_right = edge[:, 1:h, :]
edge_right[(label[:, 1:h, :] != label[:, :h - 1, :]) & (label[:, 1:h, :] != 255)
& (label[:, :h - 1, :] != 255)] = 1
# up
edge_up = edge[:, :, :w - 1]
edge_up[(label[:, :, :w - 1] != label[:, :, 1:w])
& (label[:, :, :w - 1] != 255)
& (label[:, :, 1:w] != 255)] = 1
# upright
edge_upright = edge[:, :h - 1, :w - 1]
edge_upright[(label[:, :h - 1, :w - 1] != label[:, 1:h, 1:w])
& (label[:, :h - 1, :w - 1] != 255)
& (label[:, 1:h, 1:w] != 255)] = 1
# bottomright
edge_bottomright = edge[:, :h - 1, 1:w]
edge_bottomright[(label[:, :h - 1, 1:w] != label[:, 1:h, :w - 1])
& (label[:, :h - 1, 1:w] != 255)
& (label[:, 1:h, :w - 1] != 255)] = 1
kernel = torch.ones((1, 1, edge_width, edge_width), dtype=torch.float).cuda()
with torch.no_grad():
edge = edge.unsqueeze(1)
edge = F.conv2d(edge, kernel, stride=1, padding=1)
edge[edge!=0] = 1
edge = edge.squeeze()
return edge
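Note (minimal sketch, not part of the committed file; CUDA is required because the helper allocates CUDA tensors): build a toy two-class label map and extract its boundary mask.

import torch

label = torch.zeros(64, 64, dtype=torch.long)
label[16:48, 16:48] = 1                           # a square region of class 1
edge = generate_edge_tensor(label, edge_width=3)
print(edge.shape, int(edge.sum()))                # (64, 64) binary mask, nonzero along the square's border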

New file (human-mask to COCO annotation converter, train/val)

@@ -0,0 +1,166 @@
import argparse
import datetime
import json
import os
from PIL import Image
import numpy as np
import pycococreatortools
def get_arguments():
parser = argparse.ArgumentParser(description="transform mask annotation to coco annotation")
parser.add_argument("--dataset", type=str, default='CIHP', help="name of dataset (CIHP, MHPv2 or VIP)")
parser.add_argument("--json_save_dir", type=str, default='../data/msrcnn_finetune_annotations',
help="path to save coco-style annotation json file")
parser.add_argument("--use_val", type=bool, default=False,
help="use train+val set for finetuning or not")
parser.add_argument("--train_img_dir", type=str, default='../data/instance-level_human_parsing/Training/Images',
help="train image path")
parser.add_argument("--train_anno_dir", type=str,
default='../data/instance-level_human_parsing/Training/Human_ids',
help="train human mask path")
parser.add_argument("--val_img_dir", type=str, default='../data/instance-level_human_parsing/Validation/Images',
help="val image path")
parser.add_argument("--val_anno_dir", type=str,
default='../data/instance-level_human_parsing/Validation/Human_ids',
help="val human mask path")
return parser.parse_args()
def main(args):
INFO = {
"description": args.split_name + " Dataset",
"url": "",
"version": "",
"year": 2019,
"contributor": "xyq",
"date_created": datetime.datetime.utcnow().isoformat(' ')
}
LICENSES = [
{
"id": 1,
"name": "",
"url": ""
}
]
CATEGORIES = [
{
'id': 1,
'name': 'person',
'supercategory': 'person',
},
]
coco_output = {
"info": INFO,
"licenses": LICENSES,
"categories": CATEGORIES,
"images": [],
"annotations": []
}
image_id = 1
segmentation_id = 1
for image_name in os.listdir(args.train_img_dir):
image = Image.open(os.path.join(args.train_img_dir, image_name))
image_info = pycococreatortools.create_image_info(
image_id, image_name, image.size
)
coco_output["images"].append(image_info)
human_mask_name = os.path.splitext(image_name)[0] + '.png'
human_mask = np.asarray(Image.open(os.path.join(args.train_anno_dir, human_mask_name)))
human_gt_labels = np.unique(human_mask)
for i in range(1, len(human_gt_labels)):
category_info = {'id': 1, 'is_crowd': 0}
binary_mask = np.uint8(human_mask == i)
annotation_info = pycococreatortools.create_annotation_info(
segmentation_id, image_id, category_info, binary_mask,
image.size, tolerance=10
)
if annotation_info is not None:
coco_output["annotations"].append(annotation_info)
segmentation_id += 1
image_id += 1
if not os.path.exists(args.json_save_dir):
os.makedirs(args.json_save_dir)
if not args.use_val:
with open('{}/{}_train.json'.format(args.json_save_dir, args.dataset), 'w') as output_json_file:
json.dump(coco_output, output_json_file)
else:
for image_name in os.listdir(args.val_img_dir):
image = Image.open(os.path.join(args.val_img_dir, image_name))
image_info = pycococreatortools.create_image_info(
image_id, image_name, image.size
)
coco_output["images"].append(image_info)
human_mask_name = os.path.splitext(image_name)[0] + '.png'
human_mask = np.asarray(Image.open(os.path.join(args.val_anno_dir, human_mask_name)))
human_gt_labels = np.unique(human_mask)
for i in range(1, len(human_gt_labels)):
category_info = {'id': 1, 'is_crowd': 0}
binary_mask = np.uint8(human_mask == i)
annotation_info = pycococreatortools.create_annotation_info(
segmentation_id, image_id, category_info, binary_mask,
image.size, tolerance=10
)
if annotation_info is not None:
coco_output["annotations"].append(annotation_info)
segmentation_id += 1
image_id += 1
with open('{}/{}_trainval.json'.format(args.json_save_dir, args.dataset), 'w') as output_json_file:
json.dump(coco_output, output_json_file)
coco_output_val = {
"info": INFO,
"licenses": LICENSES,
"categories": CATEGORIES,
"images": [],
"annotations": []
}
image_id_val = 1
segmentation_id_val = 1
for image_name in os.listdir(args.val_img_dir):
image = Image.open(os.path.join(args.val_img_dir, image_name))
image_info = pycococreatortools.create_image_info(
image_id_val, image_name, image.size
)
coco_output_val["images"].append(image_info)
human_mask_name = os.path.splitext(image_name)[0] + '.png'
human_mask = np.asarray(Image.open(os.path.join(args.val_anno_dir, human_mask_name)))
human_gt_labels = np.unique(human_mask)
for i in range(1, len(human_gt_labels)):
category_info = {'id': 1, 'is_crowd': 0}
binary_mask = np.uint8(human_mask == i)
annotation_info = pycococreatortools.create_annotation_info(
segmentation_id_val, image_id_val, category_info, binary_mask,
image.size, tolerance=10
)
if annotation_info is not None:
coco_output_val["annotations"].append(annotation_info)
segmentation_id_val += 1
image_id_val += 1
with open('{}/{}_val.json'.format(args.json_save_dir, args.dataset), 'w') as output_json_file_val:
json.dump(coco_output_val, output_json_file_val)
if __name__ == "__main__":
args = get_arguments()
main(args)

New file (pycococreatortools helpers)

@@ -0,0 +1,114 @@
import re
import datetime
import numpy as np
from itertools import groupby
from skimage import measure
from PIL import Image
from pycocotools import mask
convert = lambda text: int(text) if text.isdigit() else text.lower()
natrual_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
def resize_binary_mask(array, new_size):
image = Image.fromarray(array.astype(np.uint8) * 255)
image = image.resize(new_size)
return np.asarray(image).astype(np.bool_)
def close_contour(contour):
if not np.array_equal(contour[0], contour[-1]):
contour = np.vstack((contour, contour[0]))
return contour
def binary_mask_to_rle(binary_mask):
rle = {'counts': [], 'size': list(binary_mask.shape)}
counts = rle.get('counts')
for i, (value, elements) in enumerate(groupby(binary_mask.ravel(order='F'))):
if i == 0 and value == 1:
counts.append(0)
counts.append(len(list(elements)))
return rle
def binary_mask_to_polygon(binary_mask, tolerance=0):
"""Converts a binary mask to COCO polygon representation
Args:
binary_mask: a 2D binary numpy array where '1's represent the object
tolerance: Maximum distance from original points of polygon to approximated
polygonal chain. If tolerance is 0, the original coordinate array is returned.
"""
polygons = []
# pad mask to close contours of shapes which start and end at an edge
padded_binary_mask = np.pad(binary_mask, pad_width=1, mode='constant', constant_values=0)
contours = measure.find_contours(padded_binary_mask, 0.5)
contours = np.subtract(contours, 1)
for contour in contours:
contour = close_contour(contour)
contour = measure.approximate_polygon(contour, tolerance)
if len(contour) < 3:
continue
contour = np.flip(contour, axis=1)
segmentation = contour.ravel().tolist()
# after padding and subtracting 1 we may get -0.5 points in our segmentation
segmentation = [0 if i < 0 else i for i in segmentation]
polygons.append(segmentation)
return polygons
def create_image_info(image_id, file_name, image_size,
date_captured=datetime.datetime.utcnow().isoformat(' '),
license_id=1, coco_url="", flickr_url=""):
image_info = {
"id": image_id,
"file_name": file_name,
"width": image_size[0],
"height": image_size[1],
"date_captured": date_captured,
"license": license_id,
"coco_url": coco_url,
"flickr_url": flickr_url
}
return image_info
def create_annotation_info(annotation_id, image_id, category_info, binary_mask,
image_size=None, tolerance=2, bounding_box=None):
if image_size is not None:
binary_mask = resize_binary_mask(binary_mask, image_size)
binary_mask_encoded = mask.encode(np.asfortranarray(binary_mask.astype(np.uint8)))
area = mask.area(binary_mask_encoded)
if area < 1:
return None
if bounding_box is None:
bounding_box = mask.toBbox(binary_mask_encoded)
if category_info["is_crowd"]:
is_crowd = 1
segmentation = binary_mask_to_rle(binary_mask)
else:
is_crowd = 0
segmentation = binary_mask_to_polygon(binary_mask, tolerance)
if not segmentation:
return None
annotation_info = {
"id": annotation_id,
"image_id": image_id,
"category_id": category_info["id"],
"iscrowd": is_crowd,
"area": area.tolist(),
"bbox": bounding_box.tolist(),
"segmentation": segmentation,
"width": binary_mask.shape[1],
"height": binary_mask.shape[0],
}
return annotation_info
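Note (hedged usage sketch, not part of the committed file): the helpers above build COCO-style records from a binary mask; the ids and file name below are placeholders.

import numpy as np

binary_mask = np.zeros((64, 64), dtype=np.uint8)
binary_mask[20:40, 20:40] = 1                     # one 20x20 "person" blob
image_info = create_image_info(image_id=1, file_name="0001.jpg", image_size=(64, 64))
annotation = create_annotation_info(annotation_id=1, image_id=1,
                                    category_info={"id": 1, "is_crowd": 0},
                                    binary_mask=binary_mask, image_size=(64, 64), tolerance=2)
print(image_info["file_name"], annotation["bbox"], annotation["area"])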

New file (COCO-style test-set annotation generator)

@@ -0,0 +1,74 @@
import argparse
import datetime
import json
import os
from PIL import Image
import pycococreatortools
def get_arguments():
parser = argparse.ArgumentParser(description="transform mask annotation to coco annotation")
parser.add_argument("--dataset", type=str, default='CIHP', help="name of dataset (CIHP, MHPv2 or VIP)")
parser.add_argument("--json_save_dir", type=str, default='../data/CIHP/annotations',
help="path to save coco-style annotation json file")
parser.add_argument("--test_img_dir", type=str, default='../data/CIHP/Testing/Images',
help="test image path")
return parser.parse_args()
args = get_arguments()
INFO = {
"description": args.dataset + "Dataset",
"url": "",
"version": "",
"year": 2020,
"contributor": "yunqiuxu",
"date_created": datetime.datetime.utcnow().isoformat(' ')
}
LICENSES = [
{
"id": 1,
"name": "",
"url": ""
}
]
CATEGORIES = [
{
'id': 1,
'name': 'person',
'supercategory': 'person',
},
]
def main(args):
coco_output = {
"info": INFO,
"licenses": LICENSES,
"categories": CATEGORIES,
"images": [],
"annotations": []
}
image_id = 1
for image_name in os.listdir(args.test_img_dir):
image = Image.open(os.path.join(args.test_img_dir, image_name))
image_info = pycococreatortools.create_image_info(
image_id, image_name, image.size
)
coco_output["images"].append(image_info)
image_id += 1
if not os.path.exists(os.path.join(args.json_save_dir)):
os.mkdir(os.path.join(args.json_save_dir))
with open('{}/{}.json'.format(args.json_save_dir, args.dataset), 'w') as output_json_file:
json.dump(coco_output, output_json_file)
if __name__ == "__main__":
main(args)

New file (CircleCI configuration)

@@ -0,0 +1,179 @@
# Python CircleCI 2.0 configuration file
#
# Check https://circleci.com/docs/2.0/language-python/ for more details
#
version: 2
# -------------------------------------------------------------------------------------
# Environments to run the jobs in
# -------------------------------------------------------------------------------------
cpu: &cpu
docker:
- image: circleci/python:3.6.8-stretch
resource_class: medium
gpu: &gpu
machine:
image: ubuntu-1604:201903-01
docker_layer_caching: true
resource_class: gpu.small
# -------------------------------------------------------------------------------------
# Re-usable commands
# -------------------------------------------------------------------------------------
install_python: &install_python
- run:
name: Install Python
working_directory: ~/
command: |
pyenv install 3.6.1
pyenv global 3.6.1
setup_venv: &setup_venv
- run:
name: Setup Virtual Env
working_directory: ~/
command: |
python -m venv ~/venv
echo ". ~/venv/bin/activate" >> $BASH_ENV
. ~/venv/bin/activate
python --version
which python
which pip
pip install --upgrade pip
install_dep: &install_dep
- run:
name: Install Dependencies
command: |
pip install --progress-bar off -U 'git+https://github.com/facebookresearch/fvcore'
pip install --progress-bar off cython opencv-python
pip install --progress-bar off 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
pip install --progress-bar off torch torchvision
install_detectron2: &install_detectron2
- run:
name: Install Detectron2
command: |
gcc --version
pip install -U --progress-bar off -e .[dev]
python -m detectron2.utils.collect_env
install_nvidia_driver: &install_nvidia_driver
- run:
name: Install nvidia driver
working_directory: ~/
command: |
wget -q 'https://s3.amazonaws.com/ossci-linux/nvidia_driver/NVIDIA-Linux-x86_64-430.40.run'
sudo /bin/bash ./NVIDIA-Linux-x86_64-430.40.run -s --no-drm
nvidia-smi
run_unittests: &run_unittests
- run:
name: Run Unit Tests
command: |
python -m unittest discover -v -s tests
# -------------------------------------------------------------------------------------
# Jobs to run
# -------------------------------------------------------------------------------------
jobs:
cpu_tests:
<<: *cpu
working_directory: ~/detectron2
steps:
- checkout
- <<: *setup_venv
# Cache the venv directory that contains dependencies
- restore_cache:
keys:
- cache-key-{{ .Branch }}-ID-20200425
- <<: *install_dep
- save_cache:
paths:
- ~/venv
key: cache-key-{{ .Branch }}-ID-20200425
- <<: *install_detectron2
- run:
name: isort
command: |
isort -c -sp .
- run:
name: black
command: |
black --check -l 100 .
- run:
name: flake8
command: |
flake8 .
- <<: *run_unittests
gpu_tests:
<<: *gpu
working_directory: ~/detectron2
steps:
- checkout
- <<: *install_nvidia_driver
- run:
name: Install nvidia-docker
working_directory: ~/
command: |
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \
sudo tee /etc/apt/sources.list.d/nvidia-docker.list
sudo apt-get update && sudo apt-get install -y nvidia-docker2
# reload the docker daemon configuration
sudo pkill -SIGHUP dockerd
- run:
name: Launch docker
working_directory: ~/detectron2/docker
command: |
nvidia-docker build -t detectron2:v0 -f Dockerfile-circleci .
nvidia-docker run -itd --name d2 detectron2:v0
docker exec -it d2 nvidia-smi
- run:
name: Build Detectron2
command: |
docker exec -it d2 pip install 'git+https://github.com/facebookresearch/fvcore'
docker cp ~/detectron2 d2:/detectron2
# This will build d2 for the target GPU arch only
docker exec -it d2 pip install -e /detectron2
docker exec -it d2 python3 -m detectron2.utils.collect_env
docker exec -it d2 python3 -c 'import torch; assert(torch.cuda.is_available())'
- run:
name: Run Unit Tests
command: |
docker exec -e CIRCLECI=true -it d2 python3 -m unittest discover -v -s /detectron2/tests
workflows:
version: 2
regular_test:
jobs:
- cpu_tests
- gpu_tests
#nightly_test:
#jobs:
#- gpu_tests
#triggers:
#- schedule:
#cron: "0 0 * * *"
#filters:
#branches:
#only:
#- master

New file (clang-format configuration)

@@ -0,0 +1,85 @@
AccessModifierOffset: -1
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands: false
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: false
BinPackParameters: false
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: false
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ForEachMacros: [ FOR_EACH, FOR_EACH_ENUMERATE, FOR_EACH_KV, FOR_EACH_R, FOR_EACH_RANGE, ]
IncludeCategories:
- Regex: '^<.*\.h(pp)?>'
Priority: 1
- Regex: '^<.*'
Priority: 2
- Regex: '.*'
Priority: 3
IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 8
UseTab: Never

New file (flake8 configuration)

@@ -0,0 +1,9 @@
# This is an example .flake8 config, used when developing *Black* itself.
# Keep in sync with setup.cfg which is used for source packages.
[flake8]
ignore = W503, E203, E221, C901, C408, E741
max-line-length = 100
max-complexity = 18
select = B,C,E,F,W,T4,B9
exclude = build,__init__.py

New file (Code of Conduct)

@@ -0,0 +1,5 @@
# Code of Conduct
Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
Please read the [full text](https://code.fb.com/codeofconduct/)
so that you can understand what actions will and will not be tolerated.

New file (contributing guide)

@@ -0,0 +1,49 @@
# Contributing to detectron2
## Issues
We use GitHub issues to track public bugs and questions.
Please make sure to follow one of the
[issue templates](https://github.com/facebookresearch/detectron2/issues/new/choose)
when reporting any issues.
Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.
## Pull Requests
We actively welcome your pull requests.
However, if you're adding any significant features (e.g. > 50 lines), please
make sure to have a corresponding issue to discuss your motivation and proposals,
before sending a PR. We do not always accept new features, and we take the following
factors into consideration:
1. Whether the same feature can be achieved without modifying detectron2.
Detectron2 is designed so that you can implement many extensions from the outside, e.g.
those in [projects](https://github.com/facebookresearch/detectron2/tree/master/projects).
If some part is not as extensible, you can also bring up the issue to make it more extensible.
2. Whether the feature is potentially useful to a large audience, or only to a small portion of users.
3. Whether the proposed solution has a good design / interface.
4. Whether the proposed solution adds extra mental/practical overhead to users who don't
need such a feature.
5. Whether the proposed solution breaks existing APIs.
When sending a PR, please do:
1. If a PR contains multiple orthogonal changes, split it into several PRs.
2. If you've added code that should be tested, add tests.
3. For PRs that need experiments (e.g. adding a new model or new methods),
you don't need to update model zoo, but do provide experiment results in the description of the PR.
4. If APIs are changed, update the documentation.
5. Make sure your code lints with `./dev/linter.sh`.
## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.
Complete your CLA here: <https://code.facebook.com/cla>
## License
By contributing to detectron2, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.

New file, 6.3 KiB (diff suppressed because one or more lines are too long)

New file (issue template pointer)

@@ -0,0 +1,5 @@
Please select an issue template from
https://github.com/facebookresearch/detectron2/issues/new/choose .
Otherwise your issue will be closed.

New file (bug report issue template)

@@ -0,0 +1,36 @@
---
name: "🐛 Bugs"
about: Report bugs in detectron2
title: Please read & provide the following
---
## Instructions To Reproduce the 🐛 Bug:
1. what changes you made (`git diff`) or what code you wrote
```
<put diff or code here>
```
2. what exact command you run:
3. what you observed (including __full logs__):
```
<put logs here>
```
4. please simplify the steps as much as possible so they do not require additional resources to
run, such as a private dataset.
## Expected behavior:
If there are no obvious errors in "what you observed" provided above,
please tell us the expected behavior.
## Environment:
Provide your environment information using the following command:
```
wget -nc -q https://github.com/facebookresearch/detectron2/raw/master/detectron2/utils/collect_env.py && python collect_env.py
```
If your issue looks like an installation issue / environment issue,
please first try to solve it yourself with the instructions in
https://detectron2.readthedocs.io/tutorials/install.html#common-installation-issues

View File

@ -0,0 +1,9 @@
# require an issue template to be chosen
blank_issues_enabled: false
# Unexpected behaviors & bugs are split into two templates.
# When they are combined into one template, users think "it's not a bug" and don't choose the template.
#
# But the file name is still "unexpected-problems-bugs.md" so that old references
# to this issue template still work.
# It's OK since this template should be a superset of "bugs.md" (unexpected behaviors are a superset of bugs)

View File

@ -0,0 +1,31 @@
---
name: "\U0001F680Feature Request"
about: Submit a proposal/request for a new detectron2 feature
---
## 🚀 Feature
A clear and concise description of the feature proposal.
## Motivation & Examples
Tell us why the feature is useful.
Describe what the feature would look like, if it is implemented.
Best demonstrated using **code examples** in addition to words.
## Note
We only consider adding new features if they are relevant to many users.
If you request the implementation of a research paper, note that
we only consider papers that have enough significance and prevalence in the object detection field.
We do not take requests for most projects in the `projects/` directory,
because they are research code releases that are mainly for other researchers to reproduce results.
Instead of adding features inside detectron2,
you can implement many features by [extending detectron2](https://detectron2.readthedocs.io/tutorials/extend.html).
The [projects/](https://github.com/facebookresearch/detectron2/tree/master/projects/) directory contains many such examples.

View File

@ -0,0 +1,26 @@
---
name: "❓How to do something?"
about: How to do something using detectron2? What does an API do?
---
## ❓ How to do something using detectron2
Describe what you want to do, including:
1. what inputs you will provide, if any:
2. what outputs you are expecting:
## ❓ What does an API do and how to use it?
Please link to which API or documentation you're asking about from
https://detectron2.readthedocs.io/
NOTE:
1. Only general answers are provided.
If you want to ask about "why X did not work", please use the
[Unexpected behaviors](https://github.com/facebookresearch/detectron2/issues/new/choose) issue template.
2. For questions about how to implement new models / new dataloaders / new training logic, etc., check the documentation first.
3. We do not answer general machine learning / computer vision questions that are not specific to detectron2, such as how a model works, how to improve your training / make it converge, or what algorithms/methods can be used to achieve X.

View File

@ -0,0 +1,45 @@
---
name: "Unexpected behaviors"
about: Run into unexpected behaviors when using detectron2
title: Please read & provide the following
---
If you do not know the root cause of the problem and wish someone to help you, please
post according to this template:
## Instructions To Reproduce the Issue:
1. what changes you made (`git diff`) or what code you wrote
```
<put diff or code here>
```
2. what exact command you run:
3. what you observed (including __full logs__):
```
<put logs here>
```
4. please simplify the steps as much as possible so they do not require additional resources to
run, such as a private dataset.
## Expected behavior:
If there is no obvious error in "what you observed" provided above,
please tell us the expected behavior.
If you expect the model to converge / work better, note that we do not give suggestions
on how to train a new model.
We will only help with it in one of the following two conditions:
(1) You're unable to reproduce the results in the detectron2 model zoo.
(2) It indicates a detectron2 bug.
## Environment:
Provide your environment information using the following command:
```
wget -nc -q https://github.com/facebookresearch/detectron2/raw/master/detectron2/utils/collect_env.py && python collect_env.py
```
If your issue looks like an installation issue / environment issue,
please first try to solve it yourself with the instructions in
https://detectron2.readthedocs.io/tutorials/install.html#common-installation-issues

View File

@ -0,0 +1,9 @@
Thanks for your contribution!
If you're sending a large PR (e.g., >50 lines),
please first open an issue about the feature / bug and indicate how you want to contribute.
Before submitting a PR, please run `dev/linter.sh` to lint the code.
See https://detectron2.readthedocs.io/notes/contributing.html#pull-requests
about how we handle PRs.

View File

@ -0,0 +1,46 @@
# output dir
output
instant_test_output
inference_test_output
*.jpg
*.png
*.txt
*.json
*.diff
# compilation and distribution
__pycache__
_ext
*.pyc
*.so
detectron2.egg-info/
build/
dist/
wheels/
# pytorch/python/numpy formats
*.pth
*.pkl
*.npy
# ipython/jupyter notebooks
*.ipynb
**/.ipynb_checkpoints/
# Editor temporaries
*.swn
*.swo
*.swp
*~
# editor settings
.idea
.vscode
# project dirs
/detectron2/model_zoo/configs
/datasets
/projects/*/datasets
/models

View File

@ -0,0 +1,79 @@
## Getting Started with Detectron2
This document provides a brief intro to the usage of the builtin command-line tools in detectron2.
For a tutorial that involves actual coding with the API,
see our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
which covers how to run inference with an
existing model, and how to train a builtin model on a custom dataset.
For more advanced tutorials, refer to our [documentation](https://detectron2.readthedocs.io/tutorials/extend.html).
### Inference Demo with Pre-trained Models
1. Pick a model and its config file from
[model zoo](MODEL_ZOO.md),
for example, `mask_rcnn_R_50_FPN_3x.yaml`.
2. We provide `demo.py`, which can run builtin standard models. Run it with:
```
cd demo/
python demo.py --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \
--input input1.jpg input2.jpg \
[--other-options]
--opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl
```
The configs are made for training; therefore, we need to point `MODEL.WEIGHTS` to a model from the model zoo for evaluation.
This command will run the inference and show visualizations in an OpenCV window.
For details of the command line arguments, see `demo.py -h` or look at its source code
to understand its behavior. Some common arguments are:
* To run __on your webcam__, replace `--input files` with `--webcam`.
* To run __on a video__, replace `--input files` with `--video-input video.mp4`.
* To run __on cpu__, add `MODEL.DEVICE cpu` after `--opts`.
* To save outputs to a directory (for images) or a file (for webcam or video), use `--output`.
### Training & Evaluation in Command Line
We provide a script, "tools/{,plain_}train_net.py", that can train
all the configs provided in detectron2.
You may want to use it as a reference to write your own training script.
To train a model with "train_net.py", first
setup the corresponding datasets following
[datasets/README.md](./datasets/README.md),
then run:
```
cd tools/
./train_net.py --num-gpus 8 \
--config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
```
The configs are made for 8-GPU training.
To train on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677), e.g.:
```
./train_net.py \
--config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \
--num-gpus 1 SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025
```
For most models, CPU training is not supported.
To evaluate a model's performance, use
```
./train_net.py \
--config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \
--eval-only MODEL.WEIGHTS /path/to/checkpoint_file
```
For more options, see `./train_net.py -h`.
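If you prefer to write your own training script instead of `train_net.py`, the following is a minimal single-GPU sketch using the `DefaultTrainer` API. It is only an illustration, assuming detectron2 is installed and the COCO dataset is set up as described above; the config name and output directory are placeholders you would adapt.
```
import os

from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml")
)
# Optionally initialize from the corresponding model zoo weights.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml"
)
# Single-GPU settings, following the linear scaling rule mentioned above.
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0025
cfg.OUTPUT_DIR = "./output"  # placeholder

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
```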
### Use Detectron2 APIs in Your Code
See our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
to learn how to use detectron2 APIs to:
1. run inference with an existing model
2. train a builtin model on a custom dataset
See [detectron2/projects](https://github.com/facebookresearch/detectron2/tree/master/projects)
for more ways to build your project on detectron2.
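As a quick illustration of running inference with an existing model, here is a minimal sketch. It assumes detectron2 and OpenCV are installed; the image path and score threshold are placeholders.
```
import cv2

from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
)
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
)
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # confidence threshold for detections
# cfg.MODEL.DEVICE = "cpu"  # uncomment to run without a GPU

predictor = DefaultPredictor(cfg)
image = cv2.imread("input1.jpg")  # BGR image, as the predictor expects
outputs = predictor(image)
print(outputs["instances"].pred_classes)
print(outputs["instances"].pred_boxes)
```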

View File

@ -0,0 +1,184 @@
## Installation
Our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
has step-by-step instructions that install detectron2.
The [Dockerfile](docker)
also installs detectron2 with a few simple commands.
### Requirements
- Linux or macOS with Python ≥ 3.6
- PyTorch ≥ 1.4
- [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation.
You can install them together at [pytorch.org](https://pytorch.org) to make sure of this.
- OpenCV, optional, needed by demo and visualization
- pycocotools: `pip install cython; pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'`
### Build Detectron2 from Source
gcc & g++ ≥ 5 are required. [ninja](https://ninja-build.org/) is recommended for a faster build.
Once you have them, run:
```
python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
# (add --user if you don't have permission)
# Or, to install it from a local clone:
git clone https://github.com/facebookresearch/detectron2.git
python -m pip install -e detectron2
# Or if you are on macOS
# CC=clang CXX=clang++ python -m pip install -e .
```
To __rebuild__ detectron2 that's built from a local clone, use `rm -rf build/ **/*.so` to clean the
old build first. You often need to rebuild detectron2 after reinstalling PyTorch.
### Install Pre-Built Detectron2 (Linux only)
```
# for CUDA 10.1:
python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/index.html
```
You can replace cu101 with "cu{100,92}" or "cpu".
Note that:
1. Such an installation has to be used with a certain version of the official PyTorch release.
See [releases](https://github.com/facebookresearch/detectron2/releases) for requirements.
It will not work with a different version of PyTorch or a non-official build of PyTorch.
2. Such an installation is out-of-date w.r.t. the master branch of detectron2. It may not be
compatible with the master branch of a research project that uses detectron2 (e.g. those in
[projects](projects) or [meshrcnn](https://github.com/facebookresearch/meshrcnn/)).
### Common Installation Issues
If you meet issues using the pre-built detectron2, please uninstall it and try building it from source.
Click each issue for its solutions:
<details>
<summary>
Undefined torch/aten/caffe2 symbols, or segmentation fault immediately when running the library.
</summary>
<br/>
This usually happens when detectron2 or torchvision is not
compiled with the version of PyTorch you're running.
Pre-built torchvision or detectron2 has to work with the corresponding official release of pytorch.
If the error comes from a pre-built torchvision, uninstall torchvision and pytorch and reinstall them
following [pytorch.org](http://pytorch.org) so that the versions match.
If the error comes from a pre-built detectron2, check [release notes](https://github.com/facebookresearch/detectron2/releases)
to see the corresponding pytorch version required for each pre-built detectron2.
If the error comes from detectron2 or torchvision that you built manually from source,
remove files you built (`build/`, `**/*.so`) and rebuild it so it can pick up the version of pytorch currently in your environment.
If you cannot resolve this problem, please include the output of `gdb -ex "r" -ex "bt" -ex "quit" --args python -m detectron2.utils.collect_env`
in your issue.
</details>
<details>
<summary>
Undefined C++ symbols (e.g. `GLIBCXX`) or C++ symbols not found.
</summary>
<br/>
Usually it's because the library is compiled with a newer C++ compiler but run with an old C++ runtime.
This often happens with old anaconda.
Try `conda update libgcc`. Then rebuild detectron2.
The fundamental solution is to run the code with a proper C++ runtime.
One way is to use `LD_PRELOAD=/path/to/libstdc++.so`.
</details>
<details>
<summary>
"Not compiled with GPU support" or "Detectron2 CUDA Compiler: not available".
</summary>
<br/>
CUDA is not found when building detectron2.
You should make sure
```
python -c 'import torch; from torch.utils.cpp_extension import CUDA_HOME; print(torch.cuda.is_available(), CUDA_HOME)'
```
prints valid output at the time you build detectron2.
Most models can run inference (but not training) without GPU support. To use CPUs, set `MODEL.DEVICE='cpu'` in the config.
</details>
<details>
<summary>
"invalid device function" or "no kernel image is available for execution".
</summary>
<br/>
Two possibilities:
* You built detectron2 with one version of CUDA but are running it with a different version.
To check whether it is the case,
use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions.
In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA"
to contain cuda libraries of the same version.
When they are inconsistent,
you need to either install a different build of PyTorch (or build by yourself)
to match your local CUDA installation, or install a different version of CUDA to match PyTorch.
* Detectron2 or PyTorch/torchvision is not built for the correct GPU architecture (compute capability).
The GPU architecture for PyTorch/detectron2/torchvision is available in the "architecture flags" in
`python -m detectron2.utils.collect_env`.
The GPU architecture flags of detectron2/torchvision by default matches the GPU model detected
during compilation. This means the compiled code may not work on a different GPU model.
To overwrite the GPU architecture for detectron2/torchvision, use `TORCH_CUDA_ARCH_LIST` environment variable during compilation.
For example, `export TORCH_CUDA_ARCH_LIST=6.0,7.0` makes it compile for both P100s and V100s.
Visit [developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus) to find out
the correct compute capability number for your device.
</details>
<details>
<summary>
Undefined CUDA symbols; cannot open libcudart.so; other nvcc failures.
</summary>
<br/>
The version of NVCC you use to build detectron2 or torchvision does
not match the version of CUDA you are running with.
This often happens when using anaconda's CUDA runtime.
Use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions.
In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA"
to contain cuda libraries of the same version.
When they are inconsistent,
you need to either install a different build of PyTorch (or build by yourself)
to match your local CUDA installation, or install a different version of CUDA to match PyTorch.
</details>
<details>
<summary>
"ImportError: cannot import name '_C'".
</summary>
<br/>
Please build and install detectron2 following the instructions above.
If you are running code from detectron2's root directory, `cd` to a different one.
Otherwise you may not be able to import the code that you installed.
</details>
<details>
<summary>
ONNX conversion segfault after some "TraceWarning".
</summary>
<br/>
The ONNX package was compiled with a compiler that is too old.
Please build and install ONNX from its source code using a compiler
whose version is closer to what's used by PyTorch (available in `torch.__config__.show()`).
</details>

View File

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2019 - present, Facebook, Inc
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -0,0 +1,903 @@
# Detectron2 Model Zoo and Baselines
## Introduction
This file documents a large collection of baselines trained
with detectron2 in Sep-Oct, 2019.
All numbers were obtained on [Big Basin](https://engineering.fb.com/data-center-engineering/introducing-big-basin-our-next-generation-ai-hardware/)
servers with 8 NVIDIA V100 GPUs & NVLink. The software in use was PyTorch 1.3, CUDA 9.2, cuDNN 7.4.2 or 7.6.3.
You can access these models from code using [detectron2.model_zoo](https://detectron2.readthedocs.io/modules/model_zoo.html) APIs.
In addition to these official baseline models, you can find more models in [projects/](projects/).
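For reference, a short sketch of the `model_zoo` API mentioned above (assuming detectron2 is installed; `model_zoo.get` may require a recent detectron2 release):
```
from detectron2 import model_zoo

cfg_path = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"
print(model_zoo.get_config_file(cfg_path))     # local path of the config file
print(model_zoo.get_checkpoint_url(cfg_path))  # download URL of the trained weights
model = model_zoo.get(cfg_path, trained=True)  # build the model and load the weights
```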
#### How to Read the Tables
* The "Name" column contains a link to the config file. Running `tools/train_net.py` with this config file
and 8 GPUs will reproduce the model.
* Training speed is averaged across the entire training.
We keep updating the speeds with the latest version of detectron2/pytorch/etc.,
so they might differ from the `metrics` file.
Training speed for multi-machine jobs is not provided.
* Inference speed is measured by `tools/train_net.py --eval-only`, or [inference_on_dataset()](https://detectron2.readthedocs.io/modules/evaluation.html#detectron2.evaluation.inference_on_dataset),
with batch size 1 in detectron2 directly.
Measuring it with your own code will likely introduce other overhead.
Actual deployment in production should in general be faster than the given inference
speed due to more optimizations.
* The *model id* column is provided for ease of reference.
To check the integrity of a downloaded file, note that every model on this page contains a prefix of its md5 checksum in its file name (see the sketch after this list).
* Training curves and other statistics can be found in `metrics` for each model.
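A small, hypothetical helper (not part of detectron2) for the integrity check mentioned above, assuming the file-name convention described in that bullet:
```
import hashlib
import re
from pathlib import Path

def verify_md5_prefix(path: str) -> bool:
    """Return True if the file's md5 digest starts with the hex token in its name."""
    name = Path(path).stem                      # e.g. "model_final_f10217"
    match = re.search(r"_([0-9a-f]{6,})$", name)
    if match is None:
        raise ValueError(f"no md5 prefix found in file name: {name}")
    digest = hashlib.md5(Path(path).read_bytes()).hexdigest()
    return digest.startswith(match.group(1))

# Example (path is a placeholder):
# print(verify_md5_prefix("model_final_f10217.pkl"))
```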
#### Common Settings for COCO Models
* All COCO models were trained on `train2017` and evaluated on `val2017`.
* The default settings are __not directly comparable__ with Detectron's standard settings.
For example, our default training data augmentation uses scale jittering in addition to horizontal flipping.
To make fair comparisons with Detectron's settings, see
[Detectron1-Comparisons](configs/Detectron1-Comparisons/) for accuracy comparison,
and [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html)
for speed comparison.
* For Faster/Mask R-CNN, we provide baselines based on __3 different backbone combinations__:
* __FPN__: Use a ResNet+FPN backbone with standard conv and FC heads for mask and box prediction,
respectively. It obtains the best
speed/accuracy tradeoff, but the other two are still useful for research.
* __C4__: Use a ResNet conv4 backbone with conv5 head. The original baseline in the Faster R-CNN paper.
* __DC5__ (Dilated-C5): Use a ResNet conv5 backbone with dilations in conv5, and standard conv and FC heads
for mask and box prediction, respectively.
This is used by the Deformable ConvNet paper.
* Most models are trained with the 3x schedule (~37 COCO epochs).
Although 1x models are heavily under-trained, we provide some ResNet-50 models with the 1x (~12 COCO epochs)
training schedule for comparison when doing quick research iteration.
#### ImageNet Pretrained Models
We provide backbone models pretrained on ImageNet-1k dataset.
These models have a __different__ format from those provided in Detectron: we do not fuse BatchNorm into an affine layer.
* [R-50.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-50.pkl): converted copy of [MSRA's original ResNet-50](https://github.com/KaimingHe/deep-residual-networks) model.
* [R-101.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-101.pkl): converted copy of [MSRA's original ResNet-101](https://github.com/KaimingHe/deep-residual-networks) model.
* [X-101-32x8d.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/FAIR/X-101-32x8d.pkl): ResNeXt-101-32x8d model trained with Caffe2 at FB.
Pretrained models in Detectron's format can still be used. For example:
* [X-152-32x8d-IN5k.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl):
ResNeXt-152-32x8d model trained on ImageNet-5k with Caffe2 at FB (see ResNeXt paper for details on ImageNet-5k).
* [R-50-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl):
ResNet-50 with Group Normalization.
* [R-101-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl):
ResNet-101 with Group Normalization.
Torchvision's ResNet models can be used after being converted by [this script](tools/convert-torchvision-to-d2.py).
#### License
All models available for download through this document are licensed under the
[Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/).
### COCO Object Detection Baselines
#### Faster R-CNN:
<!--
(fb only) To update the table in vim:
1. Remove the old table: d}
2. Copy the below command to the place of the table
3. :.!bash
./gen_html_table.py --config 'COCO-Detection/faster*50*'{1x,3x}'*' 'COCO-Detection/faster*101*' --name R50-C4 R50-DC5 R50-FPN R50-C4 R50-DC5 R50-FPN R101-C4 R101-DC5 R101-FPN X101-FPN --fields lr_sched train_speed inference_speed mem box_AP
-->
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: faster_rcnn_R_50_C4_1x -->
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml">R50-C4</a></td>
<td align="center">1x</td>
<td align="center">0.551</td>
<td align="center">0.102</td>
<td align="center">4.8</td>
<td align="center">35.7</td>
<td align="center">137257644</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/137257644/model_final_721ade.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/137257644/metrics.json">metrics</a></td>
</tr>
<!-- ROW: faster_rcnn_R_50_DC5_1x -->
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml">R50-DC5</a></td>
<td align="center">1x</td>
<td align="center">0.380</td>
<td align="center">0.068</td>
<td align="center">5.0</td>
<td align="center">37.3</td>
<td align="center">137847829</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_DC5_1x/137847829/model_final_51d356.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_DC5_1x/137847829/metrics.json">metrics</a></td>
</tr>
<!-- ROW: faster_rcnn_R_50_FPN_1x -->
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml">R50-FPN</a></td>
<td align="center">1x</td>
<td align="center">0.210</td>
<td align="center">0.038</td>
<td align="center">3.0</td>
<td align="center">37.9</td>
<td align="center">137257794</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_1x/137257794/model_final_b275ba.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_1x/137257794/metrics.json">metrics</a></td>
</tr>
<!-- ROW: faster_rcnn_R_50_C4_3x -->
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml">R50-C4</a></td>
<td align="center">3x</td>
<td align="center">0.543</td>
<td align="center">0.104</td>
<td align="center">4.8</td>
<td align="center">38.4</td>
<td align="center">137849393</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_3x/137849393/model_final_f97cb7.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_3x/137849393/metrics.json">metrics</a></td>
</tr>
<!-- ROW: faster_rcnn_R_50_DC5_3x -->
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml">R50-DC5</a></td>
<td align="center">3x</td>
<td align="center">0.378</td>
<td align="center">0.070</td>
<td align="center">5.0</td>
<td align="center">39.0</td>
<td align="center">137849425</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_DC5_3x/137849425/model_final_68d202.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_DC5_3x/137849425/metrics.json">metrics</a></td>
</tr>
<!-- ROW: faster_rcnn_R_50_FPN_3x -->
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml">R50-FPN</a></td>
<td align="center">3x</td>
<td align="center">0.209</td>
<td align="center">0.038</td>
<td align="center">3.0</td>
<td align="center">40.2</td>
<td align="center">137849458</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/metrics.json">metrics</a></td>
</tr>
<!-- ROW: faster_rcnn_R_101_C4_3x -->
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml">R101-C4</a></td>
<td align="center">3x</td>
<td align="center">0.619</td>
<td align="center">0.139</td>
<td align="center">5.9</td>
<td align="center">41.1</td>
<td align="center">138204752</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_C4_3x/138204752/model_final_298dad.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_C4_3x/138204752/metrics.json">metrics</a></td>
</tr>
<!-- ROW: faster_rcnn_R_101_DC5_3x -->
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml">R101-DC5</a></td>
<td align="center">3x</td>
<td align="center">0.452</td>
<td align="center">0.086</td>
<td align="center">6.1</td>
<td align="center">40.6</td>
<td align="center">138204841</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_DC5_3x/138204841/model_final_3e0943.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_DC5_3x/138204841/metrics.json">metrics</a></td>
</tr>
<!-- ROW: faster_rcnn_R_101_FPN_3x -->
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml">R101-FPN</a></td>
<td align="center">3x</td>
<td align="center">0.286</td>
<td align="center">0.051</td>
<td align="center">4.1</td>
<td align="center">42.0</td>
<td align="center">137851257</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/model_final_f6e8b1.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/metrics.json">metrics</a></td>
</tr>
<!-- ROW: faster_rcnn_X_101_32x8d_FPN_3x -->
<tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml">X101-FPN</a></td>
<td align="center">3x</td>
<td align="center">0.638</td>
<td align="center">0.098</td>
<td align="center">6.7</td>
<td align="center">43.0</td>
<td align="center">139173657</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x/139173657/model_final_68b088.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x/139173657/metrics.json">metrics</a></td>
</tr>
</tbody></table>
#### RetinaNet:
<!--
./gen_html_table.py --config 'COCO-Detection/retina*50*' 'COCO-Detection/retina*101*' --name R50 R50 R101 --fields lr_sched train_speed inference_speed mem box_AP
-->
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: retinanet_R_50_FPN_1x -->
<tr><td align="left"><a href="configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml">R50</a></td>
<td align="center">1x</td>
<td align="center">0.200</td>
<td align="center">0.055</td>
<td align="center">3.9</td>
<td align="center">36.5</td>
<td align="center">137593951</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_1x/137593951/model_final_b796dc.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_1x/137593951/metrics.json">metrics</a></td>
</tr>
<!-- ROW: retinanet_R_50_FPN_3x -->
<tr><td align="left"><a href="configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml">R50</a></td>
<td align="center">3x</td>
<td align="center">0.201</td>
<td align="center">0.055</td>
<td align="center">3.9</td>
<td align="center">37.9</td>
<td align="center">137849486</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_3x/137849486/model_final_4cafe0.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_3x/137849486/metrics.json">metrics</a></td>
</tr>
<!-- ROW: retinanet_R_101_FPN_3x -->
<tr><td align="left"><a href="configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml">R101</a></td>
<td align="center">3x</td>
<td align="center">0.280</td>
<td align="center">0.068</td>
<td align="center">5.1</td>
<td align="center">39.9</td>
<td align="center">138363263</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_101_FPN_3x/138363263/model_final_59f53c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_101_FPN_3x/138363263/metrics.json">metrics</a></td>
</tr>
</tbody></table>
#### RPN & Fast R-CNN:
<!--
./gen_html_table.py --config 'COCO-Detection/rpn*' 'COCO-Detection/fast_rcnn*' --name "RPN R50-C4" "RPN R50-FPN" "Fast R-CNN R50-FPN" --fields lr_sched train_speed inference_speed mem box_AP prop_AR
-->
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">prop.<br/>AR</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: rpn_R_50_C4_1x -->
<tr><td align="left"><a href="configs/COCO-Detection/rpn_R_50_C4_1x.yaml">RPN R50-C4</a></td>
<td align="center">1x</td>
<td align="center">0.130</td>
<td align="center">0.034</td>
<td align="center">1.5</td>
<td align="center"></td>
<td align="center">51.6</td>
<td align="center">137258005</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/rpn_R_50_C4_1x/137258005/model_final_450694.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/rpn_R_50_C4_1x/137258005/metrics.json">metrics</a></td>
</tr>
<!-- ROW: rpn_R_50_FPN_1x -->
<tr><td align="left"><a href="configs/COCO-Detection/rpn_R_50_FPN_1x.yaml">RPN R50-FPN</a></td>
<td align="center">1x</td>
<td align="center">0.186</td>
<td align="center">0.032</td>
<td align="center">2.7</td>
<td align="center"></td>
<td align="center">58.0</td>
<td align="center">137258492</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/rpn_R_50_FPN_1x/137258492/metrics.json">metrics</a></td>
</tr>
<!-- ROW: fast_rcnn_R_50_FPN_1x -->
<tr><td align="left"><a href="configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml">Fast R-CNN R50-FPN</a></td>
<td align="center">1x</td>
<td align="center">0.140</td>
<td align="center">0.029</td>
<td align="center">2.6</td>
<td align="center">37.8</td>
<td align="center"></td>
<td align="center">137635226</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/metrics.json">metrics</a></td>
</tr>
</tbody></table>
### COCO Instance Segmentation Baselines with Mask R-CNN
<!--
./gen_html_table.py --config 'COCO-InstanceSegmentation/mask*50*'{1x,3x}'*' 'COCO-InstanceSegmentation/mask*101*' --name R50-C4 R50-DC5 R50-FPN R50-C4 R50-DC5 R50-FPN R101-C4 R101-DC5 R101-FPN X101-FPN --fields lr_sched train_speed inference_speed mem box_AP mask_AP
-->
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">mask<br/>AP</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: mask_rcnn_R_50_C4_1x -->
<tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml">R50-C4</a></td>
<td align="center">1x</td>
<td align="center">0.584</td>
<td align="center">0.110</td>
<td align="center">5.2</td>
<td align="center">36.8</td>
<td align="center">32.2</td>
<td align="center">137259246</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x/137259246/model_final_9243eb.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x/137259246/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_R_50_DC5_1x -->
<tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml">R50-DC5</a></td>
<td align="center">1x</td>
<td align="center">0.471</td>
<td align="center">0.076</td>
<td align="center">6.5</td>
<td align="center">38.3</td>
<td align="center">34.2</td>
<td align="center">137260150</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x/137260150/model_final_4f86c3.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x/137260150/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_R_50_FPN_1x -->
<tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml">R50-FPN</a></td>
<td align="center">1x</td>
<td align="center">0.261</td>
<td align="center">0.043</td>
<td align="center">3.4</td>
<td align="center">38.6</td>
<td align="center">35.2</td>
<td align="center">137260431</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/137260431/model_final_a54504.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/137260431/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_R_50_C4_3x -->
<tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml">R50-C4</a></td>
<td align="center">3x</td>
<td align="center">0.575</td>
<td align="center">0.111</td>
<td align="center">5.2</td>
<td align="center">39.8</td>
<td align="center">34.4</td>
<td align="center">137849525</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_R_50_DC5_3x -->
<tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml">R50-DC5</a></td>
<td align="center">3x</td>
<td align="center">0.470</td>
<td align="center">0.076</td>
<td align="center">6.5</td>
<td align="center">40.0</td>
<td align="center">35.9</td>
<td align="center">137849551</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_R_50_FPN_3x -->
<tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml">R50-FPN</a></td>
<td align="center">3x</td>
<td align="center">0.261</td>
<td align="center">0.043</td>
<td align="center">3.4</td>
<td align="center">41.0</td>
<td align="center">37.2</td>
<td align="center">137849600</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_R_101_C4_3x -->
<tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml">R101-C4</a></td>
<td align="center">3x</td>
<td align="center">0.652</td>
<td align="center">0.145</td>
<td align="center">6.3</td>
<td align="center">42.6</td>
<td align="center">36.7</td>
<td align="center">138363239</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x/138363239/model_final_a2914c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x/138363239/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_R_101_DC5_3x -->
<tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml">R101-DC5</a></td>
<td align="center">3x</td>
<td align="center">0.545</td>
<td align="center">0.092</td>
<td align="center">7.6</td>
<td align="center">41.9</td>
<td align="center">37.3</td>
<td align="center">138363294</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x/138363294/model_final_0464b7.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x/138363294/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_R_101_FPN_3x -->
<tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml">R101-FPN</a></td>
<td align="center">3x</td>
<td align="center">0.340</td>
<td align="center">0.056</td>
<td align="center">4.6</td>
<td align="center">42.9</td>
<td align="center">38.6</td>
<td align="center">138205316</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x/138205316/model_final_a3ec72.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x/138205316/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_X_101_32x8d_FPN_3x -->
<tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml">X101-FPN</a></td>
<td align="center">3x</td>
<td align="center">0.690</td>
<td align="center">0.103</td>
<td align="center">7.2</td>
<td align="center">44.3</td>
<td align="center">39.5</td>
<td align="center">139653917</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x/139653917/model_final_2d9806.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x/139653917/metrics.json">metrics</a></td>
</tr>
</tbody></table>
### COCO Person Keypoint Detection Baselines with Keypoint R-CNN
<!--
./gen_html_table.py --config 'COCO-Keypoints/*50*' 'COCO-Keypoints/*101*' --name R50-FPN R50-FPN R101-FPN X101-FPN --fields lr_sched train_speed inference_speed mem box_AP keypoint_AP
-->
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">kp.<br/>AP</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: keypoint_rcnn_R_50_FPN_1x -->
<tr><td align="left"><a href="configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml">R50-FPN</a></td>
<td align="center">1x</td>
<td align="center">0.315</td>
<td align="center">0.072</td>
<td align="center">5.0</td>
<td align="center">53.6</td>
<td align="center">64.0</td>
<td align="center">137261548</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x/137261548/model_final_04e291.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x/137261548/metrics.json">metrics</a></td>
</tr>
<!-- ROW: keypoint_rcnn_R_50_FPN_3x -->
<tr><td align="left"><a href="configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml">R50-FPN</a></td>
<td align="center">3x</td>
<td align="center">0.316</td>
<td align="center">0.066</td>
<td align="center">5.0</td>
<td align="center">55.4</td>
<td align="center">65.5</td>
<td align="center">137849621</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/metrics.json">metrics</a></td>
</tr>
<!-- ROW: keypoint_rcnn_R_101_FPN_3x -->
<tr><td align="left"><a href="configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml">R101-FPN</a></td>
<td align="center">3x</td>
<td align="center">0.390</td>
<td align="center">0.076</td>
<td align="center">6.1</td>
<td align="center">56.4</td>
<td align="center">66.1</td>
<td align="center">138363331</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x/138363331/model_final_997cc7.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x/138363331/metrics.json">metrics</a></td>
</tr>
<!-- ROW: keypoint_rcnn_X_101_32x8d_FPN_3x -->
<tr><td align="left"><a href="configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml">X101-FPN</a></td>
<td align="center">3x</td>
<td align="center">0.738</td>
<td align="center">0.121</td>
<td align="center">8.7</td>
<td align="center">57.3</td>
<td align="center">66.0</td>
<td align="center">139686956</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x/139686956/model_final_5ad38f.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x/139686956/metrics.json">metrics</a></td>
</tr>
</tbody></table>
### COCO Panoptic Segmentation Baselines with Panoptic FPN
<!--
./gen_html_table.py --config 'COCO-PanopticSegmentation/*50*' 'COCO-PanopticSegmentation/*101*' --name R50-FPN R50-FPN R101-FPN --fields lr_sched train_speed inference_speed mem box_AP mask_AP PQ
-->
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">mask<br/>AP</th>
<th valign="bottom">PQ</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: panoptic_fpn_R_50_1x -->
<tr><td align="left"><a href="configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml">R50-FPN</a></td>
<td align="center">1x</td>
<td align="center">0.304</td>
<td align="center">0.053</td>
<td align="center">4.8</td>
<td align="center">37.6</td>
<td align="center">34.7</td>
<td align="center">39.4</td>
<td align="center">139514544</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x/139514544/model_final_dbfeb4.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x/139514544/metrics.json">metrics</a></td>
</tr>
<!-- ROW: panoptic_fpn_R_50_3x -->
<tr><td align="left"><a href="configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml">R50-FPN</a></td>
<td align="center">3x</td>
<td align="center">0.302</td>
<td align="center">0.053</td>
<td align="center">4.8</td>
<td align="center">40.0</td>
<td align="center">36.5</td>
<td align="center">41.5</td>
<td align="center">139514569</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/metrics.json">metrics</a></td>
</tr>
<!-- ROW: panoptic_fpn_R_101_3x -->
<tr><td align="left"><a href="configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml">R101-FPN</a></td>
<td align="center">3x</td>
<td align="center">0.392</td>
<td align="center">0.066</td>
<td align="center">6.0</td>
<td align="center">42.4</td>
<td align="center">38.5</td>
<td align="center">43.0</td>
<td align="center">139514519</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x/139514519/model_final_cafdb1.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x/139514519/metrics.json">metrics</a></td>
</tr>
</tbody></table>
### LVIS Instance Segmentation Baselines with Mask R-CNN
Mask R-CNN baselines on the [LVIS dataset](https://lvisdataset.org), v0.5.
These baselines are described in Table 3(c) of the [LVIS paper](https://arxiv.org/abs/1908.03195).
NOTE: the 1x schedule here has the same number of __iterations__ as the COCO 1x baselines.
They are roughly 24 epochs of LVISv0.5 data.
The final results of these configs have large variance across different runs.
<!--
./gen_html_table.py --config 'LVIS-InstanceSegmentation/mask*50*' 'LVIS-InstanceSegmentation/mask*101*' --name R50-FPN R101-FPN X101-FPN --fields lr_sched train_speed inference_speed mem box_AP mask_AP
-->
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">mask<br/>AP</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: mask_rcnn_R_50_FPN_1x -->
<tr><td align="left"><a href="configs/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml">R50-FPN</a></td>
<td align="center">1x</td>
<td align="center">0.292</td>
<td align="center">0.107</td>
<td align="center">7.1</td>
<td align="center">23.6</td>
<td align="center">24.4</td>
<td align="center">144219072</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/144219072/model_final_571f7c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/144219072/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_R_101_FPN_1x -->
<tr><td align="left"><a href="configs/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml">R101-FPN</a></td>
<td align="center">1x</td>
<td align="center">0.371</td>
<td align="center">0.114</td>
<td align="center">7.8</td>
<td align="center">25.6</td>
<td align="center">25.9</td>
<td align="center">144219035</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x/144219035/model_final_824ab5.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x/144219035/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_X_101_32x8d_FPN_1x -->
<tr><td align="left"><a href="configs/LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml">X101-FPN</a></td>
<td align="center">1x</td>
<td align="center">0.712</td>
<td align="center">0.151</td>
<td align="center">10.2</td>
<td align="center">26.7</td>
<td align="center">27.1</td>
<td align="center">144219108</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x/144219108/model_final_5e3439.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x/144219108/metrics.json">metrics</a></td>
</tr>
</tbody></table>
### Cityscapes & Pascal VOC Baselines
Simple baselines for
* Mask R-CNN on Cityscapes instance segmentation (initialized from COCO pre-training, then trained on Cityscapes fine annotations only)
* Faster R-CNN on PASCAL VOC object detection (trained on VOC 2007 train+val + VOC 2012 train+val, tested on VOC 2007 using 11-point interpolated AP)
<!--
./gen_html_table.py --config 'Cityscapes/*' 'PascalVOC-Detection/*' --name "R50-FPN, Cityscapes" "R50-C4, VOC" --fields train_speed inference_speed mem box_AP box_AP50 mask_AP
-->
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">box<br/>AP50</th>
<th valign="bottom">mask<br/>AP</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: mask_rcnn_R_50_FPN -->
<tr><td align="left"><a href="configs/Cityscapes/mask_rcnn_R_50_FPN.yaml">R50-FPN, Cityscapes</a></td>
<td align="center">0.240</td>
<td align="center">0.078</td>
<td align="center">4.4</td>
<td align="center"></td>
<td align="center"></td>
<td align="center">36.5</td>
<td align="center">142423278</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Cityscapes/mask_rcnn_R_50_FPN/142423278/model_final_af9cf5.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Cityscapes/mask_rcnn_R_50_FPN/142423278/metrics.json">metrics</a></td>
</tr>
<!-- ROW: faster_rcnn_R_50_C4 -->
<tr><td align="left"><a href="configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml">R50-C4, VOC</a></td>
<td align="center">0.537</td>
<td align="center">0.081</td>
<td align="center">4.8</td>
<td align="center">51.9</td>
<td align="center">80.3</td>
<td align="center"></td>
<td align="center">142202221</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/PascalVOC-Detection/faster_rcnn_R_50_C4/142202221/model_final_b1acc2.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/PascalVOC-Detection/faster_rcnn_R_50_C4/142202221/metrics.json">metrics</a></td>
</tr>
</tbody></table>
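The VOC rows above are evaluated with the 11-point interpolated AP from the original PASCAL VOC protocol rather than COCO-style AP: the average, over recall thresholds 0.0, 0.1, ..., 1.0, of the best precision achieved at or beyond each threshold. A minimal sketch of that metric (the function name and the toy precision/recall values are illustrative only):

```python
import numpy as np

def voc_11_point_ap(recall, precision):
    """11-point interpolated AP: mean over t in {0.0, 0.1, ..., 1.0} of the
    maximum precision at any recall >= t (0 if that recall is never reached)."""
    recall = np.asarray(recall)
    precision = np.asarray(precision)
    ap = 0.0
    for t in np.arange(0.0, 1.1, 0.1):
        mask = recall >= t
        p = precision[mask].max() if mask.any() else 0.0
        ap += p / 11.0
    return ap

# Toy precision/recall curve, for illustration only.
print(voc_11_point_ap([0.1, 0.4, 0.7, 0.9], [1.0, 0.8, 0.6, 0.4]))
```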
### Other Settings
Ablations for Deformable Conv and Cascade R-CNN:
<!--
./gen_html_table.py --config 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml' 'Misc/*R_50_FPN_1x_dconv*' 'Misc/cascade*1x.yaml' 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml' 'Misc/*R_50_FPN_3x_dconv*' 'Misc/cascade*3x.yaml' --name "Baseline R50-FPN" "Deformable Conv" "Cascade R-CNN" "Baseline R50-FPN" "Deformable Conv" "Cascade R-CNN" --fields lr_sched train_speed inference_speed mem box_AP mask_AP
-->
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">mask<br/>AP</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: mask_rcnn_R_50_FPN_1x -->
<tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml">Baseline R50-FPN</a></td>
<td align="center">1x</td>
<td align="center">0.261</td>
<td align="center">0.043</td>
<td align="center">3.4</td>
<td align="center">38.6</td>
<td align="center">35.2</td>
<td align="center">137260431</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/137260431/model_final_a54504.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/137260431/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_R_50_FPN_1x_dconv_c3-c5 -->
<tr><td align="left"><a href="configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml">Deformable Conv</a></td>
<td align="center">1x</td>
<td align="center">0.342</td>
<td align="center">0.048</td>
<td align="center">3.5</td>
<td align="center">41.5</td>
<td align="center">37.5</td>
<td align="center">138602867</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5/138602867/model_final_65c703.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5/138602867/metrics.json">metrics</a></td>
</tr>
<!-- ROW: cascade_mask_rcnn_R_50_FPN_1x -->
<tr><td align="left"><a href="configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml">Cascade R-CNN</a></td>
<td align="center">1x</td>
<td align="center">0.317</td>
<td align="center">0.052</td>
<td align="center">4.0</td>
<td align="center">42.1</td>
<td align="center">36.4</td>
<td align="center">138602847</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_R_50_FPN_1x/138602847/model_final_e9d89b.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_R_50_FPN_1x/138602847/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_R_50_FPN_3x -->
<tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml">Baseline R50-FPN</a></td>
<td align="center">3x</td>
<td align="center">0.261</td>
<td align="center">0.043</td>
<td align="center">3.4</td>
<td align="center">41.0</td>
<td align="center">37.2</td>
<td align="center">137849600</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_R_50_FPN_3x_dconv_c3-c5 -->
<tr><td align="left"><a href="configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml">Deformable Conv</a></td>
<td align="center">3x</td>
<td align="center">0.349</td>
<td align="center">0.047</td>
<td align="center">3.5</td>
<td align="center">42.7</td>
<td align="center">38.5</td>
<td align="center">144998336</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5/144998336/model_final_821d0b.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5/144998336/metrics.json">metrics</a></td>
</tr>
<!-- ROW: cascade_mask_rcnn_R_50_FPN_3x -->
<tr><td align="left"><a href="configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml">Cascade R-CNN</a></td>
<td align="center">3x</td>
<td align="center">0.328</td>
<td align="center">0.053</td>
<td align="center">4.0</td>
<td align="center">44.3</td>
<td align="center">38.5</td>
<td align="center">144998488</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/metrics.json">metrics</a></td>
</tr>
</tbody></table>
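The "Deformable Conv" rows above replace the standard 3x3 convolutions in res3-res5 with deformable convolutions (`DEFORM_ON_PER_STAGE: [False, True, True, True]` in the Misc configs listed later in this commit). A minimal sketch of the underlying operation using `torchvision.ops.DeformConv2d`; the channel counts and tensor sizes here are arbitrary illustrations, not detectron2's:

```python
import torch
from torchvision.ops import DeformConv2d

# A 3x3 deformable conv needs 2 offsets (dx, dy) per kernel position.
offset_pred = torch.nn.Conv2d(64, 2 * 3 * 3, kernel_size=3, padding=1)
deform_conv = DeformConv2d(64, 64, kernel_size=3, padding=1)

x = torch.randn(1, 64, 32, 32)
offsets = offset_pred(x)      # per-location sampling offsets, predicted from the input
y = deform_conv(x, offsets)   # convolution samples the input at the offset locations
print(y.shape)                # torch.Size([1, 64, 32, 32])
```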
Ablations for normalization methods, plus a few models trained from scratch following [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883).
(Note: the baseline uses a `2fc` box head while the others use a [`4conv1fc` head](https://arxiv.org/abs/1803.08494).)
<!--
./gen_html_table.py --config 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml' 'Misc/mask*50_FPN_3x_gn.yaml' 'Misc/mask*50_FPN_3x_syncbn.yaml' 'Misc/scratch*' --name "Baseline R50-FPN" "GN" "SyncBN" "GN (from scratch)" "GN (from scratch)" "SyncBN (from scratch)" --fields lr_sched train_speed inference_speed mem box_AP mask_AP
-->
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">mask<br/>AP</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: mask_rcnn_R_50_FPN_3x -->
<tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml">Baseline R50-FPN</a></td>
<td align="center">3x</td>
<td align="center">0.261</td>
<td align="center">0.043</td>
<td align="center">3.4</td>
<td align="center">41.0</td>
<td align="center">37.2</td>
<td align="center">137849600</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_R_50_FPN_3x_gn -->
<tr><td align="left"><a href="configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml">GN</a></td>
<td align="center">3x</td>
<td align="center">0.356</td>
<td align="center">0.069</td>
<td align="center">7.3</td>
<td align="center">42.6</td>
<td align="center">38.6</td>
<td align="center">138602888</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_gn/138602888/model_final_dc5d9e.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_gn/138602888/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_R_50_FPN_3x_syncbn -->
<tr><td align="left"><a href="configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml">SyncBN</a></td>
<td align="center">3x</td>
<td align="center">0.371</td>
<td align="center">0.053</td>
<td align="center">5.5</td>
<td align="center">41.9</td>
<td align="center">37.8</td>
<td align="center">169527823</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_syncbn/169527823/model_final_3b3c51.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_syncbn/169527823/metrics.json">metrics</a></td>
</tr>
<!-- ROW: scratch_mask_rcnn_R_50_FPN_3x_gn -->
<tr><td align="left"><a href="configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml">GN (from scratch)</a></td>
<td align="center">3x</td>
<td align="center">0.400</td>
<td align="center">0.069</td>
<td align="center">9.8</td>
<td align="center">39.9</td>
<td align="center">36.6</td>
<td align="center">138602908</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn/138602908/model_final_01ca85.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn/138602908/metrics.json">metrics</a></td>
</tr>
<!-- ROW: scratch_mask_rcnn_R_50_FPN_9x_gn -->
<tr><td align="left"><a href="configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml">GN (from scratch)</a></td>
<td align="center">9x</td>
<td align="center">N/A</td>
<td align="center">0.070</td>
<td align="center">9.8</td>
<td align="center">43.7</td>
<td align="center">39.6</td>
<td align="center">183808979</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn/183808979/model_final_da7b4c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn/183808979/metrics.json">metrics</a></td>
</tr>
<!-- ROW: scratch_mask_rcnn_R_50_FPN_9x_syncbn -->
<tr><td align="left"><a href="configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml">SyncBN (from scratch)</a></td>
<td align="center">9x</td>
<td align="center">N/A</td>
<td align="center">0.055</td>
<td align="center">7.2</td>
<td align="center">43.6</td>
<td align="center">39.3</td>
<td align="center">184226666</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn/184226666/model_final_5ce33e.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn/184226666/metrics.json">metrics</a></td>
</tr>
</tbody></table>
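The GN and SyncBN rows above differ from the baseline mainly in which normalization layer is used in the backbone and the `4conv1fc` box head. A minimal sketch of the two normalizers in plain PyTorch (the channel count is chosen only for illustration):

```python
import torch
import torch.nn as nn
import torch.distributed as dist

x = torch.randn(2, 256, 14, 14)

# GroupNorm: statistics over groups of channels within each sample,
# independent of batch size (detectron2's GN heads use 32 groups).
gn = nn.GroupNorm(num_groups=32, num_channels=256)

# SyncBatchNorm shares batch statistics across GPUs during distributed
# training; on a single, non-distributed process it behaves like BatchNorm2d.
norm = nn.SyncBatchNorm(256) if dist.is_available() and dist.is_initialized() else nn.BatchNorm2d(256)

print(gn(x).shape, norm(x).shape)
```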
A few very large models, trained for a long time for demo purposes; they are trained using multiple machines:
<!--
./gen_html_table.py --config 'Misc/panoptic_*dconv*' 'Misc/cascade_*152*' --name "Panoptic FPN R101" "Mask R-CNN X152" --fields inference_speed mem box_AP mask_AP PQ
# manually add TTA results
-->
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">mask<br/>AP</th>
<th valign="bottom">PQ</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: panoptic_fpn_R_101_dconv_cascade_gn_3x -->
<tr><td align="left"><a href="configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml">Panoptic FPN R101</a></td>
<td align="center">0.107</td>
<td align="center">11.4</td>
<td align="center">47.4</td>
<td align="center">41.3</td>
<td align="center">46.1</td>
<td align="center">139797668</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x/139797668/model_final_be35db.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x/139797668/metrics.json">metrics</a></td>
</tr>
<!-- ROW: cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv -->
<tr><td align="left"><a href="configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml">Mask R-CNN X152</a></td>
<td align="center">0.242</td>
<td align="center">15.1</td>
<td align="center">50.2</td>
<td align="center">44.0</td>
<td align="center"></td>
<td align="center">18131413</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv/18131413/model_0039999_e76410.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv/18131413/metrics.json">metrics</a></td>
</tr>
<!-- ROW: TTA cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv -->
<tr><td align="left">above + test-time aug.</td>
<td align="center"></td>
<td align="center"></td>
<td align="center">51.9</td>
<td align="center">45.9</td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
</tr>
</tbody></table>
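The "above + test-time aug." row reports the same X152 checkpoint evaluated with multi-scale and horizontal-flip test-time augmentation. A rough sketch of how this is typically switched on in detectron2, assuming the library is installed; the scale list shown is only an example, check the released config for the exact evaluation settings:

```python
from detectron2.config import get_cfg
from detectron2.modeling import GeneralizedRCNNWithTTA, build_model

cfg = get_cfg()
cfg.merge_from_file("configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml")
cfg.TEST.AUG.ENABLED = True       # multi-scale + flip inference
cfg.TEST.AUG.MIN_SIZES = (400, 500, 600, 700, 800, 900, 1000, 1100, 1200)
cfg.TEST.AUG.FLIP = True

model = build_model(cfg)
tta_model = GeneralizedRCNNWithTTA(cfg, model)  # merges predictions across the augmented views
```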

View File

@ -0,0 +1,56 @@
<img src=".github/Detectron2-Logo-Horz.svg" width="300" >
Detectron2 is Facebook AI Research's next generation software system
that implements state-of-the-art object detection algorithms.
It is a ground-up rewrite of the previous version,
[Detectron](https://github.com/facebookresearch/Detectron/),
and it originates from [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark/).
<div align="center">
<img src="https://user-images.githubusercontent.com/1381301/66535560-d3422200-eace-11e9-9123-5535d469db19.png"/>
</div>
### What's New
* It is powered by the [PyTorch](https://pytorch.org) deep learning framework.
* Includes more features such as panoptic segmentation, densepose, Cascade R-CNN, rotated bounding boxes, etc.
* Can be used as a library to support [different projects](projects/) on top of it.
We'll open source more research projects in this way.
* It [trains much faster](https://detectron2.readthedocs.io/notes/benchmarks.html).
See our [blog post](https://ai.facebook.com/blog/-detectron2-a-pytorch-based-modular-object-detection-library-/)
for more demos and to learn more about detectron2.
## Installation
See [INSTALL.md](INSTALL.md).
## Quick Start
See [GETTING_STARTED.md](GETTING_STARTED.md),
or the [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5).
Learn more at our [documentation](https://detectron2.readthedocs.org).
And see [projects/](projects/) for some projects that are built on top of detectron2.
## Model Zoo and Baselines
We provide a large set of baseline results and trained models available for download in the [Detectron2 Model Zoo](MODEL_ZOO.md).
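For orientation, a minimal sketch of pulling one of these baselines through detectron2's model zoo API and running inference (the input image path is a placeholder):

```python
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # confidence threshold for reported detections

predictor = DefaultPredictor(cfg)
outputs = predictor(cv2.imread("input.jpg"))  # placeholder image path
print(outputs["instances"].pred_classes, outputs["instances"].pred_boxes)
```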
## License
Detectron2 is released under the [Apache 2.0 license](LICENSE).
## Citing Detectron2
If you use Detectron2 in your research or wish to refer to the baseline results published in the [Model Zoo](MODEL_ZOO.md), please use the following BibTeX entry.
```BibTeX
@misc{wu2019detectron2,
author = {Yuxin Wu and Alexander Kirillov and Francisco Massa and
Wan-Yen Lo and Ross Girshick},
title = {Detectron2},
howpublished = {\url{https://github.com/facebookresearch/detectron2}},
year = {2019}
}
```

View File

@ -0,0 +1,18 @@
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  RPN:
    PRE_NMS_TOPK_TEST: 6000
    POST_NMS_TOPK_TEST: 1000
  ROI_HEADS:
    NAME: "Res5ROIHeads"
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.02
  STEPS: (60000, 80000)
  MAX_ITER: 90000
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
VERSION: 2

View File

@ -0,0 +1,31 @@
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  RESNETS:
    OUT_FEATURES: ["res5"]
    RES5_DILATION: 2
  RPN:
    IN_FEATURES: ["res5"]
    PRE_NMS_TOPK_TEST: 6000
    POST_NMS_TOPK_TEST: 1000
  ROI_HEADS:
    NAME: "StandardROIHeads"
    IN_FEATURES: ["res5"]
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_FC: 2
    POOLER_RESOLUTION: 7
  ROI_MASK_HEAD:
    NAME: "MaskRCNNConvUpsampleHead"
    NUM_CONV: 4
    POOLER_RESOLUTION: 14
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.02
  STEPS: (60000, 80000)
  MAX_ITER: 90000
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
VERSION: 2

View File

@ -0,0 +1,42 @@
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  BACKBONE:
    NAME: "build_resnet_fpn_backbone"
  RESNETS:
    OUT_FEATURES: ["res2", "res3", "res4", "res5"]
  FPN:
    IN_FEATURES: ["res2", "res3", "res4", "res5"]
  ANCHOR_GENERATOR:
    SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
  RPN:
    IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
    PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
    PRE_NMS_TOPK_TEST: 1000 # Per FPN level
    # Detectron1 uses 2000 proposals per-batch,
    # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
    # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
    POST_NMS_TOPK_TRAIN: 1000
    POST_NMS_TOPK_TEST: 1000
  ROI_HEADS:
    NAME: "StandardROIHeads"
    IN_FEATURES: ["p2", "p3", "p4", "p5"]
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_FC: 2
    POOLER_RESOLUTION: 7
  ROI_MASK_HEAD:
    NAME: "MaskRCNNConvUpsampleHead"
    NUM_CONV: 4
    POOLER_RESOLUTION: 14
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.02
  STEPS: (60000, 80000)
  MAX_ITER: 90000
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
VERSION: 2
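The per-model configs later in this commit inherit from these `Base-*.yaml` files through the `_BASE_` key and only state what differs (weights, depth, schedule, ...). A small sketch of how such a config is resolved and further overridden in Python; the override values and config path are examples only:

```python
from detectron2.config import get_cfg

cfg = get_cfg()  # detectron2 defaults
# merge_from_file follows the _BASE_ chain, so a derived yaml only needs the deltas.
cfg.merge_from_file("configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml")
cfg.merge_from_list(["SOLVER.IMS_PER_BATCH", 8, "SOLVER.BASE_LR", 0.01])  # example overrides
print(cfg.MODEL.RESNETS.DEPTH, cfg.SOLVER.MAX_ITER)
```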

View File

@ -0,0 +1,24 @@
MODEL:
  META_ARCHITECTURE: "RetinaNet"
  BACKBONE:
    NAME: "build_retinanet_resnet_fpn_backbone"
  RESNETS:
    OUT_FEATURES: ["res3", "res4", "res5"]
  ANCHOR_GENERATOR:
    SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
  FPN:
    IN_FEATURES: ["res3", "res4", "res5"]
  RETINANET:
    IOU_THRESHOLDS: [0.4, 0.5]
    IOU_LABELS: [0, -1, 1]
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate
  STEPS: (60000, 80000)
  MAX_ITER: 90000
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
VERSION: 2
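The `!!python/object/apply:eval` line above generates three anchor scales per FPN level. A quick check of what it expands to:

```python
sizes = [[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]]
for per_level in sizes:
    print([round(s, 1) for s in per_level])
# [32, 40.3, 50.8], [64, 80.6, 101.6], ..., [512, 645.1, 812.7]
```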

View File

@ -0,0 +1,17 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: False
  LOAD_PROPOSALS: True
  RESNETS:
    DEPTH: 50
  PROPOSAL_GENERATOR:
    NAME: "PrecomputedProposals"
DATASETS:
  TRAIN: ("coco_2017_train",)
  PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", )
  TEST: ("coco_2017_val",)
  PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
DATALOADER:
  # proposals are part of the dataset_dicts, and take a lot of RAM
  NUM_WORKERS: 2

View File

@ -0,0 +1,9 @@
_BASE_: "../Base-RCNN-C4.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 101
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,9 @@
_BASE_: "../Base-RCNN-DilatedC5.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 101
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,9 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 101
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,6 @@
_BASE_: "../Base-RCNN-C4.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 50

View File

@ -0,0 +1,9 @@
_BASE_: "../Base-RCNN-C4.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 50
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,6 @@
_BASE_: "../Base-RCNN-DilatedC5.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 50

View File

@ -0,0 +1,9 @@
_BASE_: "../Base-RCNN-DilatedC5.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 50
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,6 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 50

View File

@ -0,0 +1,9 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 50
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,13 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  MASK_ON: False
  WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
  PIXEL_STD: [57.375, 57.120, 58.395]
  RESNETS:
    STRIDE_IN_1X1: False # this is a C2 model
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
    DEPTH: 101
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,8 @@
_BASE_: "../Base-RetinaNet.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
  RESNETS:
    DEPTH: 101
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,5 @@
_BASE_: "../Base-RetinaNet.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  RESNETS:
    DEPTH: 50

View File

@ -0,0 +1,8 @@
_BASE_: "../Base-RetinaNet.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  RESNETS:
    DEPTH: 50
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,10 @@
_BASE_: "../Base-RCNN-C4.yaml"
MODEL:
  META_ARCHITECTURE: "ProposalNetwork"
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 50
  RPN:
    PRE_NMS_TOPK_TEST: 12000
    POST_NMS_TOPK_TEST: 2000

View File

@ -0,0 +1,9 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  META_ARCHITECTURE: "ProposalNetwork"
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 50
  RPN:
    POST_NMS_TOPK_TEST: 2000

View File

@ -0,0 +1,9 @@
_BASE_: "../Base-RCNN-C4.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 101
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,9 @@
_BASE_: "../Base-RCNN-DilatedC5.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 101
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,9 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 101
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,6 @@
_BASE_: "../Base-RCNN-C4.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50

View File

@ -0,0 +1,9 @@
_BASE_: "../Base-RCNN-C4.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,6 @@
_BASE_: "../Base-RCNN-DilatedC5.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50

View File

@ -0,0 +1,9 @@
_BASE_: "../Base-RCNN-DilatedC5.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,6 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50

View File

@ -0,0 +1,9 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,13 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  MASK_ON: True
  WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
  PIXEL_STD: [57.375, 57.120, 58.395]
  RESNETS:
    STRIDE_IN_1X1: False # this is a C2 model
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
    DEPTH: 101
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,15 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  KEYPOINT_ON: True
  ROI_HEADS:
    NUM_CLASSES: 1
  ROI_BOX_HEAD:
    SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss
  RPN:
    # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
    # 1000 proposals per-image is found to hurt box AP.
    # Therefore we increase it to 1500 per-image.
    POST_NMS_TOPK_TRAIN: 1500
DATASETS:
  TRAIN: ("keypoints_coco_2017_train",)
  TEST: ("keypoints_coco_2017_val",)

View File

@ -0,0 +1,8 @@
_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
  RESNETS:
    DEPTH: 101
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,5 @@
_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  RESNETS:
    DEPTH: 50

View File

@ -0,0 +1,8 @@
_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  RESNETS:
    DEPTH: 50
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,12 @@
_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
  PIXEL_STD: [57.375, 57.120, 58.395]
  RESNETS:
    STRIDE_IN_1X1: False # this is a C2 model
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
    DEPTH: 101
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,9 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  META_ARCHITECTURE: "PanopticFPN"
  MASK_ON: True
  SEM_SEG_HEAD:
    LOSS_WEIGHT: 0.5
DATASETS:
  TRAIN: ("coco_2017_train_panoptic_separated",)
  TEST: ("coco_2017_val_panoptic_separated",)

View File

@ -0,0 +1,8 @@
_BASE_: "Base-Panoptic-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
  RESNETS:
    DEPTH: 101
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,5 @@
_BASE_: "Base-Panoptic-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  RESNETS:
    DEPTH: 50

View File

@ -0,0 +1,8 @@
_BASE_: "Base-Panoptic-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  RESNETS:
    DEPTH: 50
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,27 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  # For better, more stable performance initialize from COCO
  WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
  MASK_ON: True
  ROI_HEADS:
    NUM_CLASSES: 8
# This is similar to the setting used in Mask R-CNN paper, Appendix A
# But there are some differences, e.g., we did not initialize the output
# layer using the corresponding classes from COCO
INPUT:
  MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
  MIN_SIZE_TRAIN_SAMPLING: "choice"
  MIN_SIZE_TEST: 1024
  MAX_SIZE_TRAIN: 2048
  MAX_SIZE_TEST: 2048
DATASETS:
  TRAIN: ("cityscapes_fine_instance_seg_train",)
  TEST: ("cityscapes_fine_instance_seg_val",)
SOLVER:
  BASE_LR: 0.01
  STEPS: (18000,)
  MAX_ITER: 24000
  IMS_PER_BATCH: 8
TEST:
  EVAL_PERIOD: 8000
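As the comments note, this config fine-tunes from a COCO-pretrained Mask R-CNN while shrinking the ROI head to Cityscapes' 8 instance classes. A minimal sketch of the same pattern for one's own data; the dataset names are placeholders and assume datasets have already been registered:

```python
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.merge_from_file("configs/Cityscapes/mask_rcnn_R_50_FPN.yaml")
# Start from COCO weights; mismatched output layers are re-initialized with a warning.
cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 8        # number of "thing" classes in the target dataset
cfg.DATASETS.TRAIN = ("my_train_split",)   # placeholder registered dataset names
cfg.DATASETS.TEST = ("my_val_split",)

trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
# trainer.train()
```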

View File

@ -0,0 +1,83 @@
Detectron2's model zoo uses experimental settings and a few implementation details that differ from Detectron.
The differences in implementation details are described in
[Compatibility with Other Libraries](../../docs/notes/compatibility.md).
The differences in the model zoo's experimental settings include:
* Use scale augmentation during training. This improves AP with lower training cost.
* Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may
affect other AP.
* Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP.
* Use `ROIAlignV2`. This does not significantly affect AP.
In this directory, we provide a few configs that __do not__ have the above changes.
They mimic Detectron's behavior as closely as possible,
and provide a fair comparison of accuracy and speed against Detectron.
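Concretely, Detectron1's box regression uses a smooth L1 (Huber-style) loss with a beta transition point, while detectron2's defaults use plain L1; the `SMOOTH_L1_BETA` values in the configs below restore the Detectron1 behavior. A small sketch of the two losses:

```python
import torch

def smooth_l1(x, beta):
    # Quadratic below beta, linear above; beta -> 0 recovers plain L1.
    x = x.abs()
    return torch.where(x < beta, 0.5 * x ** 2 / beta, x - 0.5 * beta)

diff = torch.linspace(-2, 2, 5)
print(smooth_l1(diff, beta=0.1111))  # RPN beta used in these comparison configs
print(diff.abs())                    # plain L1, the detectron2 default (beta = 0)
```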
<!--
./gen_html_table.py --config 'Detectron1-Comparisons/*.yaml' --name "Faster R-CNN" "Keypoint R-CNN" "Mask R-CNN" --fields lr_sched train_speed inference_speed mem box_AP mask_AP keypoint_AP --base-dir ../../../configs/Detectron1-Comparisons
-->
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">mask<br/>AP</th>
<th valign="bottom">kp.<br/>AP</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: faster_rcnn_R_50_FPN_noaug_1x -->
<tr><td align="left"><a href="faster_rcnn_R_50_FPN_noaug_1x.yaml">Faster R-CNN</a></td>
<td align="center">1x</td>
<td align="center">0.219</td>
<td align="center">0.038</td>
<td align="center">3.1</td>
<td align="center">36.9</td>
<td align="center"></td>
<td align="center"></td>
<td align="center">137781054</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/model_final_7ab50c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/metrics.json">metrics</a></td>
</tr>
<!-- ROW: keypoint_rcnn_R_50_FPN_1x -->
<tr><td align="left"><a href="keypoint_rcnn_R_50_FPN_1x.yaml">Keypoint R-CNN</a></td>
<td align="center">1x</td>
<td align="center">0.313</td>
<td align="center">0.071</td>
<td align="center">5.0</td>
<td align="center">53.1</td>
<td align="center"></td>
<td align="center">64.2</td>
<td align="center">137781195</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/model_final_cce136.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/metrics.json">metrics</a></td>
</tr>
<!-- ROW: mask_rcnn_R_50_FPN_noaug_1x -->
<tr><td align="left"><a href="mask_rcnn_R_50_FPN_noaug_1x.yaml">Mask R-CNN</a></td>
<td align="center">1x</td>
<td align="center">0.273</td>
<td align="center">0.043</td>
<td align="center">3.4</td>
<td align="center">37.8</td>
<td align="center">34.9</td>
<td align="center"></td>
<td align="center">137781281</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/model_final_62ca52.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/metrics.json">metrics</a></td>
</tr>
</tbody></table>
## Comparisons:
* Faster R-CNN: Detectron's AP is 36.7, similar to ours.
* Keypoint R-CNN: Detectron's AP is 53.6 box / 64.2 keypoint. Fixing a Detectron
[bug](https://github.com/facebookresearch/Detectron/issues/459) leads to a drop in box AP,
which can be compensated for by some parameter tuning.
* Mask R-CNN: Detectron's AP is 37.7 box / 33.9 mask. We are 1 point better in mask AP, due to a more correct implementation.
For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html).

View File

@ -0,0 +1,17 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 50
  # Detectron1 uses smooth L1 loss with some magic beta values.
  # The defaults are changed to L1 loss in Detectron2.
  RPN:
    SMOOTH_L1_BETA: 0.1111
  ROI_BOX_HEAD:
    SMOOTH_L1_BETA: 1.0
    POOLER_SAMPLING_RATIO: 2
    POOLER_TYPE: "ROIAlign"
INPUT:
  # no scale augmentation
  MIN_SIZE_TRAIN: (800, )

View File

@ -0,0 +1,27 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  KEYPOINT_ON: True
  RESNETS:
    DEPTH: 50
  ROI_HEADS:
    NUM_CLASSES: 1
  ROI_KEYPOINT_HEAD:
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    POOLER_TYPE: "ROIAlign"
  # Detectron1 uses smooth L1 loss with some magic beta values.
  # The defaults are changed to L1 loss in Detectron2.
  ROI_BOX_HEAD:
    SMOOTH_L1_BETA: 1.0
    POOLER_SAMPLING_RATIO: 2
    POOLER_TYPE: "ROIAlign"
  RPN:
    SMOOTH_L1_BETA: 0.1111
    # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2
    # 1000 proposals per-image is found to hurt box AP.
    # Therefore we increase it to 1500 per-image.
    POST_NMS_TOPK_TRAIN: 1500
DATASETS:
  TRAIN: ("keypoints_coco_2017_train",)
  TEST: ("keypoints_coco_2017_val",)

View File

@ -0,0 +1,20 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
  # Detectron1 uses smooth L1 loss with some magic beta values.
  # The defaults are changed to L1 loss in Detectron2.
  RPN:
    SMOOTH_L1_BETA: 0.1111
  ROI_BOX_HEAD:
    SMOOTH_L1_BETA: 1.0
    POOLER_SAMPLING_RATIO: 2
    POOLER_TYPE: "ROIAlign"
  ROI_MASK_HEAD:
    POOLER_SAMPLING_RATIO: 2
    POOLER_TYPE: "ROIAlign"
INPUT:
  # no scale augmentation
  MIN_SIZE_TRAIN: (800, )

View File

@ -0,0 +1,19 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 101
  ROI_HEADS:
    NUM_CLASSES: 1230
    SCORE_THRESH_TEST: 0.0001
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
DATASETS:
  TRAIN: ("lvis_v0.5_train",)
  TEST: ("lvis_v0.5_val",)
TEST:
  DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
DATALOADER:
  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
  REPEAT_THRESHOLD: 0.001
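The `RepeatFactorTrainingSampler` with `REPEAT_THRESHOLD: 0.001` implements the repeat-factor oversampling from the LVIS paper: images containing rare categories are repeated more often during an epoch. A sketch of the per-category and per-image repeat factors; the toy frequencies are illustrative only:

```python
import math

def category_repeat_factor(freq, threshold=0.001):
    # freq: fraction of training images that contain the category.
    return max(1.0, math.sqrt(threshold / freq))

def image_repeat_factor(category_freqs, threshold=0.001):
    # An image is repeated according to its rarest annotated category.
    return max(category_repeat_factor(f, threshold) for f in category_freqs)

print(category_repeat_factor(0.1))          # common category -> 1.0 (never oversampled)
print(category_repeat_factor(0.00001))      # rare category   -> 10.0
print(image_repeat_factor([0.1, 0.00004]))  # image with one rare category -> 5.0
```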

View File

@ -0,0 +1,19 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
  ROI_HEADS:
    NUM_CLASSES: 1230
    SCORE_THRESH_TEST: 0.0001
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
DATASETS:
  TRAIN: ("lvis_v0.5_train",)
  TEST: ("lvis_v0.5_val",)
TEST:
  DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
DATALOADER:
  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
  REPEAT_THRESHOLD: 0.001

View File

@ -0,0 +1,23 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
  PIXEL_STD: [57.375, 57.120, 58.395]
  MASK_ON: True
  RESNETS:
    STRIDE_IN_1X1: False # this is a C2 model
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
    DEPTH: 101
  ROI_HEADS:
    NUM_CLASSES: 1230
    SCORE_THRESH_TEST: 0.0001
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
DATASETS:
  TRAIN: ("lvis_v0.5_train",)
  TEST: ("lvis_v0.5_val",)
TEST:
  DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
DATALOADER:
  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
  REPEAT_THRESHOLD: 0.001

View File

@ -0,0 +1,12 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
  ROI_HEADS:
    NAME: CascadeROIHeads
  ROI_BOX_HEAD:
    CLS_AGNOSTIC_BBOX_REG: True
  RPN:
    POST_NMS_TOPK_TRAIN: 2000

View File

@ -0,0 +1,15 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
  ROI_HEADS:
    NAME: CascadeROIHeads
  ROI_BOX_HEAD:
    CLS_AGNOSTIC_BBOX_REG: True
  RPN:
    POST_NMS_TOPK_TRAIN: 2000
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

View File

@ -0,0 +1,36 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  MASK_ON: True
  WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
  RESNETS:
    STRIDE_IN_1X1: False # this is a C2 model
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
    DEPTH: 152
    DEFORM_ON_PER_STAGE: [False, True, True, True]
  ROI_HEADS:
    NAME: "CascadeROIHeads"
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_CONV: 4
    NUM_FC: 1
    NORM: "GN"
    CLS_AGNOSTIC_BBOX_REG: True
  ROI_MASK_HEAD:
    NUM_CONV: 8
    NORM: "GN"
  RPN:
    POST_NMS_TOPK_TRAIN: 2000
SOLVER:
  IMS_PER_BATCH: 128
  STEPS: (35000, 45000)
  MAX_ITER: 50000
  BASE_LR: 0.16
INPUT:
  MIN_SIZE_TRAIN: (640, 864)
  MIN_SIZE_TRAIN_SAMPLING: "range"
  MAX_SIZE_TRAIN: 1440
  CROP:
    ENABLED: True
TEST:
  EVAL_PERIOD: 2500

View File

@ -0,0 +1,42 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  MASK_ON: True
  # WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
  WEIGHTS: "model_0039999_e76410.pkl"
  RESNETS:
    STRIDE_IN_1X1: False # this is a C2 model
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
    DEPTH: 152
    DEFORM_ON_PER_STAGE: [False, True, True, True]
  ROI_HEADS:
    NAME: "CascadeROIHeads"
    NUM_CLASSES: 1
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_CONV: 4
    NUM_FC: 1
    NORM: "GN"
    CLS_AGNOSTIC_BBOX_REG: True
  ROI_MASK_HEAD:
    NUM_CONV: 8
    NORM: "GN"
  RPN:
    POST_NMS_TOPK_TRAIN: 2000
SOLVER:
  # IMS_PER_BATCH: 128
  IMS_PER_BATCH: 1
  STEPS: (35000, 45000)
  MAX_ITER: 50000
  BASE_LR: 0.16
INPUT:
  MIN_SIZE_TRAIN: (640, 864)
  MIN_SIZE_TRAIN_SAMPLING: "range"
  MAX_SIZE_TRAIN: 1440
  CROP:
    ENABLED: True
TEST:
  EVAL_PERIOD: 2500
DATASETS:
  TRAIN: ("CIHP_train","VIP_trainval")
  TEST: ("CIHP_val",)

View File

@ -0,0 +1,25 @@
_BASE_: "cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml"
MODEL:
  MASK_ON: True
  ROI_HEADS:
    NMS_THRESH_TEST: 0.95
    SCORE_THRESH_TEST: 0.5
    NUM_CLASSES: 1
SOLVER:
  IMS_PER_BATCH: 1
  STEPS: (30000, 45000)
  MAX_ITER: 50000
  BASE_LR: 0.02
INPUT:
  MIN_SIZE_TRAIN: (640, 864)
  MIN_SIZE_TRAIN_SAMPLING: "range"
  MAX_SIZE_TRAIN: 1440
  CROP:
    ENABLED: True
TEST:
  AUG:
    ENABLED: True
DATASETS:
  TRAIN: ("demo_train",)
  TEST: ("demo_val",)
OUTPUT_DIR: "../../data/DemoDataset/detectron2_prediction"
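This demo config points at `demo_train` / `demo_val` dataset names and writes predictions under `OUTPUT_DIR`; those names only resolve once the datasets are registered. A rough sketch of registering them, assuming COCO-format annotations; every path, file name, and the config filename below are placeholders, not the repo's actual layout:

```python
from detectron2.data.datasets import register_coco_instances
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

# Placeholder annotation/image paths; adjust to the real DemoDataset layout.
register_coco_instances("demo_train", {}, "data/DemoDataset/train.json", "data/DemoDataset/train_imgs")
register_coco_instances("demo_val", {}, "data/DemoDataset/val.json", "data/DemoDataset/val_imgs")

cfg = get_cfg()
cfg.merge_from_file("configs/Misc/demo.yaml")  # placeholder name for the config shown above
predictor = DefaultPredictor(cfg)
```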

View File

@ -0,0 +1,10 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
  ROI_BOX_HEAD:
    CLS_AGNOSTIC_BBOX_REG: True
  ROI_MASK_HEAD:
    CLS_AGNOSTIC_MASK: True

View File

@ -0,0 +1,8 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
    DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
    DEFORM_MODULATED: False

View File

@ -0,0 +1,11 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
    DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
    DEFORM_MODULATED: False
SOLVER:
  STEPS: (210000, 250000)
  MAX_ITER: 270000

Some files were not shown because too many files have changed in this diff.