Add at new repo again

This commit is contained in:
2025-01-28 21:48:35 +00:00
commit 6e660ddb3c
564 changed files with 75575 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
This directory contains:
1. A script that converts a detectron2 model to caffe2 format.
2. An example that loads a Mask R-CNN model in caffe2 format and runs inference.
See [tutorial](https://detectron2.readthedocs.io/tutorials/deployment.html)
for their usage.

View File

@@ -0,0 +1,98 @@
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import argparse
import os
import onnx
import torch
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format
from detectron2.export import Caffe2Tracer, add_export_config
from detectron2.modeling import build_model
from detectron2.utils.logger import setup_logger
def setup_cfg(args):
    """Build a frozen detectron2 config from the CLI arguments.

    Applies the export-specific config extensions, then merges the config
    file and any command-line overrides, and freezes the result.
    """
    cfg = get_cfg()
    # cuda context is initialized before creating dataloader, so we don't fork anymore
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg = add_export_config(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    if cfg.MODEL.DEVICE != "cpu":
        # GPU tracing needs a sufficiently recent PyTorch; compare (major, minor)
        torch_version = tuple(map(int, torch.__version__.split(".")[:2]))
        assert torch_version >= (1, 5), "PyTorch>=1.5 required for GPU conversion!"
    return cfg
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Convert a model using caffe2 tracing.")
    parser.add_argument(
        "--format",
        choices=["caffe2", "onnx", "torchscript"],
        help="output format",
        default="caffe2",
    )
    parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file")
    parser.add_argument("--run-eval", action="store_true")
    # required=True: every export branch below writes into this directory, and
    # os.makedirs(None) would otherwise fail with an opaque TypeError.
    parser.add_argument(
        "--output", required=True, help="output directory for the converted model"
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=[],
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    logger = setup_logger()
    logger.info("Command line arguments: " + str(args))
    os.makedirs(args.output, exist_ok=True)

    cfg = setup_cfg(args)

    # create a torch model and load the weights referenced by the config
    torch_model = build_model(cfg)
    DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS)

    # get a sample data batch: the tracer needs one concrete example input
    data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
    first_batch = next(iter(data_loader))

    # convert and save the model in the requested format
    tracer = Caffe2Tracer(cfg, torch_model, first_batch)
    if args.format == "caffe2":
        caffe2_model = tracer.export_caffe2()
        caffe2_model.save_protobuf(args.output)
        # draw the caffe2 graph
        caffe2_model.save_graph(os.path.join(args.output, "model.svg"), inputs=first_batch)
    elif args.format == "onnx":
        onnx_model = tracer.export_onnx()
        onnx.save(onnx_model, os.path.join(args.output, "model.onnx"))
    elif args.format == "torchscript":
        script_model = tracer.export_torchscript()
        script_model.save(os.path.join(args.output, "model.ts"))

        # Recursively print IR of all modules
        with open(os.path.join(args.output, "model_ts_IR.txt"), "w") as f:
            try:
                f.write(script_model._actual_script_module._c.dump_to_str(True, False, False))
            except AttributeError:
                # best-effort: these private attributes are not stable across
                # torch versions, so silently skip when unavailable
                pass
        # Print IR of the entire graph (all submodules inlined)
        with open(os.path.join(args.output, "model_ts_IR_inlined.txt"), "w") as f:
            f.write(str(script_model.inlined_graph))
        # Print the model structure in pytorch style
        with open(os.path.join(args.output, "model.txt"), "w") as f:
            f.write(str(script_model))

    # run evaluation with the converted model
    if args.run_eval:
        assert args.format == "caffe2", "Python inference in other format is not yet supported."
        dataset = cfg.DATASETS.TEST[0]
        data_loader = build_detection_test_loader(cfg, dataset)
        # NOTE: hard-coded evaluator. change to the evaluator for your dataset
        evaluator = COCOEvaluator(dataset, cfg, True, args.output)
        metrics = inference_on_dataset(caffe2_model, data_loader, evaluator)
        print_csv_format(metrics)

View File

@@ -0,0 +1,119 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
#include <c10/util/Flags.h>
#include <caffe2/core/blob.h>
#include <caffe2/core/common.h>
#include <caffe2/core/init.h>
#include <caffe2/core/net.h>
#include <caffe2/core/workspace.h>
#include <caffe2/utils/proto_utils.h>
#include <opencv2/opencv.hpp>
#include <cassert>
#include <chrono>
#include <cstring>
#include <iostream>
#include <string>
C10_DEFINE_string(predict_net, "", "path to model.pb");
C10_DEFINE_string(init_net, "", "path to model_init.pb");
C10_DEFINE_string(input, "", "path to input image");
using namespace std;
using namespace caffe2;
int main(int argc, char** argv) {
caffe2::GlobalInit(&argc, &argv);
string predictNetPath = FLAGS_predict_net;
string initNetPath = FLAGS_init_net;
cv::Mat input = cv::imread(FLAGS_input, cv::IMREAD_COLOR);
const int height = input.rows;
const int width = input.cols;
// FPN models require divisibility of 32
assert(height % 32 == 0 && width % 32 == 0);
const int batch = 1;
const int channels = 3;
// initialize Net and Workspace
caffe2::NetDef initNet_, predictNet_;
CAFFE_ENFORCE(ReadProtoFromFile(initNetPath, &initNet_));
CAFFE_ENFORCE(ReadProtoFromFile(predictNetPath, &predictNet_));
Workspace workSpace;
for (auto& str : predictNet_.external_input()) {
workSpace.CreateBlob(str);
}
CAFFE_ENFORCE(workSpace.CreateNet(predictNet_));
CAFFE_ENFORCE(workSpace.RunNetOnce(initNet_));
// setup inputs
auto data = BlobGetMutableTensor(workSpace.GetBlob("data"), caffe2::CPU);
data->Resize(batch, channels, height, width);
float* ptr = data->mutable_data<float>();
// HWC to CHW
for (int c = 0; c < 3; ++c) {
for (int i = 0; i < height * width; ++i) {
ptr[c * height * width + i] = static_cast<float>(input.data[3 * i + c]);
}
}
auto im_info =
BlobGetMutableTensor(workSpace.GetBlob("im_info"), caffe2::CPU);
im_info->Resize(batch, 3);
float* im_info_ptr = im_info->mutable_data<float>();
im_info_ptr[0] = height;
im_info_ptr[1] = width;
im_info_ptr[2] = 1.0;
// run the network
CAFFE_ENFORCE(workSpace.RunNet(predictNet_.name()));
// run 3 more times to benchmark
int N_benchmark = 3;
auto start_time = chrono::high_resolution_clock::now();
for (int i = 0; i < N_benchmark; ++i) {
CAFFE_ENFORCE(workSpace.RunNet(predictNet_.name()));
}
auto end_time = chrono::high_resolution_clock::now();
auto ms = chrono::duration_cast<chrono::microseconds>(end_time - start_time)
.count();
cout << "Latency (should vary with different inputs): "
<< ms * 1.0 / 1e6 / N_benchmark << " seconds" << endl;
// parse Mask R-CNN outputs
caffe2::Tensor bbox(
workSpace.GetBlob("bbox_nms")->Get<caffe2::Tensor>(), caffe2::CPU);
caffe2::Tensor scores(
workSpace.GetBlob("score_nms")->Get<caffe2::Tensor>(), caffe2::CPU);
caffe2::Tensor labels(
workSpace.GetBlob("class_nms")->Get<caffe2::Tensor>(), caffe2::CPU);
caffe2::Tensor mask_probs(
workSpace.GetBlob("mask_fcn_probs")->Get<caffe2::Tensor>(), caffe2::CPU);
cout << "bbox:" << bbox.DebugString() << endl;
cout << "scores:" << scores.DebugString() << endl;
cout << "labels:" << labels.DebugString() << endl;
cout << "mask_probs: " << mask_probs.DebugString() << endl;
int num_instances = bbox.sizes()[0];
for (int i = 0; i < num_instances; ++i) {
float score = scores.data<float>()[i];
if (score < 0.6)
continue; // skip them
const float* box = bbox.data<float>() + i * 4;
int label = labels.data<float>()[i];
cout << "Prediction " << i << ", xyxy=(";
cout << box[0] << ", " << box[1] << ", " << box[2] << ", " << box[3]
<< "); score=" << score << "; label=" << label << endl;
const float* mask = mask_probs.data<float>() +
i * mask_probs.size_from_dim(1) + label * mask_probs.size_from_dim(2);
// save the 28x28 mask
cv::Mat cv_mask(28, 28, CV_32FC1);
memcpy(cv_mask.data, mask, 28 * 28 * sizeof(float));
cv::imwrite("mask" + std::to_string(i) + ".png", cv_mask * 255.);
}
return 0;
}

View File

@@ -0,0 +1,71 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#include <opencv2/opencv.hpp>
#include <torch/csrc/autograd/grad_mode.h>
#include <torch/script.h>
#include <array>
#include <chrono>
#include <iostream>
#include <string>
using namespace std;
// experimental. don't use
int main(int argc, const char* argv[]) {
if (argc != 3) {
return 1;
}
std::string image_file = argv[2];
torch::autograd::AutoGradMode guard(false);
auto module = torch::jit::load(argv[1]);
assert(module.buffers().size() > 0);
// Assume that the entire model is on the same device.
// We just put input to this device.
auto device = (*begin(module.buffers())).device();
cv::Mat input_img = cv::imread(image_file, cv::IMREAD_COLOR);
const int height = input_img.rows;
const int width = input_img.cols;
// FPN models require divisibility of 32
assert(height % 32 == 0 && width % 32 == 0);
const int channels = 3;
auto input = torch::from_blob(
input_img.data, {1, height, width, channels}, torch::kUInt8);
// NHWC to NCHW
input = input.to(device, torch::kFloat).permute({0, 3, 1, 2}).contiguous();
std::array<float, 3> im_info_data{height * 1.0f, width * 1.0f, 1.0f};
auto im_info = torch::from_blob(im_info_data.data(), {1, 3}).to(device);
// run the network
auto output = module.forward({std::make_tuple(input, im_info)});
// run 3 more times to benchmark
int N_benchmark = 3;
auto start_time = chrono::high_resolution_clock::now();
for (int i = 0; i < N_benchmark; ++i) {
output = module.forward({std::make_tuple(input, im_info)});
}
auto end_time = chrono::high_resolution_clock::now();
auto ms = chrono::duration_cast<chrono::microseconds>(end_time - start_time)
.count();
cout << "Latency (should vary with different inputs): "
<< ms * 1.0 / 1e6 / N_benchmark << " seconds" << endl;
auto outputs = output.toTuple()->elements();
// parse Mask R-CNN outputs
auto bbox = outputs[0].toTensor(), scores = outputs[1].toTensor(),
labels = outputs[2].toTensor(), mask_probs = outputs[3].toTensor();
cout << "bbox: " << bbox.toString() << " " << bbox.sizes() << endl;
cout << "scores: " << scores.toString() << " " << scores.sizes() << endl;
cout << "labels: " << labels.toString() << " " << labels.sizes() << endl;
cout << "mask_probs: " << mask_probs.toString() << " " << mask_probs.sizes()
<< endl;
int num_instances = bbox.sizes()[0];
cout << bbox << endl;
return 0;
}