Add at new repo again

This commit is contained in:
2025-01-28 21:48:35 +00:00
commit 6e660ddb3c
564 changed files with 75575 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
This directory contains:
1. A script that converts a detectron2 model to caffe2 format.
2. An example that loads a Mask R-CNN model in caffe2 format and runs inference.
See [tutorial](https://detectron2.readthedocs.io/tutorials/deployment.html)
for their usage.

View File

@@ -0,0 +1,98 @@
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import argparse
import os
import onnx
import torch
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format
from detectron2.export import Caffe2Tracer, add_export_config
from detectron2.modeling import build_model
from detectron2.utils.logger import setup_logger
def setup_cfg(args):
    """Build a frozen detectron2 config from the CLI arguments.

    Applies the export-specific config extensions, then merges the config
    file and any command-line overrides, and freezes the result.
    """
    cfg = get_cfg()
    # cuda context is initialized before creating dataloader, so we don't fork anymore
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg = add_export_config(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    if cfg.MODEL.DEVICE != "cpu":
        # GPU tracing needs a sufficiently recent PyTorch; compare (major, minor)
        torch_version = tuple(map(int, torch.__version__.split(".")[:2]))
        assert torch_version >= (1, 5), "PyTorch>=1.5 required for GPU conversion!"
    return cfg
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Convert a model using caffe2 tracing.")
    parser.add_argument(
        "--format",
        choices=["caffe2", "onnx", "torchscript"],
        help="output format",
        default="caffe2",
    )
    parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file")
    parser.add_argument("--run-eval", action="store_true")
    # required=True: every export branch below writes into this directory, and
    # os.makedirs(None) would otherwise fail with an opaque TypeError.
    parser.add_argument(
        "--output", required=True, help="output directory for the converted model"
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=[],
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    logger = setup_logger()
    logger.info("Command line arguments: " + str(args))
    os.makedirs(args.output, exist_ok=True)

    cfg = setup_cfg(args)

    # create a torch model and load the weights referenced by the config
    torch_model = build_model(cfg)
    DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS)

    # get a sample data batch: the tracer needs one concrete example input
    data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
    first_batch = next(iter(data_loader))

    # convert and save the model in the requested format
    tracer = Caffe2Tracer(cfg, torch_model, first_batch)
    if args.format == "caffe2":
        caffe2_model = tracer.export_caffe2()
        caffe2_model.save_protobuf(args.output)
        # draw the caffe2 graph
        caffe2_model.save_graph(os.path.join(args.output, "model.svg"), inputs=first_batch)
    elif args.format == "onnx":
        onnx_model = tracer.export_onnx()
        onnx.save(onnx_model, os.path.join(args.output, "model.onnx"))
    elif args.format == "torchscript":
        script_model = tracer.export_torchscript()
        script_model.save(os.path.join(args.output, "model.ts"))

        # Recursively print IR of all modules
        with open(os.path.join(args.output, "model_ts_IR.txt"), "w") as f:
            try:
                f.write(script_model._actual_script_module._c.dump_to_str(True, False, False))
            except AttributeError:
                # best-effort: these private attributes are not stable across
                # torch versions, so silently skip when unavailable
                pass
        # Print IR of the entire graph (all submodules inlined)
        with open(os.path.join(args.output, "model_ts_IR_inlined.txt"), "w") as f:
            f.write(str(script_model.inlined_graph))
        # Print the model structure in pytorch style
        with open(os.path.join(args.output, "model.txt"), "w") as f:
            f.write(str(script_model))

    # run evaluation with the converted model
    if args.run_eval:
        assert args.format == "caffe2", "Python inference in other format is not yet supported."
        dataset = cfg.DATASETS.TEST[0]
        data_loader = build_detection_test_loader(cfg, dataset)
        # NOTE: hard-coded evaluator. change to the evaluator for your dataset
        evaluator = COCOEvaluator(dataset, cfg, True, args.output)
        metrics = inference_on_dataset(caffe2_model, data_loader, evaluator)
        print_csv_format(metrics)

View File

@@ -0,0 +1,119 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
#include <c10/util/Flags.h>
#include <caffe2/core/blob.h>
#include <caffe2/core/common.h>
#include <caffe2/core/init.h>
#include <caffe2/core/net.h>
#include <caffe2/core/workspace.h>
#include <caffe2/utils/proto_utils.h>
#include <opencv2/opencv.hpp>
#include <cassert>
#include <chrono>
#include <cstring>
#include <iostream>
#include <string>
C10_DEFINE_string(predict_net, "", "path to model.pb");
C10_DEFINE_string(init_net, "", "path to model_init.pb");
C10_DEFINE_string(input, "", "path to input image");
using namespace std;
using namespace caffe2;
int main(int argc, char** argv) {
caffe2::GlobalInit(&argc, &argv);
string predictNetPath = FLAGS_predict_net;
string initNetPath = FLAGS_init_net;
cv::Mat input = cv::imread(FLAGS_input, cv::IMREAD_COLOR);
const int height = input.rows;
const int width = input.cols;
// FPN models require divisibility of 32
assert(height % 32 == 0 && width % 32 == 0);
const int batch = 1;
const int channels = 3;
// initialize Net and Workspace
caffe2::NetDef initNet_, predictNet_;
CAFFE_ENFORCE(ReadProtoFromFile(initNetPath, &initNet_));
CAFFE_ENFORCE(ReadProtoFromFile(predictNetPath, &predictNet_));
Workspace workSpace;
for (auto& str : predictNet_.external_input()) {
workSpace.CreateBlob(str);
}
CAFFE_ENFORCE(workSpace.CreateNet(predictNet_));
CAFFE_ENFORCE(workSpace.RunNetOnce(initNet_));
// setup inputs
auto data = BlobGetMutableTensor(workSpace.GetBlob("data"), caffe2::CPU);
data->Resize(batch, channels, height, width);
float* ptr = data->mutable_data<float>();
// HWC to CHW
for (int c = 0; c < 3; ++c) {
for (int i = 0; i < height * width; ++i) {
ptr[c * height * width + i] = static_cast<float>(input.data[3 * i + c]);
}
}
auto im_info =
BlobGetMutableTensor(workSpace.GetBlob("im_info"), caffe2::CPU);
im_info->Resize(batch, 3);
float* im_info_ptr = im_info->mutable_data<float>();
im_info_ptr[0] = height;
im_info_ptr[1] = width;
im_info_ptr[2] = 1.0;
// run the network
CAFFE_ENFORCE(workSpace.RunNet(predictNet_.name()));
// run 3 more times to benchmark
int N_benchmark = 3;
auto start_time = chrono::high_resolution_clock::now();
for (int i = 0; i < N_benchmark; ++i) {
CAFFE_ENFORCE(workSpace.RunNet(predictNet_.name()));
}
auto end_time = chrono::high_resolution_clock::now();
auto ms = chrono::duration_cast<chrono::microseconds>(end_time - start_time)
.count();
cout << "Latency (should vary with different inputs): "
<< ms * 1.0 / 1e6 / N_benchmark << " seconds" << endl;
// parse Mask R-CNN outputs
caffe2::Tensor bbox(
workSpace.GetBlob("bbox_nms")->Get<caffe2::Tensor>(), caffe2::CPU);
caffe2::Tensor scores(
workSpace.GetBlob("score_nms")->Get<caffe2::Tensor>(), caffe2::CPU);
caffe2::Tensor labels(
workSpace.GetBlob("class_nms")->Get<caffe2::Tensor>(), caffe2::CPU);
caffe2::Tensor mask_probs(
workSpace.GetBlob("mask_fcn_probs")->Get<caffe2::Tensor>(), caffe2::CPU);
cout << "bbox:" << bbox.DebugString() << endl;
cout << "scores:" << scores.DebugString() << endl;
cout << "labels:" << labels.DebugString() << endl;
cout << "mask_probs: " << mask_probs.DebugString() << endl;
int num_instances = bbox.sizes()[0];
for (int i = 0; i < num_instances; ++i) {
float score = scores.data<float>()[i];
if (score < 0.6)
continue; // skip them
const float* box = bbox.data<float>() + i * 4;
int label = labels.data<float>()[i];
cout << "Prediction " << i << ", xyxy=(";
cout << box[0] << ", " << box[1] << ", " << box[2] << ", " << box[3]
<< "); score=" << score << "; label=" << label << endl;
const float* mask = mask_probs.data<float>() +
i * mask_probs.size_from_dim(1) + label * mask_probs.size_from_dim(2);
// save the 28x28 mask
cv::Mat cv_mask(28, 28, CV_32FC1);
memcpy(cv_mask.data, mask, 28 * 28 * sizeof(float));
cv::imwrite("mask" + std::to_string(i) + ".png", cv_mask * 255.);
}
return 0;
}

View File

@@ -0,0 +1,71 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#include <opencv2/opencv.hpp>
#include <torch/csrc/autograd/grad_mode.h>
#include <torch/script.h>
#include <array>
#include <chrono>
#include <iostream>
#include <string>
using namespace std;
// experimental. don't use
int main(int argc, const char* argv[]) {
if (argc != 3) {
return 1;
}
std::string image_file = argv[2];
torch::autograd::AutoGradMode guard(false);
auto module = torch::jit::load(argv[1]);
assert(module.buffers().size() > 0);
// Assume that the entire model is on the same device.
// We just put input to this device.
auto device = (*begin(module.buffers())).device();
cv::Mat input_img = cv::imread(image_file, cv::IMREAD_COLOR);
const int height = input_img.rows;
const int width = input_img.cols;
// FPN models require divisibility of 32
assert(height % 32 == 0 && width % 32 == 0);
const int channels = 3;
auto input = torch::from_blob(
input_img.data, {1, height, width, channels}, torch::kUInt8);
// NHWC to NCHW
input = input.to(device, torch::kFloat).permute({0, 3, 1, 2}).contiguous();
std::array<float, 3> im_info_data{height * 1.0f, width * 1.0f, 1.0f};
auto im_info = torch::from_blob(im_info_data.data(), {1, 3}).to(device);
// run the network
auto output = module.forward({std::make_tuple(input, im_info)});
// run 3 more times to benchmark
int N_benchmark = 3;
auto start_time = chrono::high_resolution_clock::now();
for (int i = 0; i < N_benchmark; ++i) {
output = module.forward({std::make_tuple(input, im_info)});
}
auto end_time = chrono::high_resolution_clock::now();
auto ms = chrono::duration_cast<chrono::microseconds>(end_time - start_time)
.count();
cout << "Latency (should vary with different inputs): "
<< ms * 1.0 / 1e6 / N_benchmark << " seconds" << endl;
auto outputs = output.toTuple()->elements();
// parse Mask R-CNN outputs
auto bbox = outputs[0].toTensor(), scores = outputs[1].toTensor(),
labels = outputs[2].toTensor(), mask_probs = outputs[3].toTensor();
cout << "bbox: " << bbox.toString() << " " << bbox.sizes() << endl;
cout << "scores: " << scores.toString() << " " << scores.sizes() << endl;
cout << "labels: " << labels.toString() << " " << labels.sizes() << endl;
cout << "mask_probs: " << mask_probs.toString() << " " << mask_probs.sizes()
<< endl;
int num_instances = bbox.sizes()[0];
cout << bbox << endl;
return 0;
}