将 Pytorch 模型部署到 C++ 环境
三种部署Pytorch模型到C++环境的方式
文章目录
前言
由于工作原因需要部署Pytorch模型到c++环境下,目前大概有三种方式。
1、pytorch转成onnx文件后,通过opencv读取。
2、pytorch转成onnx文件后,通过onnxruntime读取。
3、利用libtorch库,也就是pytorch的c++版。
一、pytorch2onnx
首先将pytorch训练好的模型导出为onnx文件。
安装所需包:
pip install onnx
pip install onnxruntime
from nets.deeplabv3 import deeplabv3 #这里导入自己的模型
import torch
import os
from PIL import Image
import numpy as np
import onnx
import onnxruntime
def preprocess_input(image):
    """Scale pixel values from the 0-255 range down to 0-1.

    Args:
        image: numeric array of pixel values.

    Returns:
        A new array holding ``image / 255.0``. The argument itself is left
        untouched, and integer-dtype input is accepted (an in-place ``/=``
        would mutate the caller's array and raise on integer dtypes).
    """
    return image / 255.0
def cvtColor(image):
    """Return ``image`` as a 3-channel RGB image.

    Args:
        image: an H x W x 3 array-like, or an object exposing
            ``convert('RGB')`` (the script passes a PIL image).

    Returns:
        ``image`` itself when it already has three dimensions with three
        channels on the last axis; otherwise ``image.convert('RGB')``.
    """
    shape = np.shape(image)
    # The channel count lives on axis 2 (H, W, C); axis -2 is the width.
    if len(shape) == 3 and shape[2] == 3:
        return image
    return image.convert('RGB')
# 检查输出
def check_onnx_output(filename, input_data, torch_output):
    """Run the exported ONNX file and compare its outputs with PyTorch's.

    Args:
        filename: path of the ONNX model to load with onnxruntime.
        input_data: torch tensor fed to the first model input.
        torch_output: mapping whose values are the reference torch tensors,
            in the same order as the ONNX outputs.

    Returns:
        The list of outputs produced by the onnxruntime session.

    Raises:
        AssertionError: if an output differs from its reference beyond
            3 decimal places.
    """
    print("模型测试")
    ort_session = onnxruntime.InferenceSession(filename)
    feed = {ort_session.get_inputs()[0].name: input_data.detach().cpu().numpy()}
    result = ort_session.run([], feed)
    references = torch_output.values()
    for onnx_value, torch_value in zip(result, references):
        np.testing.assert_almost_equal(torch_value.cpu().numpy(), onnx_value, decimal=3)
    return result
# 检查模型
def check_onnx_model(model, onnx_filename, input_image):
    """Validate an exported ONNX file against the original PyTorch model.

    Args:
        model: the PyTorch module that was exported.
        onnx_filename: path of the exported ONNX file.
        input_image: input tensor used for the comparison run.

    Returns:
        The ONNX model object loaded from ``onnx_filename`` after it passed
        ``onnx.checker.check_model``.
    """
    # Reference output computed without autograd bookkeeping.
    with torch.no_grad():
        reference = model(input_image)
    expected = {"output": reference}
    check_onnx_output(onnx_filename, input_image, expected)
    print("模型输出一致")
    loaded = onnx.load(onnx_filename)
    onnx.checker.check_model(loaded)
    print("模型测试成功")
    return loaded
if __name__ == '__main__':
    # Path of the trained PyTorch weights.
    model_path = 'net.pth'
    # Directory that receives the exported file. os.path.join copes with an
    # empty directory part, so a bare file name such as 'net.pth' exports into
    # the current directory (string concatenation with '/' would yield
    # '/torch.onnx', i.e. the filesystem root).
    onnx_path = os.path.dirname(model_path)
    onnx_name = os.path.join(onnx_path, "torch.onnx")
    device = 'cpu'
    # Sample image used both for tracing the export and for the check below.
    image_path = './1.jpg'
    img = Image.open(image_path)
    img = cvtColor(img)
    # HWC float image scaled to 0-1 -> CHW -> add the batch axis.
    img = np.expand_dims(np.transpose(preprocess_input(np.array(img, np.float32)), (2, 0, 1)), 0)
    img = torch.from_numpy(img)
    net = deeplabv3()
    net.load_state_dict(torch.load(model_path, map_location=device), strict=True)
    net = net.eval()
    out = net(img)
    print(out)
    torch.onnx.export(net, img, onnx_name, verbose=True, input_names=["input"], output_names=["output"], opset_version=11)
    # traced_cpu = torch.jit.trace(net, img)
    # torch.jit.save(traced_cpu, os.path.join(onnx_path, "cpu.pt"))
    # Check that the exported ONNX model is well-formed and that its output
    # matches the PyTorch output.
    onnx_model = check_onnx_model(net, onnx_name, img)
二、三种部署的方式
1.opencv加载onnx
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <onnxruntime_cxx_api.h>
#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
using namespace std;
int main()
{
String modelFile = "./torch.onnx";
String imageFile = "./1.jpg";
dnn::Net net = cv::dnn::readNetFromONNX(modelFile); //读取网络和参数
// step 1: Read an image in HWC BGR UINT8 format.
cv::Mat imageBGR = cv::imread(input_path, cv::ImreadModes::IMREAD_COLOR);
// step 2: Resize the image.
cv::Mat resizedImageRGB, resizedImage, preprocessedImage;
resize(imageBGR , resizedImage, Size(500, 500), INTER_AREA)
// step 3: Convert the image to HWC RGB UINT8 format.
cv::cvtColor(resizedImage, resizedImageRGB,
cv::ColorConversionCodes::COLOR_BGR2RGB);
// step 4: Convert the image to HWC RGB float format by dividing each pixel by 255.
resizedImageRGB.convertTo(resizedImage, CV_32F, 1.0 / 255);
// step 5: Split the RGB channels from the image.
cv::Mat channels[3];
cv::split(resizedImage, channels);
//step 7: Merge the RGB channels back to the image.
cv::merge(channels, 3, resizedImage);
// step 8: Convert the image to CHW RGB float format.
// HWC to CHW
cv::dnn::blobFromImage(resizedImage, preprocessedImage);
net.setInput(inputBolb); //输入图像
Mat result = net.forward(); //前向计算
cout << result << endl;
return 0;
}
2.onnxruntime加载onnx
下面部署的是语义分割的模型。
#include <assert.h>
#include <vector>
#include <iostream>
#include <fstream>
#include <unordered_map>
#include <memory>
#include <algorithm>
#include <onnxruntime_cxx_api.h>
#include <cuda_provider_factory.h>
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc_c.h>
#include <opencv2/dnn.hpp>
using namespace cv;
using namespace std;
using namespace cv::dnn;
// Converts a failed ONNX Runtime C API status into an exception.
//
// g_ort:  API table used to read and release the status.
// status: status returned by an API call; nullptr means success.
//
// Returns true when `status` is nullptr. Otherwise logs the error message to
// std::cerr, releases `status` and throws Ort::Exception (ORT_EP_FAIL).
bool CheckStatus(const OrtApi* g_ort, OrtStatus* status) {
    if (status == nullptr) {
        return true;
    }
    // Copy the message first: the pointer returned by GetErrorMessage is
    // obtained from `status` and must not be read after ReleaseStatus.
    const std::string msg = g_ort->GetErrorMessage(status);
    std::cerr << msg << std::endl;
    g_ort->ReleaseStatus(status);
    throw Ort::Exception(std::string(msg), OrtErrorCode::ORT_EP_FAIL);
}
// 图像处理 标准化处理
void PreProcess(const Mat& image, Mat& image_blob)
{
Mat input;
image.copyTo(input);
//数据处理 标准化
std::vector<Mat> channels, channel_p;
split(input, channels);
Mat R, G, B;
B = channels.at(0);
G = channels.at(1);
R = channels.at(2);
B = B / 255.0;
G = G / 255.0;
R = R / 255.0;
channel_p.push_back(R);
channel_p.push_back(G);
channel_p.push_back(B);
Mat outt;
merge(channel_p, outt);
image_blob = outt;
}
void run_ort_net(std::string backend, std::string input_path) {
#ifdef _WIN32
const wchar_t* model_path = L"F:/visual studio workplace/torch.onnx";
#else
const char* model_path = "F:/visual studio workplace/torch.onnx";
#endif
const OrtApi* g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
OrtEnv* env;
CheckStatus(g_ort, g_ort->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "test", &env));
OrtSessionOptions* session_options;
CheckStatus(g_ort, g_ort->CreateSessionOptions(&session_options));
CheckStatus(g_ort, g_ort->SetIntraOpNumThreads(session_options, 1));
CheckStatus(g_ort, g_ort->SetSessionGraphOptimizationLevel(session_options, ORT_ENABLE_BASIC));
std::vector<const char*> options_keys = { "runtime", "buffer_type" };
std::vector<const char*> options_values = { backend.c_str(), "FLOAT" }; // set to TF8 if use quantized data
OrtSession* session;
CheckStatus(g_ort, g_ort->CreateSession(env, model_path, session_options, &session));
OrtAllocator* allocator;
CheckStatus(g_ort, g_ort->GetAllocatorWithDefaultOptions(&allocator));
size_t num_input_nodes;
CheckStatus(g_ort, g_ort->SessionGetInputCount(session, &num_input_nodes));
std::vector<const char*> input_node_names;
std::vector<std::vector<int64_t>> input_node_dims;
std::vector<ONNXTensorElementDataType> input_types;
std::vector<OrtValue*> input_tensors;
input_node_names.resize(num_input_nodes);
input_node_dims.resize(num_input_nodes);
input_types.resize(num_input_nodes);
input_tensors.resize(num_input_nodes);
for (size_t i = 0; i < num_input_nodes; i++) {
// Get input node names
char* input_name;
CheckStatus(g_ort, g_ort->SessionGetInputName(session, i, allocator, &input_name));
input_node_names[i] = input_name;
std::cout << "input name :" << input_name << std::endl;
// Get input node types
OrtTypeInfo* typeinfo;
CheckStatus(g_ort, g_ort->SessionGetInputTypeInfo(session, i, &typeinfo));
const OrtTensorTypeAndShapeInfo* tensor_info;
CheckStatus(g_ort, g_ort->CastTypeInfoToTensorInfo(typeinfo, &tensor_info));
ONNXTensorElementDataType type;
CheckStatus(g_ort, g_ort->GetTensorElementType(tensor_info, &type));
input_types[i] = type;
// Get input shapes/dims
size_t num_dims;
CheckStatus(g_ort, g_ort->GetDimensionsCount(tensor_info, &num_dims));
input_node_dims[i].resize(num_dims);
CheckStatus(g_ort, g_ort->GetDimensions(tensor_info, input_node_dims[i].data(), num_dims));
std::cout << "input dims :" << num_dims << std::endl;
size_t tensor_size;
CheckStatus(g_ort, g_ort->GetTensorShapeElementCount(tensor_info, &tensor_size));
if (typeinfo) g_ort->ReleaseTypeInfo(typeinfo);
}
size_t num_output_nodes;
std::vector<const char*> output_node_names;
std::vector<std::vector<int64_t>> output_node_dims;
std::vector<OrtValue*> output_tensors;
CheckStatus(g_ort, g_ort->SessionGetOutputCount(session, &num_output_nodes));
output_node_names.resize(num_output_nodes);
output_node_dims.resize(num_output_nodes);
output_tensors.resize(num_output_nodes);
for (size_t i = 0; i < num_output_nodes; i++) {
// Get output node names
char* output_name;
CheckStatus(g_ort, g_ort->SessionGetOutputName(session, i, allocator, &output_name));
output_node_names[i] = output_name;
std::cout << "output dims :" << output_name << std::endl;
OrtTypeInfo* typeinfo;
CheckStatus(g_ort, g_ort->SessionGetOutputTypeInfo(session, i, &typeinfo));
const OrtTensorTypeAndShapeInfo* tensor_info;
CheckStatus(g_ort, g_ort->CastTypeInfoToTensorInfo(typeinfo, &tensor_info));
// Get output shapes/dims
size_t num_dims;
CheckStatus(g_ort, g_ort->GetDimensionsCount(tensor_info, &num_dims));
output_node_dims[i].resize(num_dims);
CheckStatus(g_ort, g_ort->GetDimensions(tensor_info, (int64_t*)output_node_dims[i].data(), num_dims));
std::cout << "output dims :" << num_dims << std::endl;
size_t tensor_size;
CheckStatus(g_ort, g_ort->GetTensorShapeElementCount(tensor_info, &tensor_size));
if (typeinfo) g_ort->ReleaseTypeInfo(typeinfo);
}
//加载图片
Mat img = imread(input_path);
Mat det1;
//resize(img, det1, Size(500, 500), INTER_AREA);
img.convertTo(img, CV_32FC3);
PreProcess(img, det1); //标准化处理
Mat blob = dnn::blobFromImage(det1, 1., Size(500, 500), Scalar(0, 0, 0), false, false);
printf("Load success!\n");
OrtMemoryInfo* memory_info;
CheckStatus(g_ort, g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info));
CheckStatus(g_ort, g_ort->CreateTensorWithDataAsOrtValue(memory_info, blob.ptr<float>(), blob.total() * sizeof(float), input_node_dims[0].data(),
input_node_dims[0].size(), input_types[0], &input_tensors[0]));
CheckStatus(g_ort, g_ort->Run(session, nullptr, input_node_names.data(), (const OrtValue* const*)input_tensors.data(),
input_tensors.size(), output_node_names.data(), output_node_names.size(),
output_tensors.data()));
size_t output_data_size = 500 * 500;
size_t output_data_length = output_data_size * sizeof(int64_t*);
std::vector<int64_t*> output_data(output_data_length);
void* output_buffer;
CheckStatus(g_ort, g_ort->GetTensorMutableData(output_tensors[0], &output_buffer));
int64_t* int_buffer = reinterpret_cast<int64_t*>(output_buffer);
/* auto max = std::max_element(int_buffer, int_buffer + output_data_size);
int max_index = static_cast<int>(std::distance(int_buffer, max));*/
//std::cout << *max << std::endl;
int count = 0;
Mat newarr = Mat_<int>(500, 500); //定义一个500*500的矩阵
for (int i = 0; i < newarr.rows; i++)
{
for (int j = 0; j < newarr.cols; j++) //矩阵列数循环
{
if ((int)int_buffer[i * j + j] >= 1) {
count++;
newarr.at<int>(i, j) = 255;
continue;
}
newarr.at<int>(i, j) = int_buffer[i * j + j];
}
}
cout << count << endl;
imwrite("./test.png", newarr);
newarr = imread("./test.png", IMREAD_GRAYSCALE);
cout << newarr.channels() << endl;
imshow("mask", newarr);
cv::waitKey();
}
// Entry point: runs the ONNX Runtime demo with the "CPU" backend label on
// ./1.jpg. Command-line arguments are not read.
int main(int argc, char* argv[]) {
    run_ort_net("CPU", "./1.jpg");
    return 0;
}
为了更好地显示结果,把非背景的值置为255,如下图:
3.libtorch部署
pytorch训练的模型,需要转换为script model,参考在C++平台上部署PyTorch模型流程+踩坑实录
#include <torch/script.h>
#include <iostream>
#include <opencv2/opencv.hpp>
#include <torch/torch.h>
int main()
{
torch::DeviceType device_type;
if (torch::cuda::is_available()) {
std::cout << "CUDA available! Predicting on GPU." << std::endl;
device_type = torch::kCUDA;
}
else {
std::cout << "Predicting on CPU." << std::endl;
device_type = torch::kCUDA;
}
torch::Device device(device_type);
//Init model
std::string model_pb = "./cpu.pth";
auto module = torch::jit::load(model_pb);
module.to(at::kCUDA);
auto image = cv::imread("./1_35.jpg", cv::ImreadModes::IMREAD_COLOR);
cv::Mat image_transfomed;
cv::resize(image, image_transfomed, cv::Size(500, 500));
// convert to tensort
torch::Tensor tensor_image = torch::from_blob(image_transfomed.data,
{ image_transfomed.rows, image_transfomed.cols,3 }, torch::kByte);
tensor_image = tensor_image.permute({ 2,0,1 });
tensor_image = tensor_image.toType(torch::kFloat);
tensor_image = tensor_image.div(255);
tensor_image = tensor_image.unsqueeze(0);
tensor_image = tensor_image.to(at::kCUDA);
torch::Tensor output = module.forward({ tensor_image }).toTensor();
auto max_result = output.max(1, true);
auto max_index = std::get<1>(max_result).item<float>();
std::cout << output << std::endl;
//return max_index;
return 0;
}
参考资料
[1] https://github.com/microsoft/onnxruntime-inference-examples/blob/main/c_cxx/Snpe_EP/main.cpp
[2] https://blog.csdn.net/qq_44747572/article/details/120820964?spm=1001.2014.3001.5501
[3] https://zhuanlan.zhihu.com/p/191569603
[4] https://zhuanlan.zhihu.com/p/414317269