Deploying a PyTorch Model to a C++ Environment

Three Ways to Deploy a PyTorch Model to a C++ Environment

Contents

  • Three Ways to Deploy a PyTorch Model to a C++ Environment
  • Foreword
  • I. pytorch2onnx
  • II. The three deployment methods
  • 1. Loading ONNX with OpenCV
  • 2. Loading ONNX with onnxruntime
  • 3. Deploying with libtorch
  • References

  • Foreword

    For work I needed to deploy a PyTorch model to a C++ environment. There are currently roughly three ways to do this:
    1. Export the PyTorch model to an ONNX file and load it with OpenCV.
    2. Export the PyTorch model to an ONNX file and load it with onnxruntime.
    3. Use the libtorch library, i.e. the C++ distribution of PyTorch.


    I. pytorch2onnx

    First, export the trained PyTorch model to an ONNX file.

    Install the required packages:
    pip install onnx
    pip install onnxruntime

    from nets.deeplabv3 import deeplabv3  # import your own model here
    import torch
    import os
    from PIL import Image
    import numpy as np
    import onnx
    import onnxruntime
    
    def preprocess_input(image):
        image /= 255.0
        return image
    
    def cvtColor(image):
        if len(np.shape(image)) == 3 and np.shape(image)[-1] == 3:
            return image
        else:
            image = image.convert('RGB')
            return image
    
    # Compare the ONNX Runtime output against the PyTorch output
    def check_onnx_output(filename, input_data, torch_output):
        print("Testing the exported ONNX model")
        session = onnxruntime.InferenceSession(filename)
        input_name = session.get_inputs()[0].name
        result = session.run([], {input_name: input_data.detach().cpu().numpy()})
        for test_result, gold_result in zip(result, torch_output.values()):
            np.testing.assert_almost_equal(
                gold_result.cpu().numpy(), test_result, decimal=3,
            )
        return result
    # Check that the exported ONNX model is valid and matches the PyTorch model
    def check_onnx_model(model, onnx_filename, input_image):
        with torch.no_grad():
            torch_out = {"output": model(input_image)}
        check_onnx_output(onnx_filename, input_image, torch_out)
        print("PyTorch and ONNX outputs match")
        onnx_model = onnx.load(onnx_filename)
        onnx.checker.check_model(onnx_model)
        print("ONNX model check passed")
        return onnx_model
    
    
    if __name__ == '__main__':
        # path to the trained weights; the ONNX file is written next to it
        model_path = './net.pth'
        onnx_path = os.path.split(model_path)[0] + '/'
        device = 'cpu'
        # path to a test image
        VOCdevkit_path = './1.jpg'
    
        img = Image.open(VOCdevkit_path)
        img = cvtColor(img)
        img  = np.expand_dims(np.transpose(preprocess_input(np.array(img, np.float32)), (2, 0, 1)), 0)
        img = torch.from_numpy(img)
    
        net = deeplabv3()
        net.load_state_dict(torch.load(model_path, map_location=device), strict=True)
        net = net.eval()
        out = net(img)
        print(out)
    
        torch.onnx.export(net, img, onnx_path + "torch.onnx", verbose=True, input_names=["input"], output_names=["output"], opset_version=11)
    
        # traced_cpu = torch.jit.trace(net, img)
        # torch.jit.save(traced_cpu, onnx_path + "cpu.pt")
    
        # Verify that the exported ONNX model is well formed and that its output matches PyTorch
        onnx_name = onnx_path + "torch.onnx"
        onnx_model = check_onnx_model(net, onnx_name, img)
    

    II. The three deployment methods

    1. Loading ONNX with OpenCV

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgproc.hpp>
    #include <opencv2/imgcodecs.hpp>
    #include <fstream>
    #include <iostream>
    #include <cstdlib>
    using namespace std;
    using namespace cv;

    int main()
    {
        String modelFile = "./torch.onnx";
        String imageFile = "./1.jpg";

        dnn::Net net = dnn::readNetFromONNX(modelFile); // load the network and its weights

        // step 1: read the image in HWC BGR UINT8 format
        Mat imageBGR = imread(imageFile, ImreadModes::IMREAD_COLOR);
        // step 2: resize the image
        Mat resizedImageRGB, resizedImage, preprocessedImage;
        resize(imageBGR, resizedImage, Size(500, 500), 0, 0, INTER_AREA);
        // step 3: convert the image to HWC RGB UINT8 format
        cvtColor(resizedImage, resizedImageRGB, ColorConversionCodes::COLOR_BGR2RGB);
        // step 4: convert the image to HWC RGB float format by dividing each pixel by 255
        resizedImageRGB.convertTo(resizedImage, CV_32F, 1.0 / 255);
        // step 5: split the RGB channels (per-channel normalization could be applied here)
        Mat channels[3];
        split(resizedImage, channels);
        // step 6: merge the RGB channels back into one image
        merge(channels, 3, resizedImage);
        // step 7: convert the image to NCHW RGB float format
        dnn::blobFromImage(resizedImage, preprocessedImage);

        net.setInput(preprocessedImage);              // feed the input blob
        Mat result = net.forward();                   // forward pass
        cout << "output size: " << result.size << endl;
        return 0;
    }
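    For a segmentation network, the blob returned by net.forward() is four-dimensional (batch x classes x height x width), so it is more useful to reduce it to a per-pixel class mask than to print it. Below is a minimal sketch that could slot in right before `return 0;` above; it assumes the exported model outputs raw per-class scores (if the model already ends in an arg-max, as the onnxruntime example in the next section assumes, this step is unnecessary):

        // Sketch: turn the 1 x C x H x W score blob from net.forward() into an 8-bit mask
        // by keeping, for every pixel, the class with the highest score.
        int numClasses = result.size[1];
        int H = result.size[2], W = result.size[3];
        const float* scores = reinterpret_cast<const float*>(result.data); // contiguous NCHW buffer
        Mat mask(H, W, CV_8UC1);
        for (int y = 0; y < H; ++y) {
            for (int x = 0; x < W; ++x) {
                int best = 0;
                float bestScore = scores[0 * H * W + y * W + x];
                for (int c = 1; c < numClasses; ++c) {
                    float s = scores[c * H * W + y * W + x];
                    if (s > bestScore) { bestScore = s; best = c; }
                }
                mask.at<uchar>(y, x) = static_cast<uchar>(best);
            }
        }
        imwrite("./opencv_mask.png", mask * 255); // scale up so foreground classes are visible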
    

    2. Loading ONNX with onnxruntime

    For installing onnxruntime, see the references at the end of this article.
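
    Before wiring up the full pipeline, it can be worth checking that the runtime is installed correctly and that the exported model loads. Here is a minimal sketch using the onnxruntime C++ wrapper API (the full example below uses the lower-level C API); the model path `./torch.onnx` follows the export script above:

    #include <onnxruntime_cxx_api.h>
    #include <iostream>

    int main()
    {
        Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "sanity-check");
        Ort::SessionOptions opts;
        opts.SetIntraOpNumThreads(1);
        // Creating the session parses and validates the ONNX file.
        // Note: on Windows the path must be a wide string (L"./torch.onnx").
        Ort::Session session(env, "./torch.onnx", opts);
        std::cout << "inputs: " << session.GetInputCount()
                  << ", outputs: " << session.GetOutputCount() << std::endl;
        return 0;
    }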

    The code below deploys a semantic-segmentation model.

    #include <assert.h>
    #include <vector>
    #include <iostream>
    #include <fstream>
    #include <unordered_map>
    #include <memory>
    #include <algorithm>
    #include <onnxruntime_cxx_api.h>
    
    #include <cuda_provider_factory.h>
    
    #include <opencv2/core.hpp>
    #include <opencv2/imgcodecs.hpp>
    #include <opencv2/opencv.hpp>
    #include <opencv2/highgui.hpp>
    #include <opencv2/core/core.hpp>
    #include <opencv2/imgproc/imgproc_c.h>
    #include <opencv2/dnn.hpp>
    
    using namespace cv;
    using namespace std;
    using namespace cv::dnn;
    
    bool CheckStatus(const OrtApi* g_ort, OrtStatus* status) {
        if (status != nullptr) {
            const char* msg = g_ort->GetErrorMessage(status);
            std::cerr << msg << std::endl;
            g_ort->ReleaseStatus(status);
            throw Ort::Exception(msg, OrtErrorCode::ORT_EP_FAIL);
        }
        return true;
    }
    
    // Image preprocessing: scale each channel to [0, 1] and reorder BGR -> RGB
    void PreProcess(const Mat& image, Mat& image_blob)
    {
        Mat input;
        image.copyTo(input);

        // split the channels and normalize each one
        std::vector<Mat> channels, channel_p;
        split(input, channels);
        Mat R, G, B;
        B = channels.at(0);
        G = channels.at(1);
        R = channels.at(2);
    
        B = B / 255.0;
        G = G / 255.0;
        R = R / 255.0;
    
        channel_p.push_back(R);
        channel_p.push_back(G);
        channel_p.push_back(B);
    
        Mat outt;
        merge(channel_p, outt);
        image_blob = outt;
    }
    
    
    void run_ort_net(std::string backend, std::string input_path) {
    #ifdef _WIN32
        const wchar_t* model_path = L"F:/visual studio workplace/torch.onnx";
    #else
        const char* model_path = "F:/visual studio workplace/torch.onnx";
    #endif
    
        const OrtApi* g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
        OrtEnv* env;
        CheckStatus(g_ort, g_ort->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "test", &env));
    
        OrtSessionOptions* session_options;
        CheckStatus(g_ort, g_ort->CreateSessionOptions(&session_options));
        CheckStatus(g_ort, g_ort->SetIntraOpNumThreads(session_options, 1));
        CheckStatus(g_ort, g_ort->SetSessionGraphOptimizationLevel(session_options, ORT_ENABLE_BASIC));
    
        // Leftover from the SNPE execution-provider sample this code is adapted from (reference [1]);
        // these options are not passed to the session when running on the default CPU provider.
        std::vector<const char*> options_keys = { "runtime", "buffer_type" };
        std::vector<const char*> options_values = { backend.c_str(), "FLOAT" };  // set to TF8 if using quantized data
    
        OrtSession* session;
        CheckStatus(g_ort, g_ort->CreateSession(env, model_path, session_options, &session));
    
        OrtAllocator* allocator;
        CheckStatus(g_ort, g_ort->GetAllocatorWithDefaultOptions(&allocator));
        size_t num_input_nodes;
        CheckStatus(g_ort, g_ort->SessionGetInputCount(session, &num_input_nodes));
    
        std::vector<const char*> input_node_names;
        std::vector<std::vector<int64_t>> input_node_dims;
        std::vector<ONNXTensorElementDataType> input_types;
        std::vector<OrtValue*> input_tensors;
    
        input_node_names.resize(num_input_nodes);
        input_node_dims.resize(num_input_nodes);
        input_types.resize(num_input_nodes);
        input_tensors.resize(num_input_nodes);
    
        for (size_t i = 0; i < num_input_nodes; i++) {
            // Get input node names
            char* input_name;
            CheckStatus(g_ort, g_ort->SessionGetInputName(session, i, allocator, &input_name));
            input_node_names[i] = input_name;
    
            std::cout << "input name :" << input_name << std::endl;
    
            // Get input node types
            OrtTypeInfo* typeinfo;
            CheckStatus(g_ort, g_ort->SessionGetInputTypeInfo(session, i, &typeinfo));
            const OrtTensorTypeAndShapeInfo* tensor_info;
            CheckStatus(g_ort, g_ort->CastTypeInfoToTensorInfo(typeinfo, &tensor_info));
            ONNXTensorElementDataType type;
            CheckStatus(g_ort, g_ort->GetTensorElementType(tensor_info, &type));
            input_types[i] = type;
    
            // Get input shapes/dims
            size_t num_dims;
            CheckStatus(g_ort, g_ort->GetDimensionsCount(tensor_info, &num_dims));
            input_node_dims[i].resize(num_dims);
            CheckStatus(g_ort, g_ort->GetDimensions(tensor_info, input_node_dims[i].data(), num_dims));
    
            std::cout << "input dims :" << num_dims << std::endl;
    
            size_t tensor_size;
            CheckStatus(g_ort, g_ort->GetTensorShapeElementCount(tensor_info, &tensor_size));
    
            if (typeinfo) g_ort->ReleaseTypeInfo(typeinfo);
        }
    
        size_t num_output_nodes;
        std::vector<const char*> output_node_names;
        std::vector<std::vector<int64_t>> output_node_dims;
        std::vector<OrtValue*> output_tensors;
        CheckStatus(g_ort, g_ort->SessionGetOutputCount(session, &num_output_nodes));
        output_node_names.resize(num_output_nodes);
        output_node_dims.resize(num_output_nodes);
        output_tensors.resize(num_output_nodes);
    
        for (size_t i = 0; i < num_output_nodes; i++) {
            // Get output node names
            char* output_name;
            CheckStatus(g_ort, g_ort->SessionGetOutputName(session, i, allocator, &output_name));
            output_node_names[i] = output_name;
    
            std::cout << "output dims :" << output_name << std::endl;
    
            OrtTypeInfo* typeinfo;
            CheckStatus(g_ort, g_ort->SessionGetOutputTypeInfo(session, i, &typeinfo));
            const OrtTensorTypeAndShapeInfo* tensor_info;
            CheckStatus(g_ort, g_ort->CastTypeInfoToTensorInfo(typeinfo, &tensor_info));
    
            // Get output shapes/dims
            size_t num_dims;
            CheckStatus(g_ort, g_ort->GetDimensionsCount(tensor_info, &num_dims));
            output_node_dims[i].resize(num_dims);
            CheckStatus(g_ort, g_ort->GetDimensions(tensor_info, (int64_t*)output_node_dims[i].data(), num_dims));
    
            std::cout << "output dims :" << num_dims << std::endl;
    
            size_t tensor_size;
            CheckStatus(g_ort, g_ort->GetTensorShapeElementCount(tensor_info, &tensor_size));
    
            if (typeinfo) g_ort->ReleaseTypeInfo(typeinfo);
        }
    
        // load the image
        Mat img = imread(input_path);
        Mat det1;
        //resize(img, det1, Size(500, 500), INTER_AREA);
        img.convertTo(img, CV_32FC3);
        PreProcess(img, det1);         // normalize to [0, 1] and reorder channels
        Mat blob = dnn::blobFromImage(det1, 1., Size(500, 500), Scalar(0, 0, 0), false, false);
        printf("Load success!\n");
    
        OrtMemoryInfo* memory_info;
        CheckStatus(g_ort, g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info));
        CheckStatus(g_ort, g_ort->CreateTensorWithDataAsOrtValue(memory_info, blob.ptr<float>(), blob.total() * sizeof(float), input_node_dims[0].data(),
            input_node_dims[0].size(), input_types[0], &input_tensors[0]));
    
        CheckStatus(g_ort, g_ort->Run(session, nullptr, input_node_names.data(), (const OrtValue* const*)input_tensors.data(),
            input_tensors.size(), output_node_names.data(), output_node_names.size(),
            output_tensors.data()));
    
        // The output tensor is read as int64 class indices, one per pixel (500 * 500 values).
        size_t output_data_size = 500 * 500;
        void* output_buffer;
        CheckStatus(g_ort, g_ort->GetTensorMutableData(output_tensors[0], &output_buffer));
        int64_t* int_buffer = reinterpret_cast<int64_t*>(output_buffer);
    
        /* auto max = std::max_element(int_buffer, int_buffer + output_data_size);
         int max_index = static_cast<int>(std::distance(int_buffer, max));*/
    
         //std::cout << *max << std::endl;
    
        int count = 0;
        Mat newarr = Mat_<uchar>(500, 500); // 500 x 500 output mask (8-bit so it can be saved as a PNG)
        for (int i = 0; i < newarr.rows; i++)
        {
            for (int j = 0; j < newarr.cols; j++) // loop over the columns
            {
                // row-major index into the flat output buffer
                int64_t cls = int_buffer[i * newarr.cols + j];
                if (cls >= 1) {
                    // non-background pixel: set to 255 for visualization
                    count++;
                    newarr.at<uchar>(i, j) = 255;
                    continue;
                }
                newarr.at<uchar>(i, j) = static_cast<uchar>(cls);
            }
        }
        cout << count << endl;
    
        imwrite("./test.png", newarr);
        newarr = imread("./test.png", IMREAD_GRAYSCALE);
        cout << newarr.channels() << endl;
        imshow("mask", newarr);
        cv::waitKey();
    }
    
    int main(int argc, char* argv[]) {
        std::string backend = "CPU";
        std::string input_path = "./1.jpg";
        run_ort_net(backend, input_path);
        return 0;
    }
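
    The example above never releases the objects it creates through the C API. For a one-shot demo the operating system reclaims everything on exit, but in a long-running service the handles should be freed explicitly. A minimal sketch of the cleanup that could be appended at the end of `run_ort_net` (all names follow the example above):

    // Sketch: explicit cleanup for the C-API handles created in run_ort_net.
    for (OrtValue* t : input_tensors)  g_ort->ReleaseValue(t);
    for (OrtValue* t : output_tensors) g_ort->ReleaseValue(t);
    // input/output names were allocated with the default allocator and must be freed through it
    for (const char* n : input_node_names)  allocator->Free(allocator, (void*)n);
    for (const char* n : output_node_names) allocator->Free(allocator, (void*)n);
    g_ort->ReleaseMemoryInfo(memory_info);
    g_ort->ReleaseSession(session);
    g_ort->ReleaseSessionOptions(session_options);
    g_ort->ReleaseEnv(env);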
    

    For better visualization, all non-background values in the result are set to 255 in the saved mask.

    3. Deploying with libtorch

    The model trained in PyTorch first needs to be converted to a TorchScript model (the commented-out torch.jit.trace lines in the export script above show the basic call); for details, see 《在C++平台上部署PyTorch模型流程+踩坑实录》 in the references.

    #include <torch/script.h>
    #include <iostream>
    #include <opencv2/opencv.hpp>
    #include <torch/torch.h>
    
    int main()
    {
        torch::DeviceType device_type;
        if (torch::cuda::is_available()) {
            std::cout << "CUDA available! Predicting on GPU." << std::endl;
            device_type = torch::kCUDA;
        }
        else {
            std::cout << "Predicting on CPU." << std::endl;
            device_type = torch::kCPU;
        }
        torch::Device device(device_type);
    
        // Init model: load the traced TorchScript module saved by torch.jit.save
        std::string model_pb = "./cpu.pt";
        auto module = torch::jit::load(model_pb);
        module.to(device);  // move to the device selected above
    
        auto image = cv::imread("./1_35.jpg", cv::ImreadModes::IMREAD_COLOR);
        cv::Mat image_transformed;
        cv::resize(image, image_transformed, cv::Size(500, 500));

        // convert the HWC uint8 image to a normalized NCHW float tensor
        torch::Tensor tensor_image = torch::from_blob(image_transformed.data,
            { image_transformed.rows, image_transformed.cols, 3 }, torch::kByte);
        tensor_image = tensor_image.permute({ 2,0,1 });
        tensor_image = tensor_image.toType(torch::kFloat);
        tensor_image = tensor_image.div(255);
        tensor_image = tensor_image.unsqueeze(0);
        tensor_image = tensor_image.to(device);
        torch::Tensor output = module.forward({ tensor_image }).toTensor();
        // per-pixel prediction: indices of the maximum score along the class dimension
        auto max_result = output.max(1, true);
        auto max_index = std::get<1>(max_result);
        std::cout << output << std::endl;
        //return max_index;
        return 0;
    }
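
    As with the onnxruntime example, the per-pixel class indices can be written out as an image for inspection. Below is a minimal sketch (assuming a 1 x 1 x 500 x 500 output and the `max_index` tensor from the example above) that copies the prediction into a cv::Mat and saves it:

        // Sketch: turn the index tensor from the example above into an 8-bit mask.
        torch::Tensor pred = max_index.squeeze().to(torch::kU8).to(torch::kCPU).contiguous();
        cv::Mat mask(500, 500, CV_8UC1, pred.data_ptr<uint8_t>());
        cv::imwrite("./libtorch_mask.png", mask * 255); // scale up so foreground pixels are visible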
    

    References

    [1] https://github.com/microsoft/onnxruntime-inference-examples/blob/main/c_cxx/Snpe_EP/main.cpp
    [2] https://blog.csdn.net/qq_44747572/article/details/120820964?spm=1001.2014.3001.5501
    [3] https://zhuanlan.zhihu.com/p/191569603
    [4] https://zhuanlan.zhihu.com/p/414317269
