代码收藏家技术教程 2025-04-04

【TensorRT】Python环境下MobileNetV4三分类任务：从安装到推理全过程解析

我想开发一个基于深度学习的分类小软件，在此过程中逐渐了解到TensorRT在模型推理速度上的优势。经过一下午的资料查找，我实现了将onnx模型转换为TensorRT格式模型并进行推理和测试的全过程，现将实现过程记录下来，方便日后查看。
实际上本文的测试方法并不准确：在我的代码中，TensorRT分配和释放显存的时间、数据预处理的时间都被计入了推理时间。如果要准确地测量推理时间，应该只测量模型推理本身的耗时，而将其余步骤排除在外。尽管如此，TensorRT在我的设备上也比ONNX快得多。

本文实验设备是MX350显卡 2G显存

一、安装TensorRT

点击TensorRT下载链接，选择合适的TensorRT版本下载。本文默认读者在使用TensorRT进行推理之前已经配置好cuda和cudnn环境，如果还没有配置好，请移步这篇博客：Windows配置深度学习环境(从查询合适的torch版本开始)——torch+CUDA+cuDNN

TensorRT与cuda版本对应方式查看如下：

点击TensorRT版本
点击同意
点击版本号
查看cuda版本是否符合你设备，点击下载即可

二、环境配置

下载后得到文件结构如下所示
添加环境变量，右键此电脑点击属性，根据图中序号依次点击并添加环境变量
我的环境变量如下所示

D:\Software\TensorRT-8.6.1.6\lib
D:\Software\TensorRT-8.6.1.6\bin

三、模型转换

打开命令行窗口，切换到D:\Software\TensorRT-8.6.1.6\bin目录，执行如下命令

trtexec --onnx=mymodel.onnx --saveEngine=model.trt --fp16

这里的--fp16应该也可以改成--int8，但是精度损失会有点大，我没有实验
这里的mymodel.onnx需要替换为你自己的onnx文件名，输出文件名model.trt可以随意命名

如下图所示为转换成功

四、TensorRT与ONNX推理速度与精度测试

（1）推理时间测试

TensorRTPredictor

import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import os
import numpy as np
import time
from typing import Tuple
from utils.utils import softmax, preprocess_image


class TensorRTPredictor:
    """Classify images with a serialized TensorRT engine.

    Device buffers, the page-locked host output buffer and the CUDA stream are
    created once in ``__init__`` and reused by every ``warmup``/``infer`` call.
    """

    def __init__(self, engine_path: str):
        """Load the engine and pre-allocate everything inference needs.

        Args:
            engine_path: Path to a serialized TensorRT engine file.
        """
        self.logger = trt.Logger(trt.Logger.WARNING)
        self.engine = self._load_engine(engine_path)
        self.context = self.engine.create_execution_context()
        # NOTE(review): assumes tensor 0 is the single input and tensor 1 the
        # single output of the engine — confirm before using another model.
        self.input_shape = tuple(self.engine.get_tensor_shape(self.engine.get_tensor_name(0)))
        self.output_shape = tuple(self.engine.get_tensor_shape(self.engine.get_tensor_name(1)))

        # Allocate device/host buffers once so infer() never allocates.
        self._setup_buffers()

        # One stream shared by all copies and kernel launches.
        self.stream = cuda.Stream()
        self.is_warmed_up = False

    def _load_engine(self, engine_path: str) -> trt.ICudaEngine:
        """Deserialize the engine file and print how long loading took."""
        load_start_time = time.time()
        with open(engine_path, "rb") as f, trt.Runtime(self.logger) as runtime:
            engine = runtime.deserialize_cuda_engine(f.read())
        load_time = (time.time() - load_start_time) * 1000
        print(f"加载引擎时间: {load_time:.2f} ms")
        return engine

    def _setup_buffers(self):
        """Allocate the input/output device buffers and the pinned host output."""
        bytes_per_value = np.dtype(np.float32).itemsize
        input_size = int(np.prod(self.input_shape)) * bytes_per_value
        output_size = int(np.prod(self.output_shape)) * bytes_per_value

        self.d_input = cuda.mem_alloc(input_size)
        self.d_output = cuda.mem_alloc(output_size)

        # Page-locked host memory for the device-to-host result copy.
        self.h_output = cuda.pagelocked_empty(self.output_shape, dtype=np.float32)

    def _enqueue(self):
        """Queue one engine execution on the shared stream."""
        self.context.execute_async_v2(
            bindings=[int(self.d_input), int(self.d_output)],
            stream_handle=self.stream.handle
        )

    def warmup(self, iterations: int = 10):
        """Run the engine on random input a few times; a second call is a no-op.

        Args:
            iterations: Number of warm-up executions to queue.
        """
        if self.is_warmed_up:
            print("模型已经预热，跳过预热步骤")
            return

        warmup_start_time = time.time()
        dummy_input = np.random.rand(*self.input_shape).astype(np.float32)

        for _ in range(iterations):
            cuda.memcpy_htod_async(self.d_input, dummy_input, self.stream)
            self._enqueue()
        # Wait once for all queued iterations instead of after each one.
        self.stream.synchronize()

        warmup_time = (time.time() - warmup_start_time) * 1000
        print(f"  预热时间: {warmup_time:.2f} ms")
        self.is_warmed_up = True

    def infer(self, image) -> Tuple[np.ndarray, np.ndarray]:
        """Preprocess one image and run it through the engine.

        Args:
            image: Whatever ``preprocess_image`` accepts (the scripts in this
                project pass an image file path).

        Returns:
            ``(outputs, confidence)``: a copy of the raw engine output and the
            result of ``softmax`` applied to its first row.

        Raises:
            ValueError: If the preprocessed data does not contain exactly as
                many values as the engine input expects.
        """
        image = preprocess_image(image)
        if not self.is_warmed_up:
            print("警告：模型尚未预热，推理性能可能受影响")

        input_data = np.ascontiguousarray(image, dtype=np.float32)
        # The device buffer has a fixed size; copying a different amount of
        # data would either overrun it or leave stale values behind.
        if input_data.size != int(np.prod(self.input_shape)):
            raise ValueError(
                f"输入数据元素个数 {input_data.size} 与引擎输入形状 {self.input_shape} 不匹配"
            )
        input_data = input_data.reshape(self.input_shape)

        cuda.memcpy_htod_async(self.d_input, input_data, self.stream)
        self._enqueue()
        cuda.memcpy_dtoh_async(self.h_output, self.d_output, self.stream)
        self.stream.synchronize()

        confidence = softmax(self.h_output[0])
        # Copy so the caller's result is not overwritten by the next call.
        return self.h_output.copy(), confidence

    def __del__(self):
        """Free the device buffers when the predictor is garbage collected."""
        # hasattr guards cover the case where __init__ failed part-way.
        if hasattr(self, 'd_input'):
            self.d_input.free()
        if hasattr(self, 'd_output'):
            self.d_output.free()
        print("显存资源已释放")

ONNXPredictor

import onnxruntime as ort
import numpy as np
from torchvision import transforms
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
import time
import os
from utils.utils import softmax, preprocess_image


class ONNXPredictor:
    """Classify images with an ONNX Runtime session (CUDA if available, else CPU)."""

    def __init__(self, model_path="mobilenetv4_hybrid_medium.onnx", size=224):
        """Create the inference session and record its input/output names.

        Args:
            model_path: Path to the ONNX model file.
            size: Square input size; used to build the torchvision transform
                and as a fallback for dynamic dimensions during warm-up.
        """
        self.size = size

        # Pick the execution provider before creating the session.
        self.providers = self._get_available_providers()
        print(f"可用推理后端: {self.providers}")

        self.session = ort.InferenceSession(model_path, providers=self.providers)

        # Report what the session actually ended up using.
        current_provider = self.session.get_providers()
        print(f"实际使用的推理后端: {current_provider}")

        self.input_name = self.session.get_inputs()[0].name
        self.output_name = self.session.get_outputs()[0].name
        # NOTE(review): this transform is built but infer() goes through
        # preprocess_image instead — confirm which pipeline is intended.
        self.transform = self.build_transform(size)
        self.is_warmed_up = False

    def _get_available_providers(self):
        """Return a single-provider list: CUDA when available, otherwise CPU.

        Raises:
            RuntimeError: If neither the CUDA nor the CPU provider is available.
        """
        available_providers = ort.get_available_providers()

        for provider in ('CUDAExecutionProvider', 'CPUExecutionProvider'):
            if provider in available_providers:
                return [provider]
        raise RuntimeError("没有可用的执行提供程序（既没有CUDA也没有CPU）")

    def build_transform(self, size: int):
        """Build a resize / center-crop / to-tensor / normalize pipeline."""
        return transforms.Compose([
            transforms.Resize(size, interpolation=transforms.InterpolationMode.BICUBIC),
            transforms.CenterCrop(size),
            transforms.ToTensor(),
            transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD)
        ])

    def _warmup_input_shape(self):
        """Return the shape used for warm-up input.

        Fixed dimensions declared by the model are used as-is; dynamic ones
        (non-integer entries) fall back to ``(1, 3, size, size)``.
        """
        fallback = (1, 3, self.size, self.size)
        declared = self.session.get_inputs()[0].shape
        if declared is None or len(declared) != len(fallback):
            return fallback
        return tuple(
            dim if isinstance(dim, int) and dim > 0 else default
            for dim, default in zip(declared, fallback)
        )

    def warmup(self, iterations=10):
        """Run the session on random input ``iterations`` times."""
        dummy_input = np.random.rand(*self._warmup_input_shape()).astype(np.float32)
        for _ in range(iterations):
            self.session.run([self.output_name], {self.input_name: dummy_input})
        self.is_warmed_up = True
        print(f"模型已预热 {iterations} 次")

    def preprocess(self, image):
        """Delegate preprocessing to the shared ``preprocess_image`` helper."""
        return preprocess_image(image)

    def infer(self, image):
        """Preprocess ``image``, run the model and return ``(outputs, confidence)``.

        ``confidence`` is ``softmax`` applied to the first row of the output.
        """
        input_data = self.preprocess(image)
        outputs = self.session.run([self.output_name], {self.input_name: input_data})[0]
        confidence = softmax(outputs[0])

        return outputs, confidence

Predictor

import numpy as np
from typing import Tuple
from onnxPredictor import ONNXPredictor
from TensorRTPredictor import TensorRTPredictor
import time


class Predictor:
    """Facade that selects the TensorRT or the ONNX backend by mode."""

    def __init__(self,
                 model_path: str = None,
                 mode: str = "speed",
                 engine_path: str = None,
                 size: int = 224):
        """
        Create the backend predictor for the requested mode.

        Args:
            model_path: ONNX model path (required when mode="precision").
            mode: "speed" (TensorRT backend) or "precision" (ONNX backend).
            engine_path: TensorRT engine path (required when mode="speed").
            size: Input image size, forwarded to the ONNX backend.

        Raises:
            ValueError: If the path required by the selected mode is missing.
        """
        assert mode in ["speed", "precision"], "mode 必须是 'speed' 或 'precision'"

        self.mode = mode
        self.size = size

        if self.mode == "speed":
            if engine_path is None:
                raise ValueError("TensorRT 引擎路径必须提供当选择速度优先模式")
            print("初始化 TensorRT 预测器 (速度优先模式)...")
            self.predictor = TensorRTPredictor(engine_path)
        else:
            # Mirror the engine_path check: passing None on would override the
            # backend's own default and fail later with a less helpful error.
            if model_path is None:
                raise ValueError("ONNX 模型路径必须提供当选择精度优先模式")
            print("初始化 ONNX 预测器 (精度优先模式)...")
            self.predictor = ONNXPredictor(model_path, size)

        self.is_warmed_up = False

    def warmup(self, iterations: int = 10):
        """Warm up the selected backend and remember that it was done."""
        self.predictor.warmup(iterations)
        self.is_warmed_up = True

    def infer(self, image) -> Tuple[np.ndarray, np.ndarray]:
        """
        Run inference on the selected backend.

        Returns:
            (outputs, confidence) exactly as returned by the backend; both
            backends share the same return layout.
        """
        if not self.is_warmed_up:
            print("警告: 模型尚未预热，性能可能受影响")

        outputs, confidence = self.predictor.infer(image)
        return outputs, confidence

    def get_backend(self) -> str:
        """Return the name of the backend in use: "TensorRT" or "ONNX"."""
        return "TensorRT" if self.mode == "speed" else "ONNX"

测试代码


if __name__ == "__main__":
    # Benchmark script: time Predictor.infer on every image below a folder.
    # Local imports: the surrounding module does not import os/sys itself.
    import os
    import sys

    PATHS = {
        "image_folder": "D:/Desktop/DATA/balance_bei_liao_hu/temp",  # image folder
        "engine": "../assets/weights/mnv4.engine",  # TensorRT engine file
        "model_path": "../assets/weights/mobilenetv4_hybrid_medium.onnx",  # ONNX model file
    }
    IMAGE_EXTENSIONS = ('.jpg', '.png', '.bmp', '.jpeg')

    # Fail early when the inputs are missing.
    if not os.path.exists(PATHS["image_folder"]):
        print(f"错误: 图片文件夹不存在 -> {os.path.abspath(PATHS['image_folder'])}")
        sys.exit(1)
    if not os.path.exists(PATHS["engine"]):
        print(f"错误: 引擎文件不存在 -> {os.path.abspath(PATHS['engine'])}")
        sys.exit(1)

    # Collect image files recursively; compare extensions case-insensitively
    # so files such as "IMG_1.JPG" are not silently skipped.
    image_files = [
        os.path.join(root, file)
        for root, _, files in os.walk(PATHS["image_folder"])
        for file in files
        if file.lower().endswith(IMAGE_EXTENSIONS)
    ]

    if not image_files:
        print(f"错误: 文件夹中没有图片文件 -> {PATHS['image_folder']}")
        sys.exit(1)

    predictor = Predictor(engine_path=PATHS["engine"], mode="speed", size=224)
    predictor.warmup(iterations=10)
    # Label the output with the backend that is really running.
    backend = predictor.get_backend()

    total_time = 0
    succeeded = 0
    for image_path in image_files:
        try:
            print(f"处理图片: {image_path}")
            # perf_counter is monotonic and has better resolution for short
            # intervals than time.time.
            start_time = time.perf_counter()
            predictions, confidence = predictor.infer(image_path)
            end_time = time.perf_counter()
            print(end_time - start_time)

            inference_time = (end_time - start_time) * 1000  # milliseconds
            print(f"  {backend} 推理时间: {inference_time:.2f} ms")
            print(f"  {backend} 输出: {np.argmax(predictions)} (置信度: {np.max(confidence):.4f})")
            total_time += inference_time
            succeeded += 1
        except Exception as e:
            print(f"处理图片时出错: {image_path} -> {str(e)}")

    # Average over the images that were actually timed; failed images add no
    # time and must not dilute the mean.
    if succeeded:
        avg_time = total_time / succeeded
        print(f"\n平均推理时间: {avg_time:.2f} ms")
    else:
        print("\n错误: 没有图片被成功处理，无法计算平均推理时间")

TensorRT推理150张224×224图片平均速度为6.40ms，而ONNX推理需要10ms左右

两种格式的模型分别预测了150张尺寸为224×224的三类图片，每一类有50张，调用TensorRT平均每张图片需要6.4ms，而onnx平均每张图片需要10.08ms，TensorRT模型的推理时间约为onnx的63%（缩短了约三分之一）。根据查找的资料显示，转换后模型推理时间的缩短幅度可能与设备有关。

（2）精度测试

TensorRT推理代码

import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import cv2
import os
import numpy as np
import time
from typing import Tuple
from sklearn.metrics import classification_report, accuracy_score, f1_score
from collections import Counter

class TensorRTPredictor:
    """TensorRT predictor used for the accuracy test.

    Unlike a pre-allocating predictor, this version allocates and frees its
    device buffers inside every ``warmup``/``infer`` call.
    """

    def __init__(self, engine_path: str):
        """Load the engine, create an execution context and warm it up.

        Args:
            engine_path: Path to a serialized TensorRT engine file.
        """
        self.logger = trt.Logger(trt.Logger.WARNING)
        self.engine = self._load_engine(engine_path)
        self.context = self.engine.create_execution_context()
        # NOTE(review): assumes tensor 0 is the single input and tensor 1 the
        # single output of the engine — confirm before using another model.
        self.input_shape = tuple(self.engine.get_tensor_shape(self.engine.get_tensor_name(0)))
        self.output_shape = tuple(self.engine.get_tensor_shape(self.engine.get_tensor_name(1)))
        self.is_warmed_up = False
        self.warmup(iterations=10)  # warm up as part of construction

    def _load_engine(self, engine_path: str) -> trt.ICudaEngine:
        """Deserialize the engine file and print how long loading took."""
        load_start_time = time.time()
        with open(engine_path, "rb") as f, trt.Runtime(self.logger) as runtime:
            engine = runtime.deserialize_cuda_engine(f.read())
        load_time = (time.time() - load_start_time) * 1000
        print(f"加载引擎时间: {load_time:.2f} ms")
        return engine

    def preprocess_image(self, image_path: str) -> np.ndarray:
        """Read an image and return it as a normalized float32 CHW array.

        Args:
            image_path: Path of the image file to read.

        Raises:
            FileNotFoundError: If ``image_path`` does not exist.
            ValueError: If OpenCV cannot decode the file.
            RuntimeError: If resizing/normalizing fails.
        """
        preprocess_start_time = time.time()
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"图像文件不存在: {os.path.abspath(image_path)}")

        image = cv2.imread(image_path)
        if image is None:
            raise ValueError("无法读取图像，请检查文件格式和完整性")

        try:
            image = cv2.resize(image, (224, 224))
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = np.ascontiguousarray(image.transpose(2, 0, 1).astype(np.float32) / 255.0)
            # float32 statistics keep the result float32 instead of silently
            # promoting the whole image to float64.
            mean = np.array([0.362, 0.279, 0.258], dtype=np.float32).reshape(3, 1, 1)
            std = np.array([0.222, 0.191, 0.185], dtype=np.float32).reshape(3, 1, 1)
            image = (image - mean) / std
        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise RuntimeError(f"图像预处理失败: {str(e)}") from e
        preprocess_time = (time.time() - preprocess_start_time) * 1000
        print(f"  预处理时间: {preprocess_time:.2f} ms")
        return image

    def _buffer_sizes(self) -> Tuple[int, int]:
        """Return the (input, output) buffer sizes in bytes for float32 data."""
        bytes_per_value = np.dtype(np.float32).itemsize
        return (
            int(np.prod(self.input_shape)) * bytes_per_value,
            int(np.prod(self.output_shape)) * bytes_per_value,
        )

    @staticmethod
    def _free_buffers(*buffers):
        """Free every device buffer that was actually allocated."""
        for buffer in buffers:
            if buffer is not None:
                buffer.free()

    def warmup(self, iterations: int = 10):
        """Run the engine on random input; a second call is a no-op.

        Args:
            iterations: Number of warm-up executions.
        """
        if self.is_warmed_up:
            print("模型已经预热，跳过预热步骤")
            return

        warmup_start_time = time.time()
        input_size, output_size = self._buffer_sizes()
        d_input = d_output = None
        try:
            d_input = cuda.mem_alloc(input_size)
            d_output = cuda.mem_alloc(output_size)
            stream = cuda.Stream()

            dummy_input = np.random.rand(*self.input_shape).astype(np.float32)
            for _ in range(iterations):
                cuda.memcpy_htod_async(d_input, dummy_input, stream)
                self.context.execute_async_v2(
                    bindings=[int(d_input), int(d_output)],
                    stream_handle=stream.handle
                )
                stream.synchronize()
        finally:
            # Release device memory even if a copy or execution raised.
            self._free_buffers(d_input, d_output)
        warmup_time = (time.time() - warmup_start_time) * 1000
        print(f"  预热时间: {warmup_time:.2f} ms")
        self.is_warmed_up = True

    def infer(self, image: np.ndarray) -> Tuple[float, np.ndarray]:
        """Run one preprocessed image through the engine.

        Args:
            image: Preprocessed image; a batch axis is prepended before upload.

        Returns:
            ``(infer_time, output_data)``: milliseconds spent on the upload
            plus execution, and the raw engine output.
        """
        if not self.is_warmed_up:
            print("警告：模型尚未预热，推理性能可能受影响")

        input_size, output_size = self._buffer_sizes()
        input_data = np.ascontiguousarray(np.expand_dims(image, axis=0), dtype=np.float32)
        output_data = np.empty(self.output_shape, dtype=np.float32)

        d_input = d_output = None
        try:
            d_input = cuda.mem_alloc(input_size)
            d_output = cuda.mem_alloc(output_size)
            stream = cuda.Stream()

            # Timed section: host-to-device copy plus engine execution.
            infer_start_time = time.time()
            cuda.memcpy_htod_async(d_input, input_data, stream)
            self.context.execute_async_v2(
                bindings=[int(d_input), int(d_output)],
                stream_handle=stream.handle
            )
            stream.synchronize()
            infer_time = (time.time() - infer_start_time) * 1000
            print(f"  TensorRT 推理时间: {infer_time:.2f} ms")

            # The result download is timed separately.
            output_start_time = time.time()
            cuda.memcpy_dtoh_async(output_data, d_output, stream)
            stream.synchronize()
            output_time = (time.time() - output_start_time) * 1000
            print(f"  获取输出时间: {output_time:.2f} ms")
        finally:
            # Release device memory even if a copy or execution raised.
            self._free_buffers(d_input, d_output)

        return infer_time, output_data


if __name__ == "__main__":
    # Accuracy script: classify every image below a folder and compare the
    # prediction with the name of the image's parent directory.
    import sys

    PATHS = {
        "image_folder": "D:/Desktop/DATA/balance_bei_liao_hu/temp",  # image folder
        "engine": "mnv4.engine"  # TensorRT engine file
    }
    IMAGE_EXTENSIONS = ('.jpg', '.png', '.bmp', '.jpeg')

    # Fail early when the inputs are missing.
    if not os.path.exists(PATHS["image_folder"]):
        print(f"错误: 图片文件夹不存在 -> {os.path.abspath(PATHS['image_folder'])}")
        sys.exit(1)
    if not os.path.exists(PATHS["engine"]):
        print(f"错误: 引擎文件不存在 -> {os.path.abspath(PATHS['engine'])}")
        sys.exit(1)

    # Collect image files recursively; compare extensions case-insensitively
    # so files such as "IMG_1.JPG" are not silently skipped.
    image_files = [
        os.path.join(root, file)
        for root, _, files in os.walk(PATHS["image_folder"])
        for file in files
        if file.lower().endswith(IMAGE_EXTENSIONS)
    ]

    if not image_files:
        print(f"错误: 文件夹中没有图片文件 -> {PATHS['image_folder']}")
        sys.exit(1)

    predictor = TensorRTPredictor(PATHS["engine"])

    # Class index produced by the model -> folder name used as ground truth.
    label_mapping = {0: "B", 1: "D", 2: "E"}
    true_labels = []
    predicted_labels = []

    total_time = 0
    for image_path in image_files:
        try:
            print(f"处理图片: {image_path}")
            img = predictor.preprocess_image(image_path)
            # infer() already prints the inference time, so it is not
            # printed a second time here.
            trt_time, trt_out = predictor.infer(img)

            # Resolve both labels before touching either list so the two
            # lists can never get out of step when something raises.
            predicted_label = label_mapping[int(np.argmax(trt_out))]
            true_label = os.path.basename(os.path.dirname(image_path))
        except Exception as e:
            print(f"处理图片时出错: {image_path} -> {str(e)}")
            continue
        predicted_labels.append(predicted_label)
        true_labels.append(true_label)
        total_time += trt_time

    if not true_labels:
        print("错误: 没有图片被成功处理，无法统计结果")
        sys.exit(1)

    # Average over the images that were actually processed.
    avg_time = total_time / len(true_labels)
    print(f"\n平均推理时间: {avg_time:.2f} ms")

    print("\n分类结果统计：")
    print(f"图片总数: {len(image_files)}")
    print(f"分类结果: {Counter(predicted_labels)}")

    # Accuracy and weighted F1, printed with four decimals.
    accuracy = accuracy_score(true_labels, predicted_labels)
    f1 = f1_score(true_labels, predicted_labels, average='weighted')
    print(f"准确率: {accuracy:.4f}")
    print(f"F1 分数: {f1:.4f}")

    # Per-class report.
    print("\n分类报告：")
    print(classification_report(true_labels, predicted_labels, digits=4))

onnx推理代码

from datasets.split_data import read_split_data
from datasets.mydataset import MyDataset
from torchvision import transforms
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
import torch
from estimate_model import Predictor, Plot_ROC
from timm.models import create_model
import os, cv2, json, random
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt


def read_test_data(root, plot_image=False):
    """Scan a class-per-subfolder dataset and return image paths with labels.

    Every sub-directory of ``root`` is treated as one class. The class-index
    mapping is written to ``output/classes_indices.json``. Files that OpenCV
    cannot decode are reported and skipped.

    Args:
        root: Dataset root directory containing one folder per class.
        plot_image: When true, show a bar chart of the per-class file counts.

    Returns:
        ``(test_image_path, test_image_label)``: the readable image paths and
        the integer class index of each one, in the same order.
    """
    random.seed(0)
    assert os.path.exists(root), 'Your root does not exists!!!'

    classes = sorted(
        cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))
    )
    class_indices = {name: idx for idx, name in enumerate(classes)}

    json_str = json.dumps({idx: name for name, idx in class_indices.items()}, indent=4)
    # Create the target folder so a fresh checkout does not fail here.
    os.makedirs('output', exist_ok=True)
    with open('output/classes_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # Extensions are compared in lower case, so one spelling per type is enough.
    supported = {'.jpg', '.png', '.jpeg', '.bmp'}

    filepaths = []
    labels = []
    every_class_num = []

    for klass in classes:
        classpath = os.path.join(root, klass)
        flist = sorted(os.listdir(classpath))
        every_class_num.append(
            sum(1 for f in flist if os.path.splitext(f)[-1].lower() in supported)
        )
        desc = f'{klass:23s}'
        for f in tqdm(flist, ncols=110, desc=desc, unit='file', colour='blue'):
            if os.path.splitext(f)[-1].lower() not in supported:
                continue
            fpath = os.path.join(classpath, f)
            # cv2.imread signals an undecodable file by returning None rather
            # than raising, so the result has to be checked explicitly.
            try:
                readable = cv2.imread(fpath) is not None
            except cv2.error:
                readable = False
            if readable:
                filepaths.append(fpath)
                labels.append(klass)
            else:
                print('defective image file: ', fpath)

    Fseries = pd.Series(filepaths, name='filepaths')
    Lseries = pd.Series(labels, name='labels')
    df = pd.concat([Fseries, Lseries], axis=1)

    print(f'{len(df.labels.unique())} kind of images were found in the dataset')

    test_image_path = df['filepaths'].tolist()
    test_image_label = [class_indices[i] for i in df['labels'].tolist()]

    # Estimate the average image size from at most 50 images; sampling more
    # rows than exist without replacement would raise.
    sample_df = df.sample(n=min(50, len(df)), replace=False)
    ht, wt, count = 0, 0, 0
    for fpath in sample_df['filepaths']:
        img = cv2.imread(fpath)
        if img is None:
            continue
        ht += img.shape[0]
        wt += img.shape[1]
        count += 1

    print('{} images were found in the dataset.\n{} for test'.format(
        sum(every_class_num), len(test_image_path)
    ))
    if count:
        have = int(ht / count)
        wave = int(wt / count)
        aspect_ratio = have / wave
        print('average image height= ', have, '  average image width= ', wave, ' aspect ratio h/w= ', aspect_ratio)

    if plot_image:
        plt.bar(range(len(classes)), every_class_num, align='center')
        plt.xticks(range(len(classes)), classes)

        # Write each count slightly above its bar.
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')

        plt.xlabel('image class')
        plt.ylabel('number of images')

        plt.title('class distribution')
        plt.show()

    return test_image_path, test_image_label


# Scan the dataset folder at import time; the second argument (False)
# disables the class-distribution plot.
test_image_path, test_image_label = read_test_data(
    'D:/Desktop/DATA/balance_bei_liao_hu/temp', False)


def build_transform(img_size):
    """Build the evaluation transform.

    Args:
        img_size: Target size passed to both the resize and the center crop.

    Returns:
        A ``transforms.Compose`` that resizes with bicubic interpolation,
        center-crops, converts to a tensor and normalizes with the ImageNet
        default mean and standard deviation.
    """
    return transforms.Compose([
        # The named enum replaces the bare integer 3 (PIL's BICUBIC code).
        transforms.Resize(img_size, interpolation=transforms.InterpolationMode.BICUBIC),
        transforms.CenterCrop(img_size),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD),
    ])


# Evaluation pipeline: dataset -> sequential loader -> model -> metrics/ROC.
test_transform = build_transform(224)
test_set = MyDataset(test_image_path, test_image_label, test_transform)

# Iterate the test set in its stored order, 36 images per batch.
sampler_val = torch.utils.data.SequentialSampler(test_set)
data_loader_val = torch.utils.data.DataLoader(
    test_set,
    sampler=sampler_val,
    batch_size=36,
    num_workers=0,
    pin_memory=True,
    drop_last=False,
)

# Three-class MobileNetV4 placed on the GPU.
device = torch.device('cuda')
model_predict = create_model('mobilenetv4_hybrid_medium')
model_predict.reset_classifier(num_classes=3)
model_predict.to(device)

# Both helpers receive the same checkpoint path.
Predictor(model_predict, data_loader_val, './output/mobilenetv4_hybrid_medium_best_checkpoint.pth', device)
Plot_ROC(model_predict, data_loader_val, './output/mobilenetv4_hybrid_medium_best_checkpoint.pth', device)

结果：

TensorRT:

onnx：

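可以观察到在转成TensorRT推理后模型精度下降明显，宏平均Precision下降了约4%，宏平均召回下降了约10%，宏平均F1下降了约10%。需要注意的是，上面两段代码的预处理方式并不一致：TensorRT代码使用cv2直接缩放到224×224，并采用自定义的均值和标准差；而对照代码实际加载的是PyTorch权重(.pth)，使用Resize+CenterCrop和ImageNet默认的均值与标准差。因此这里的精度差异不一定完全来自TensorRT的FP16转换，也可能部分来自预处理的差异。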

注：

（1）预处理方式


def preprocess_image(image_path: str, size: int = 224) -> np.ndarray:
    """Read an image file and turn it into a normalized model input.

    Args:
        image_path: Path of the image file to read.
        size: Side length the image is resized to (defaults to 224).

    Returns:
        np.ndarray: float32 array of shape (1, 3, size, size).

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
        ValueError: If OpenCV cannot decode the file.
        RuntimeError: If resizing or normalizing fails.
    """
    preprocess_start_time = time.perf_counter()
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"图像文件不存在: {os.path.abspath(image_path)}")

    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("无法读取图像，请检查文件格式和完整性")

    try:
        # Resize, then convert OpenCV's BGR channel order to RGB.
        image = cv2.resize(image, (size, size))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # HWC -> CHW, as float32 scaled to [0, 1].
        image = np.ascontiguousarray(image.transpose(2, 0, 1)).astype(np.float32)
        image /= 255.0

        # float32 statistics keep the whole computation in float32.
        mean = np.array([0.362, 0.279, 0.258], dtype=np.float32).reshape(3, 1, 1)
        std = np.array([0.222, 0.191, 0.185], dtype=np.float32).reshape(3, 1, 1)
        image = (image - mean) / std

        # Prepend the batch axis.
        image = np.expand_dims(image, axis=0)
    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise RuntimeError(f"图像预处理失败: {str(e)}") from e

    preprocess_time = (time.perf_counter() - preprocess_start_time) * 1000
    print(f"预处理时间: {preprocess_time:.2f} ms")

    return image

（2）TensorRT与torchvision包导入

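torchvision包与TensorRT包在同一个进程中导入时可能会报错。从下面的报错信息看，问题出在加载torch自带的CUDA动态库（cublas64_11.dll）时失败，更可能是两者依赖的CUDA动态库版本发生冲突，而不是运行时抢夺cuda资源。可以尝试调整导入顺序（先导入torch/torchvision，再导入tensorrt），或者像本文一样在TensorRT推理代码中不导入torchvision。
当先导入tensorrt后导入torchvision时，报错如下：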

import tensorrt as trt
import torchvision.transforms as transforms

Traceback (most recent call last):
  File "D:\Desktop\SRM\srm_GUI\Predictor\TensorRTPredictor.py", line 2, in <module>
    import torchvision.transforms as transforms
  File "D:\Software\anaconda3\envs\CV\lib\site-packages\torchvision\__init__.py", line 5, in <module>
    import torch
  File "D:\Software\anaconda3\envs\CV\lib\site-packages\torch\__init__.py", line 122, in <module>
    raise err
OSError: [WinError 127] 找不到指定的程序。 Error loading "D:\Software\anaconda3\envs\CV\lib\site-packages\torch\lib\cublas64_11.dll" or one of its dependencies.

作者：学习BigData

物联沃分享整理
物联沃-IOTWORD物联网 » 【TensorRT】Python环境下MobileNetV4三分类任务：从安装到推理全过程解析