win10环境实现yolov5 TensorRT加速试验(环境配置+训练+推理)
2.1 数据准备
import os
import shutil
import xml.etree.ElementTree as ET
from generate_xml import parse_xml, generate_xml
import numpy as np
import cv2
from tqdm import tqdm
def get_dataset_class(xml_root):
    """Collect the distinct category names used by the annotations under *xml_root*.

    Every ``.xml`` file found while walking ``xml_root`` recursively is parsed
    with ``parse_xml`` and the entries of its ``'cat'`` field are gathered.

    Args:
        xml_root: Directory that is searched recursively for ``.xml`` files.

    Returns:
        A sorted list of the unique category names (empty if no annotation
        file is found). Sorting keeps the name -> index mapping identical
        between runs, which ``list(set(...))`` does not guarantee.
    """
    classes = set()
    for root, _dirs, files in os.walk(xml_root):
        for file in files:
            if file.endswith('.xml'):
                dict_info = parse_xml(os.path.join(root, file))
                # The parsed result used to be discarded, so the function
                # always returned an empty list.
                # NOTE(review): assumes dict_info['cat'] is an iterable of
                # category-name strings - confirm against parse_xml.
                classes.update(dict_info['cat'])
    return sorted(classes)
def convert(size, bbox):
    """Convert a corner-format box into a normalised centre/size box.

    Args:
        size: ``(image_width, image_height)``; index 0 scales the x values
            and index 1 scales the y values.
        bbox: ``(x_min, y_min, x_max, y_max)`` in the same units as ``size``.

    Returns:
        ``(center_x, center_y, width, height)``, each divided by the matching
        image dimension.

    Raises:
        ZeroDivisionError: If either entry of ``size`` is zero.
    """
    dw = 1.0 / size[0]
    dh = 1.0 / size[1]
    center_x = (bbox[0] + bbox[2]) / 2.0 * dw
    center_y = (bbox[1] + bbox[3]) / 2.0 * dh
    width = (bbox[2] - bbox[0]) * dw
    height = (bbox[3] - bbox[1]) * dh
    return center_x, center_y, width, height
def get_all_files(img_xml_root, file_type='.xml'):
    """Find image/annotation pairs that share a base name under a directory.

    The tree below ``img_xml_root`` is walked recursively. Files ending in
    ``file_type`` act as the anchor of a pair and the partner file is looked
    up next to them:

    * ``'.xml'`` or ``'.json'`` - the partner is the ``.jpg`` with the same stem.
    * ``'.jpg'`` - the partner is the ``.xml`` with the same stem.

    A pair is only recorded when the partner file exists. Any other
    ``file_type`` yields two empty lists.

    Args:
        img_xml_root: Directory to search.
        file_type: Extension (including the dot) of the anchor files.

    Returns:
        ``(img_paths, xml_paths)`` - two lists of equal length where entries
        at the same index belong together. For ``'.json'`` the second list
        holds the ``.json`` paths.
    """
    img_paths = []
    xml_paths = []
    for root, _dirs, files in os.walk(img_xml_root):
        for file in files:
            if not file.endswith(file_type):
                continue
            file_path = os.path.join(root, file)
            stem = os.path.splitext(file_path)[0]
            if file_type in ('.xml', '.json'):
                img_path = stem + '.jpg'
                if os.path.exists(img_path):
                    img_paths.append(img_path)
                    xml_paths.append(file_path)
            elif file_type == '.jpg':
                xml_path = stem + '.xml'
                if os.path.exists(xml_path):
                    img_paths.append(file_path)
                    xml_paths.append(xml_path)
    return img_paths, xml_paths
def train_test_split(img_paths, xml_paths, test_size=0.2):
    """Split paired image/annotation paths into a training and a test part.

    The pairs are kept in their incoming order (no shuffling): the leading
    ``1 - test_size`` share becomes the training set and the rest the test set.

    Args:
        img_paths: Image paths.
        xml_paths: Annotation paths, aligned index-by-index with ``img_paths``.
        test_size: Fraction of the pairs assigned to the test set.

    Returns:
        ``(train_set, test_set)`` - two lists of ``(img_path, xml_path)`` tuples.
    """
    pairs = list(zip(img_paths, xml_paths))
    # Compute the boundary once so both slices are guaranteed to agree.
    split_at = int(len(pairs) * (1 - test_size))
    return pairs[:split_at], pairs[split_at:]
def convert_annotation(img_xml_set, classes, save_path, is_train=True):
    """Copy images and write one label ``.txt`` per image under *save_path*.

    Images go to ``<save_path>/images/<train|val>`` and labels to
    ``<save_path>/labels/<train|val>``. Each label line has the form
    ``<class index> <center_x> <center_y> <width> <height>`` with the box
    values produced by ``convert``. Images without any annotated box get no
    label file.

    Args:
        img_xml_set: Iterable of ``(img_path, xml_path)`` pairs.
        classes: List of category names; a box's class index is the position
            of its category in this list.
        save_path: Root directory of the converted dataset.
        is_train: Write into the ``train`` sub-folders when true, otherwise
            into ``val``.

    Raises:
        ValueError: If an annotation uses a category missing from ``classes``.
    """
    subset = 'train' if is_train else 'val'
    img_root = os.path.join(save_path, 'images', subset)
    txt_root = os.path.join(save_path, 'labels', subset)
    os.makedirs(img_root, exist_ok=True)
    os.makedirs(txt_root, exist_ok=True)

    for item in tqdm(img_xml_set):
        img_path = item[0]
        xml_path = item[1]

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None, which
            # would otherwise surface as an AttributeError on img.shape.
            print('skip unreadable image:', img_path)
            continue
        shutil.copy(img_path, img_root)
        size = (img.shape[1], img.shape[0])  # (width, height)

        dict_info = parse_xml(xml_path)
        yolo_infos = []
        for cat, box in zip(dict_info['cat'], dict_info['bboxes']):
            center_x, center_y, w, h = convert(size, box)
            cat_box = [str(classes.index(cat)), str(center_x), str(center_y), str(w), str(h)]
            yolo_infos.append(' '.join(cat_box))

        if yolo_infos:
            txt_file_name = os.path.splitext(os.path.basename(img_path))[0] + '.txt'
            with open(os.path.join(txt_root, txt_file_name), 'w', encoding='utf_8') as f:
                # The write loop had no body, so label files stayed empty.
                f.write('\n'.join(yolo_infos) + '\n')
if __name__ == '__main__':
    # Source folder holding the .jpg images and their .xml annotations, and
    # the folder that receives the converted dataset.
    xml_root = r'dataset\man'
    save_path = r'dataset\man\yolo'
    os.makedirs(save_path, exist_ok=True)

    classes = get_dataset_class(xml_root)
    img_paths, xml_paths = get_all_files(xml_root, file_type='.xml')
    train_set, test_set = train_test_split(img_paths, xml_paths, test_size=0.2)
    convert_annotation(train_set, classes, save_path, is_train=True)
    convert_annotation(test_set, classes, save_path, is_train=False)
2.2 模型训练
def parse_opt(known=False):
    """Build the training argument parser and parse the command line.

    Args:
        known: When true, use ``parse_known_args`` and ignore unrecognised
            arguments; otherwise use ``parse_args``, which rejects them.

    Returns:
        The parsed ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='/home/ubuntu/cb/workspace/yolov5-6.1/', help='initial weights path')
    parser.add_argument('--cfg', type=str, default='/home/ubuntu/cb/workspace/deepsort_yolov5/yolov5-6.1/models/yolov5s.yaml', help='model.yaml path')
    parser.add_argument('--data', type=str, default='/home/ubuntu/cb/workspace/deepsort_yolov5/yolov5-6.1/data/jiaotong.yaml', help='dataset.yaml path')
    parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
    parser.add_argument('--epochs', type=int, default=200)
    parser.add_argument('--batch-size', type=int, default=32, help='total batch size for all GPUs, -1 for autobatch')
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    parser.add_argument('--noval', action='store_true', help='only validate final epoch')
    parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
    parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--quad', action='store_true', help='quad dataloader')
    parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
    parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
    parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
    parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
    parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')

    # Weights & Biases arguments
    parser.add_argument('--entity', default=None, help='W&B: Entity')
    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
    parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')

    opt = parser.parse_known_args()[0] if known else parser.parse_args()
    return opt
python train.py --weights yolov5s.pt --cfg models/yolov5s.yaml --data data/jiaotong.yaml --batch-size 64 --multi-scale --device 0,1
3.1 模型转换为onnx
1、是否要将后处理放在转换为 onnx 的过程中:这一点可以参考 yolov8-TensorRT 中 end2end 的实现。
3.2 模型转换为engine
trtexec.exe --onnx=best.onnx --saveEngine=best.engine
上述命令没有指定精度,默认为 fp32。也可以额外指定其他精度,把模型内的计算转换成 fp16 或者 int8 类型;两者可以只开启一个,也可以同时开启,此时 trtexec 会倾向于选择速度最快的方式(有些网络模块不支持 int8)。
trtexec.exe --onnx=best.onnx --saveEngine=best.engine --fp16
trtexec 命令的参数具体设定以及应用请参看 TensorRT 官方文档:TensorRT Command-Line Wrapper: trtexec。
// Builds the YOLO engine described by `cfg` via APIToModel and writes the
// serialized plan to `cfg.engine_path`.
//
// Returns 0 on success and -1 when the engine could not be built or the
// output file could not be opened.
int get_trtengine_yolo(Parameters_yolo& cfg) {
    cout << "\n\n\n" << "\tbuilding yolo engine ,please wait .......\n\n" << endl;

    IHostMemory* modelStream{ nullptr };
    APIToModel(cfg, &modelStream);
    // Explicit check instead of assert(): assert is compiled out under NDEBUG,
    // which would turn a failed build into a null dereference below.
    if (modelStream == nullptr) {
        std::cerr << "failed to build yolo engine" << std::endl;
        return -1;
    }

    std::ofstream p(cfg.engine_path, std::ios::binary);
    if (!p) {
        std::cerr << "could not open plan output file" << std::endl;
        modelStream->destroy();
        return -1;
    }
    p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());

    // Release the serialized plan once it is on disk.
    // NOTE(review): destroy() is the release call up to TensorRT 8.x (where it
    // is deprecated in favour of `delete`) - confirm against the TensorRT
    // version this project builds with.
    modelStream->destroy();

    cout << "\n\n" << "\tfinished yolo engine created" << endl;
    return 0;
}
// Builds a TensorRT engine for `cfg` and stores its serialized form in
// `*modelStream`. The caller owns the returned IHostMemory and must release it.
void APIToModel(Parameters_yolo& cfg, IHostMemory** modelStream)
{
    // Create builder
    IBuilder* builder = createInferBuilder(gLogger);
    IBuilderConfig* config = builder->createBuilderConfig();

    // Create model to populate the network, then set the outputs and create an engine
    ICudaEngine* engine = createEngine(cfg, builder, config);
    assert(engine != nullptr);

    // Serialize the engine
    (*modelStream) = engine->serialize();

    // Close everything down: the serialized plan is independent of these
    // objects, so they can be released here instead of being leaked.
    // NOTE(review): destroy() is the release call up to TensorRT 8.x (where it
    // is deprecated in favour of `delete`) - confirm against the TensorRT
    // version this project builds with.
    engine->destroy();
    config->destroy();
    builder->destroy();
}
