Windows10下YOLOv8-Pose(8.2.10)从零部署:自定义数据集训练与工程化推理实战

张开发
2026/4/12 4:44:23 15 分钟阅读

分享文章

Windows10下YOLOv8-Pose(8.2.10)从零部署:自定义数据集训练与工程化推理实战
## 1. 环境准备与工具安装

在Windows10系统下部署YOLOv8-Pose需要先搭建好开发环境。我建议使用Anaconda来管理Python环境,这样可以避免不同项目之间的依赖冲突。首先下载并安装Anaconda最新版,这个步骤很简单,就像安装普通软件一样一路下一步即可。

装好Anaconda后,打开命令提示符(CMD)或Anaconda Prompt,创建一个新的Python虚拟环境:

```bash
conda create -n yolo8 python=3.8
conda activate yolo8
```

这里我选择Python 3.8,是因为它在兼容性方面表现最稳定。激活环境后我们需要安装PyTorch。这里有个坑要注意:必须安装与你的CUDA版本匹配的PyTorch。你可以通过运行 `nvidia-smi` 命令查看CUDA版本。以CUDA 11.6为例:

```bash
pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116
```

接下来安装其他必要的依赖包:

```bash
pip install numpy opencv-python pillow pandas matplotlib seaborn tqdm wandb seedir -i https://pypi.tuna.tsinghua.edu.cn/simple
```

最后安装Ultralytics官方库,这是YOLOv8的核心:

```bash
pip install ultralytics
```

## 2. 数据标注与预处理

### 2.1 使用LabelMe标注关键点

对于自定义关键点检测任务,我推荐使用LabelMe这个开源工具。它支持矩形框和关键点标注,而且生成的JSON格式很容易转换。安装并启动LabelMe很简单:

```bash
pip install labelme
labelme
```

标注时有个小技巧:先标注完所有图片的边界框,然后再统一标注某一类关键点。比如你要检测工业零件的三个角度关键点,可以先把所有图片的30度角点标完,再标60度的,最后标90度的。这样做有两个好处:一是效率高,二是减少标注错误。

标注完成后,每个图片会生成对应的JSON文件。我强烈建议写个脚本检查标注质量:

```python
import os
import cv2
import json

def visualize_annotations(image_folder):
    """逐张可视化LabelMe标注,按任意键看下一张,按q退出。"""
    for file in os.listdir(image_folder):
        if file.endswith(".jpg"):
            img_path = os.path.join(image_folder, file)
            json_path = os.path.splitext(img_path)[0] + ".json"
            img = cv2.imread(img_path)
            with open(json_path) as f:
                data = json.load(f)
            # 绘制标注框和关键点
            for shape in data["shapes"]:
                if shape["shape_type"] == "rectangle":
                    points = shape["points"]
                    cv2.rectangle(img,
                                  (int(points[0][0]), int(points[0][1])),
                                  (int(points[1][0]), int(points[1][1])),
                                  (0, 255, 0), 2)
                elif shape["shape_type"] == "point":
                    point = shape["points"][0]
                    cv2.circle(img, (int(point[0]), int(point[1])), 5, (0, 0, 255), -1)
            cv2.imshow("Annotation Check", img)
            if cv2.waitKey(0) == ord("q"):
                break
```

### 2.2 数据集划分与格式转换

标注完成后,我们需要将数据集划分为训练集和验证集,通常采用8:2的比例。同时要把LabelMe的JSON格式转换为YOLO格式的TXT文件。这里我分享一个完整的处理流程:

```python
import os
import json
import random
from tqdm import tqdm

def convert_labelme_to_yolo(json_folder, output_folder, class_map, kpt_classes):
    """把LabelMe的JSON标注转换成YOLO-Pose的TXT标签格式。"""
    os.makedirs(output_folder, exist_ok=True)
    for json_file in tqdm(os.listdir(json_folder)):
        if not json_file.endswith(".json"):
            continue
        with open(os.path.join(json_folder, json_file)) as f:
            data = json.load(f)
        txt_lines = []
        img_width = data["imageWidth"]
        img_height = data["imageHeight"]
        # 处理每个标注对象
        for shape in data["shapes"]:
            if shape["shape_type"] == "rectangle":
                # 转换边界框为归一化的中心点+宽高
                points = shape["points"]
                x_center = (points[0][0] + points[1][0]) / 2 / img_width
                y_center = (points[0][1] + points[1][1]) / 2 / img_height
                width = abs(points[1][0] - points[0][0]) / img_width
                height = abs(points[1][1] - points[0][1]) / img_height
                line = f"{class_map[shape['label']]} {x_center:.5f} {y_center:.5f} {width:.5f} {height:.5f}"
                # 收集落在该框内的关键点
                kpts = {}
                for kpt_shape in data["shapes"]:
                    if kpt_shape["shape_type"] == "point" and \
                       points[0][0] <= kpt_shape["points"][0][0] <= points[1][0] and \
                       points[0][1] <= kpt_shape["points"][0][1] <= points[1][1]:
                        kpts[kpt_shape["label"]] = kpt_shape["points"][0]
                # 按预定顺序添加关键点
                for cls in kpt_classes:
                    if cls in kpts:
                        x = kpts[cls][0] / img_width
                        y = kpts[cls][1] / img_height
                        line += f" {x:.5f} {y:.5f} 2"  # 2表示可见
                    else:
                        line += " 0 0 0"  # 0表示不存在
                txt_lines.append(line)
        # 保存YOLO格式标签
        txt_filename = os.path.splitext(json_file)[0] + ".txt"
        with open(os.path.join(output_folder, txt_filename), "w") as f:
            f.write("\n".join(txt_lines))
```

## 3. 模型训练与调优

### 3.1 准备配置文件

YOLOv8-Pose需要一个YAML配置文件来定义数据集和模型参数。创建一个 `custom_pose.yaml` 文件:

```yaml
# 数据集路径
path: ./datasets/custom_pose
train: images/train
val: images/val

# 关键点类别
kpt_shape: [3]  # 关键点数量
flip_idx: []    # 对称关键点索引,如[1,0]表示第0和第1个关键点对称

# 类别名称
names:
  0: object

# 关键点名称和连接关系
skeleton: []
```

### 3.2 启动训练

训练命令有很多可调参数,这里我分享几个实用的组合:

```bash
# 基础训练:使用预训练权重
yolo pose train data=custom_pose.yaml model=yolov8m-pose.pt epochs=100 imgsz=640 batch=16 device=0

# 高级训练:自定义参数
yolo pose train data=custom_pose.yaml model=yolov8n-pose.pt pretrained=True \
    epochs=150 batch=8 imgsz=640 optimizer=Adam lr0=0.001 \
    pose=12.0 kobj=1.5 cls=0.5 box=7.5 device=0 workers=4
```

训练过程中有几个关键点需要注意:学习率太大容易震荡,太小收敛慢,可以从0.01开始尝试;批量大小受限于显存,在保证不OOM的情况下尽可能大;损失权重中 `pose` 控制关键点定位,`kobj` 控制关键点置信度;数据增强默认开启,马赛克增强对小目标很有效。

### 3.3 训练监控

推荐使用Weights & Biases(WandB)来监控训练过程:

```bash
pip install wandb
wandb login
```

训练时会自动记录各项指标,你可以在网页上实时查看损失曲线、验证精度等。

## 4. 模型部署与推理

### 4.1 图片推理

训练完成后可以使用最佳模型进行推理。这里我封装了一个更易用的推理类:

```python
from ultralytics import YOLO
import cv2
import numpy as np

class PoseDetector:
    """YOLOv8-Pose推理封装:检测并在图像上绘制边界框与关键点。"""

    def __init__(self, model_path, kpt_colors=None, kpt_radius=5, line_thickness=2):
        self.model = YOLO(model_path)
        self.kpt_colors = kpt_colors or [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
        self.kpt_radius = kpt_radius
        self.line_thickness = line_thickness

    def detect(self, img, conf_threshold=0.5):
        results = self.model(img, conf=conf_threshold)
        visualized_img = img.copy()
        for result in results:
            boxes = result.boxes.data.tolist()
            keypoints = result.keypoints.data.cpu().numpy()
            for box, kpts in zip(boxes, keypoints):
                # 绘制边界框
                x1, y1, x2, y2 = map(int, box[:4])
                cv2.rectangle(visualized_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
                # 绘制关键点
                for i, (x, y, conf) in enumerate(kpts):
                    if conf > conf_threshold:
                        color = self.kpt_colors[i % len(self.kpt_colors)]
                        cv2.circle(visualized_img, (int(x), int(y)),
                                   self.kpt_radius, color, -1)
        return visualized_img, results

# 使用示例
detector = PoseDetector("best.pt")
img = cv2.imread("test.jpg")
result_img, results = detector.detect(img)
cv2.imwrite("result.jpg", result_img)
```

### 4.2 视频流处理

对于视频或摄像头实时处理,可以使用以下优化后的代码:

```python
import cv2
from pose_detector import PoseDetector  # 上面的类

def process_video(input_path, output_path, model_path):
    """逐帧检测视频并把可视化结果写入输出文件,按q中断。"""
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    detector = PoseDetector(model_path)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        result_frame, _ = detector.detect(frame)
        out.write(result_frame)
        cv2.imshow("Preview", result_frame)
        if cv2.waitKey(1) == ord("q"):
            break
    cap.release()
    out.release()
    cv2.destroyAllWindows()

# 处理视频文件
process_video("input.mp4", "output.mp4", "best.pt")

# 处理摄像头
def process_camera(camera_id=0, model_path="best.pt"):
    """打开摄像头实时检测,按q退出。"""
    cap = cv2.VideoCapture(camera_id)
    detector = PoseDetector(model_path)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        result_frame, _ = detector.detect(frame)
        cv2.imshow("Camera", result_frame)
        if cv2.waitKey(1) == ord("q"):
            break
    cap.release()
    cv2.destroyAllWindows()
```

## 5. 工程化优化建议

在实际项目中部署YOLOv8-Pose时,有几个优化方向值得考虑。

**模型量化**:使用TensorRT或ONNX Runtime加速推理:

```bash
yolo export model=best.pt format=onnx opset=12 simplify=True
```

**多线程处理**:使用生产者-消费者模式处理视频流:

```python
from queue import Queue
from threading import Thread

class VideoProcessor:
    """生产者-消费者模式:一个线程采集帧,一个线程做推理。"""

    def __init__(self, model_path, buffer_size=10):
        self.detector = PoseDetector(model_path)
        self.frame_queue = Queue(maxsize=buffer_size)
        self.result_queue = Queue(maxsize=buffer_size)

    def start(self, input_path):
        self.capture_thread = Thread(target=self._capture_frames, args=(input_path,))
        self.process_thread = Thread(target=self._process_frames)
        self.capture_thread.start()
        self.process_thread.start()

    def _capture_frames(self, input_path):
        cap = cv2.VideoCapture(input_path)
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            self.frame_queue.put(frame)
        cap.release()

    def _process_frames(self):
        while True:
            frame = self.frame_queue.get()
            if frame is None:
                break
            result = self.detector.detect(frame)
            self.result_queue.put(result)
```

**性能监控**:添加FPS计数和资源监控:

```python
import time
import psutil

class PerformanceMonitor:
    """每10帧计算一次平均FPS并打印内存/CPU占用。"""

    def __init__(self):
        self.start_time = time.time()
        self.frame_count = 0
        self.fps_history = []

    def update(self):
        self.frame_count += 1
        if self.frame_count % 10 == 0:
            elapsed = time.time() - self.start_time
            fps = self.frame_count / elapsed
            self.fps_history.append(fps)
            # 获取内存和CPU使用情况
            mem = psutil.virtual_memory().percent
            cpu = psutil.cpu_percent()
            print(f"FPS: {fps:.1f} | Memory: {mem}% | CPU: {cpu}%")
```

**结果后处理**:添加关键点滤波和平滑:

```python
from collections import deque

class KeypointSmoother:
    """对每个目标的关键点做滑动窗口移动平均,抑制抖动。"""

    def __init__(self, window_size=5):
        self.window_size = window_size
        self.history = {}

    def smooth(self, current_kpts):
        smoothed = {}
        for obj_id, kpts in current_kpts.items():
            if obj_id not in self.history:
                self.history[obj_id] = deque(maxlen=self.window_size)
            self.history[obj_id].append(kpts)
            # 使用移动平均平滑关键点
            smoothed[obj_id] = np.mean(self.history[obj_id], axis=0)
        return smoothed
```

更多文章