PyTorch物体检测实战:测试集划分与模型评估全流程解析

一、测试集划分的重要性与基本原则

在物体检测任务中,测试集是评估模型泛化能力的核心依据。合理的测试集划分需遵循三个基本原则:

  1. 数据分布一致性:测试集应与训练集保持相同的类别分布和场景特征。例如在COCO数据集中,若训练集包含80%的日常场景图片,测试集也应维持相近比例。
  2. 独立性要求:测试样本必须完全独立于训练集,同一张图片(或同一视频的相邻帧等近似重复样本)不能同时出现在两个集合中。实际应用中常采用分层抽样法,按类别、场景等维度划分数据。以VOC数据集为例,可让20个类别各自的样本按相同比例分配到训练集和测试集。
  3. 规模合理性:测试集规模通常占数据集总量的10%-30%。对于包含10万张图片的数据集,建议测试集规模控制在1万-3万张之间。

二、PyTorch测试集构建实战

1. 数据集加载与预处理

使用PyTorch的torchvision.datasets模块可高效加载标准数据集:

  1. from torchvision.datasets import VOCDetection
  2. from torchvision.transforms import Compose, ToTensor
  # Preprocessing applied to every test image.
  transform = Compose([
      ToTensor(),  # convert to a tensor scaled to [0, 1]
      # Further steps such as Resize / Normalize can be appended here.
  ])

  # Load the test split. Pascal VOC only ships an annotated 'test' image set
  # for the 2007 release; torchvision rejects image_set='test' for any other
  # year, so the year must be '2007' here (use image_set='val' for VOC2012).
  test_dataset = VOCDetection(
      root='./data',
      year='2007',
      image_set='test',
      download=True,
      transform=transform,
  )

对于自定义数据集,建议使用torch.utils.data.Dataset类实现:

  class CustomDetectionDataset(Dataset):
      """Detection dataset backed by an image folder and YOLO-format labels.

      Each image ``<name>.<ext>`` in ``img_dir`` is paired with
      ``<name>.txt`` in ``label_dir``. Every non-blank label line must hold
      ``class_id x_center y_center width height`` with the four box values
      normalised to [0, 1] (YOLO convention).

      Args:
          img_dir: directory containing the images.
          label_dir: directory containing the label files.
          transform: optional callable applied to the PIL image only.
      """

      # File suffixes (lower-case) that are treated as images.
      IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp')

      def __init__(self, img_dir, label_dir, transform=None):
          self.img_dir = img_dir
          self.label_dir = label_dir
          self.transform = transform
          # os.listdir order is arbitrary and may include non-image files;
          # filter and sort so that index -> sample is stable across runs.
          self.img_list = sorted(
              name for name in os.listdir(img_dir)
              if name.lower().endswith(self.IMAGE_EXTENSIONS)
          )

      def __len__(self):
          return len(self.img_list)

      def __getitem__(self, idx):
          """Return ``(image, target)`` for sample ``idx``.

          ``target['boxes']`` is a float32 tensor of shape [K, 4] holding
          absolute-pixel ``(x1, y1, x2, y2)`` corners and
          ``target['labels']`` an int64 tensor of shape [K]. K is 0 when the
          label file is missing or empty.

          Raises:
              ValueError: if a non-blank label line does not have exactly
                  five numeric fields.
          """
          img_name = self.img_list[idx]
          img_path = os.path.join(self.img_dir, img_name)
          # splitext handles any image extension, not just '.jpg'.
          label_path = os.path.join(
              self.label_dir, os.path.splitext(img_name)[0] + '.txt'
          )

          image = Image.open(img_path).convert('RGB')
          img_w, img_h = image.size  # read before transform may change it

          boxes = []
          labels = []
          # An image without objects commonly has no label file at all.
          if os.path.exists(label_path):
              with open(label_path, encoding='utf-8') as f:
                  for line in f:
                      fields = line.split()
                      if not fields:
                          continue  # tolerate blank / trailing lines
                      class_id, x_center, y_center, width, height = map(
                          float, fields
                      )
                      # YOLO stores normalised centre/size; scale to pixel
                      # corner coordinates.
                      boxes.append([
                          (x_center - width / 2) * img_w,
                          (y_center - height / 2) * img_h,
                          (x_center + width / 2) * img_w,
                          (y_center + height / 2) * img_h,
                      ])
                      labels.append(int(class_id))

          target = {
              # reshape keeps the [K, 4] layout even when K == 0.
              'boxes': torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4),
              'labels': torch.tensor(labels, dtype=torch.int64),
          }
          # NOTE: the transform only sees the image; a geometric transform
          # (e.g. Resize) would leave the boxes in the original pixel frame.
          if self.transform:
              image = self.transform(image)
          return image, target

2. 数据加载器配置

使用DataLoader实现批量加载和并行处理:

  1. from torch.utils.data import DataLoader
  def detection_collate(batch):
      """Regroup ``[(image, target), ...]`` into ``(images, targets)``.

      Detection targets hold a different number of boxes per image, so the
      samples are kept as tuples instead of being stacked into one tensor.
      This is a named module-level function rather than a lambda because
      worker processes started with the ``spawn`` method (the default on
      Windows and macOS) must pickle ``collate_fn``, and lambdas cannot be
      pickled.
      """
      return tuple(zip(*batch))


  test_loader = DataLoader(
      test_dataset,
      batch_size=8,   # tune to the available GPU memory
      shuffle=False,  # evaluation does not need shuffling
      num_workers=4,  # number of loader worker processes
      collate_fn=detection_collate,  # handles variable-length targets
  )

三、物体检测模型评估流程

1. 模型加载与预处理

以Faster R-CNN为例展示模型加载:

  1. import torchvision
  2. from torchvision.models.detection import fasterrcnn_resnet50_fpn
  import torch

  # Load the pretrained detector.
  # NOTE: torchvision >= 0.13 deprecates `pretrained=True` in favour of the
  # `weights=` argument; switch once the minimum torchvision version allows.
  model = fasterrcnn_resnet50_fpn(pretrained=True)

  # Put the model on the same device the evaluation code sends images to;
  # leaving it on the CPU while feeding CUDA tensors raises a device mismatch.
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  model.to(device)
  model.eval()  # switch to evaluation mode

  # If the head was customised, re-attach the replacement component here:
  # model.roi_heads.box_predictor = ...

2. 评估指标计算

torchvision.ops模块提供了现成的IoU计算工具box_iou:

  1. from torchvision.ops import box_iou
  def calculate_iou(pred_boxes, target_boxes):
      """Compute the pairwise IoU between predicted and ground-truth boxes.

      Args:
          pred_boxes: tensor of shape [N, 4] in (x1, y1, x2, y2) order.
          target_boxes: tensor of shape [M, 4].

      Returns:
          The [N, M] IoU matrix returned by ``box_iou``.
      """
      iou_matrix = box_iou(pred_boxes, target_boxes)
      return iou_matrix
  def evaluate_model(model, test_loader, iou_threshold=0.5,
                     score_threshold=0.5, device=None):
      """Compute detection precision, recall and F1 over a data loader.

      Args:
          model: detection model; called with a list of image tensors and
              expected to return one dict per image with ``'boxes'``,
              ``'labels'`` and ``'scores'``.
          test_loader: iterable yielding ``(images, targets)`` batches, where
              each target is a dict with ``'boxes'`` and ``'labels'``.
          iou_threshold: minimum IoU for a prediction to match a ground-truth
              box of the same class.
          score_threshold: predictions scoring below this are discarded
              before matching.
          device: device the images are moved to. Defaults to the device of
              the model's first parameter (CPU if it has none).

      Returns:
          Dict with plain Python floats under ``'precision'``, ``'recall'``
          and ``'f1'``.
      """
      if device is None:
          # Follow the model instead of hard-coding 'cuda'.
          first_param = next(model.parameters(), None)
          device = (first_param.device if first_param is not None
                    else torch.device('cpu'))

      model.eval()
      total_tp = 0
      total_fp = 0
      total_fn = 0
      with torch.no_grad():
          for images, targets in test_loader:
              images = [image.to(device) for image in images]
              predictions = model(images)
              for pred, target in zip(predictions, targets):
                  tp, fp, fn = _count_matches(
                      pred, target, iou_threshold, score_threshold
                  )
                  total_tp += tp
                  total_fp += fp
                  total_fn += fn

      # The counters are Python ints, so the metrics are already floats;
      # the small epsilon guards against division by zero.
      precision = total_tp / (total_tp + total_fp + 1e-10)
      recall = total_tp / (total_tp + total_fn + 1e-10)
      f1 = 2 * (precision * recall) / (precision + recall + 1e-10)
      return {'precision': precision, 'recall': recall, 'f1': f1}


  def _count_matches(pred, target, iou_threshold, score_threshold):
      """Return ``(tp, fp, fn)`` for one image via greedy class-aware matching."""
      # Match on the CPU so masks and indices always share one device.
      gt_boxes = target['boxes'].cpu().reshape(-1, 4)
      gt_labels = target['labels'].cpu()

      scores = pred['scores'].cpu()
      keep = scores >= score_threshold
      # Highest-scoring predictions claim ground-truth boxes first.
      order = scores[keep].argsort(descending=True)
      pred_boxes = pred['boxes'].cpu()[keep][order]
      pred_labels = pred['labels'].cpu()[keep][order]

      num_pred = len(pred_boxes)
      num_gt = len(gt_boxes)
      if num_pred == 0 or num_gt == 0:
          # Nothing can match: every prediction is a false positive and
          # every ground-truth box a miss.
          return 0, num_pred, num_gt

      ious = calculate_iou(pred_boxes, gt_boxes)
      # A prediction may only match a ground-truth box of its own class.
      same_class = pred_labels[:, None] == gt_labels[None, :]
      ious = ious.masked_fill(~same_class, -1.0)

      matched_gt = torch.zeros(num_gt, dtype=torch.bool)
      tp = 0
      for row in ious:
          # Hide ground-truth boxes that an earlier prediction already took.
          candidates = row.masked_fill(matched_gt, -1.0)
          best_iou, best_idx = candidates.max(dim=0)
          if best_iou.item() >= iou_threshold:
              matched_gt[best_idx] = True
              tp += 1

      # Unmatched predictions (low IoU, wrong class or duplicates) are false
      # positives; unmatched ground-truth boxes are false negatives.
      return tp, num_pred - tp, num_gt - tp

3. 评估结果可视化

使用matplotlib实现检测结果可视化:

  1. import matplotlib.pyplot as plt
  2. import matplotlib.patches as patches
  def visualize_predictions(image, target, prediction, class_names):
      """Show an image with ground-truth (red) and predicted (green) boxes.

      Args:
          image: image tensor; it is permuted with (1, 2, 0) before display.
          target: dict with ``'boxes'`` and ``'labels'`` of the ground truth.
          prediction: dict with ``'boxes'``, ``'labels'`` and ``'scores'``.
          class_names: sequence indexed by label to obtain the display name.
      """
      fig, ax = plt.subplots(1, figsize=(12, 9))
      pixels = image.permute(1, 2, 0).cpu().numpy()
      ax.imshow(pixels)

      # Ground-truth boxes.
      for gt_box, gt_label in zip(target['boxes'], target['labels']):
          left, top, right, bottom = gt_box.cpu().numpy()
          box_w = right - left
          box_h = bottom - top
          outline = patches.Rectangle(
              (left, top), box_w, box_h,
              linewidth=1, edgecolor='r', facecolor='none',
          )
          ax.add_patch(outline)
          ax.text(left, top - 5, class_names[gt_label], color='r', fontsize=12)

      # Predicted boxes.
      detections = zip(
          prediction['boxes'], prediction['labels'], prediction['scores']
      )
      for det_box, det_label, det_score in detections:
          if det_score < 0.5:  # confidence threshold
              continue
          left, top, right, bottom = det_box.cpu().numpy()
          box_w = right - left
          box_h = bottom - top
          outline = patches.Rectangle(
              (left, top), box_w, box_h,
              linewidth=1, edgecolor='g', facecolor='none',
          )
          ax.add_patch(outline)
          ax.text(left, top - 5, f"{class_names[det_label]}: {det_score:.2f}",
                  color='g', fontsize=12)

      plt.axis('off')
      plt.show()

四、进阶优化技巧

  1. 测试时增强(TTA)

    def apply_tta(image, model):
        """Run test-time augmentation and pool the raw detections.

        The model is applied to the original image and to a horizontally
        flipped copy; boxes from the flipped pass are mapped back into the
        original frame before all detections are concatenated.

        Args:
            image: image tensor whose dimension 2 is the width axis.
            model: detection model; called with a one-element list and
                expected to return a list whose first item is a dict with
                ``'boxes'``, ``'labels'`` and ``'scores'``.

        Returns:
            Dict with the concatenated ``'boxes'``, ``'labels'`` and
            ``'scores'``. Overlapping detections are not merged; apply NMS
            on the result for real use.
        """
        # Follow the model's device instead of hard-coding 'cuda'.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else image.device
        width = image.shape[2]

        # Each entry pairs a transform with whether its boxes need
        # un-flipping; comparing lambdas for equality can never identify one.
        augmentations = [
            (lambda x: x, False),                  # original image
            (lambda x: torch.flip(x, [2]), True),  # horizontal flip
            # Rotations, rescaling, ... can be added here.
        ]

        all_predictions = []
        for transform, is_flipped in augmentations:
            transformed_img = transform(image)
            with torch.no_grad():
                pred = model([transformed_img.to(device)])[0]
            boxes = pred['boxes']
            if is_flipped:
                # Boxes are in pixels, so mirror around the image width
                # (x1' = W - x2, x2' = W - x1). Work on a copy so the
                # model's output tensor is not modified in place.
                boxes = boxes.clone()
                boxes[:, 0] = width - pred['boxes'][:, 2]
                boxes[:, 2] = width - pred['boxes'][:, 0]
            all_predictions.append({
                'boxes': boxes,
                'labels': pred['labels'],
                'scores': pred['scores'],
            })

        # Simple pooling of all passes (no NMS).
        merged_boxes = torch.cat([p['boxes'] for p in all_predictions])
        merged_labels = torch.cat([p['labels'] for p in all_predictions])
        merged_scores = torch.cat([p['scores'] for p in all_predictions])
        return {'boxes': merged_boxes,
                'labels': merged_labels,
                'scores': merged_scores}
  2. 分布式评估
    使用torch.distributed实现多GPU评估:
    ```python
    import torch.distributed as dist
    from torch.nn.parallel import DistributedDataParallel as DDP

def setup(rank, world_size):
    """Initialise the default process group with the NCCL backend.

    Args:
        rank: index of the calling process within the group.
        world_size: total number of processes in the group.
    """
    # Plain ASCII quotes: the typographic quotes in the original listing are
    # a syntax error in Python.
    dist.init_process_group('nccl', rank=rank, world_size=world_size)

def cleanup():
    """Destroy the default process group if one has been initialised.

    Safe to call unconditionally (e.g. from a ``finally`` block): it is a
    no-op when distributed support is unavailable or no group was set up.
    """
    if dist.is_available() and dist.is_initialized():
        dist.destroy_process_group()

class DistributedSampler(torch.utils.data.sampler.Sampler):
    """Sampler that gives each rank one contiguous, equally sized index shard.

    The index list ``0 .. len(dataset) - 1`` is padded by repeating indices
    from the start until its length is ``num_samples * num_replicas``; rank
    ``r`` then receives the slice ``[r * num_samples, (r + 1) * num_samples)``.

    Args:
        dataset: sized dataset to sample from.
        num_replicas: number of participating processes.
        rank: index of the current process in ``[0, num_replicas)``.
    """

    def __init__(self, dataset, num_replicas, rank):
        self.dataset = dataset
        self.num_replicas = num_replicas
        self.rank = rank
        # Ceiling division: samples per rank, rounded up so every rank gets
        # the same count.
        self.num_samples = (len(dataset) + num_replicas - 1) // num_replicas
        self.total_size = self.num_samples * self.num_replicas

    def __iter__(self):
        indices = list(range(len(self.dataset)))
        # Pad up to total_size. Repeat the whole list as often as needed so
        # that datasets smaller than the padding are still filled completely.
        padding = self.total_size - len(indices)
        if padding > 0 and indices:
            repeats = (padding + len(indices) - 1) // len(indices)
            indices += (indices * repeats)[:padding]
        offset = self.num_samples * self.rank
        return iter(indices[offset:offset + self.num_samples])

    def __len__(self):
        return self.num_samples
    ```

五、常见问题解决方案

  1. 类别不平衡问题
  • 在损失函数中添加类别权重:
    ```python
  5. from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
  def get_class_weights(dataset, num_classes=None):
      """Compute inverse-frequency class weights from a detection dataset.

      Args:
          dataset: iterable of ``(image, target)`` pairs where
              ``target['labels']`` is an integer tensor of class ids.
          num_classes: length of the returned weight vector. When ``None``
              it is inferred as ``max(label) + 1`` (0 for an empty dataset).

      Returns:
          1-D float32 tensor in which entry ``c`` is
          ``total_count / count[c]``; classes that never occur get weight 0
          rather than infinity.
      """
      label_chunks = [
          target['labels'].reshape(-1).to(torch.int64) for _, target in dataset
      ]
      if label_chunks:
          all_labels = torch.cat(label_chunks)
      else:
          all_labels = torch.zeros(0, dtype=torch.int64)

      if num_classes is None:
          num_classes = int(all_labels.max()) + 1 if all_labels.numel() else 0

      # bincount counts every occurrence; `counts[labels] += 1` would add
      # only 1 per distinct label when a label repeats within one image.
      class_counts = torch.bincount(
          all_labels, minlength=num_classes
      ).to(torch.float32)
      total = class_counts.sum()

      weights = torch.zeros_like(class_counts)
      present = class_counts > 0
      # Same value as 1 / (count / total), restricted to classes that occur.
      weights[present] = total / class_counts[present]
      return weights
  # Re-initialise the classification layer of the box predictor: zero its
  # bias and redraw its weights from a normal distribution (mean 0,
  # std 0.01).
  # NOTE(review): this resets the layer's parameters; it does not by itself
  # feed the result of get_class_weights into the loss.
  cls_score = model.roi_heads.box_predictor.cls_score
  cls_score.bias.data.zero_()
  cls_score.weight.data.normal_(0, 0.01)
    ```
  2. 小目标检测优化
  • 修改锚框生成参数:
    ```python
    from torchvision.models.detection.anchor_utils import AnchorGenerator

# One anchor size per FPN level, shifted one octave below torchvision's
# default ((32,), (64,), (128,), (256,), (512,)) so that the finest level
# proposes 16-pixel anchors for small objects. The original listing repeated
# the defaults and therefore added no smaller anchors.
anchor_sizes = ((16,), (32,), (64,), (128,), (256,))
# Keep three aspect ratios per level: the number of anchors per location
# stays 3, so the existing RPN head still matches the generator.
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)

anchor_generator = AnchorGenerator(
    sizes=anchor_sizes,
    aspect_ratios=aspect_ratios,
)

model.rpn.anchor_generator = anchor_generator
```

通过系统化的测试集划分和评估流程,开发者可以准确评估物体检测模型的性能。本文提供的完整代码和优化技巧,可直接应用于工业级物体检测系统的开发与优化。建议在实际项目中结合具体数据集特点,调整评估参数和模型结构以获得最佳效果。