YoloV5-Lite目标检测之“微调 + 模型转换”

在《YoloV5-Lite目标检测之“安装推理”》中，我们完成了环境安装和基于预训练权重的推理；下面介绍如何使用自定义数据进行训练（微调），以及模型转换（ncnn）。

1 训练数据准备

├── train

│ ├── 000000000049.jpg

│ ├── 000000000049.txt

......

│ ├── 000000581880.txt

│ ├── 000000581900.jpg

│ └── 000000581900.txt

└── val

├── 000000000139.jpg

├── 000000000139.txt

......

├── 000000581357.jpg

└── 000000581357.txt

3 directories, 46201 files

. ├── train │ ├── 000000000049.jpg │ ├── 000000000049.txt ...... │ ├── 000000581880.txt │ ├── 000000581900.jpg │ └── 000000581900.txt └── val ├── 000000000139.jpg ├── 000000000139.txt ...... ├── 000000581357.jpg └── 000000581357.txt 3 directories, 46201 files

.
├── train
│   ├── 000000000049.jpg
│   ├── 000000000049.txt
......
│   ├── 000000581880.txt
│   ├── 000000581900.jpg
│   └── 000000581900.txt
└── val
    ├── 000000000139.jpg
    ├── 000000000139.txt
......
    ├── 000000581357.jpg
    └── 000000581357.txt

3 directories, 46201 files

2 配置文件修改

复制要训练的yml配置

# 在仓库根目录下执行（无需 cd data）

cp ./data/coco128.yaml ./custom.yaml

cp ./data/coco128.yaml ./custom.yaml

# 在仓库根目录下执行（无需 cd data）
cp ./data/coco128.yaml ./custom.yaml

修改配置，主要是目录

# path

train: coco_2017/train

val: coco_2017/val

# number of classes

nc: 1

# class names

names: [ 'person']

# path train: coco_2017/train val: coco_2017/val # number of classes nc: 1 # class names names: [ 'person']

# path
train: coco_2017/train
val: coco_2017/val

# number of classes
nc: 1

# class names
names: [ 'person']

复制模型配置

cp ./models/v5Lite-s.yaml ./v5Lite-s-custom.yaml

cp ./models/v5Lite-s.yaml ./v5Lite-s-custom.yaml

修改模型配置，主要是类别数（nc），需与数据配置中的 nc 保持一致

nc: 1 # number of classes

nc: 1  # number of classes

3 代码修改

原代码在较新版本的依赖下有一些兼容性问题（例如新版 NumPy 已移除 np.int，需改为 np.int64；新版 PyTorch 下 loss 计算中的类型不匹配），按下面的 diff 修复：

diff --git a/train.py b/train.py

index 787c243..ff3096e 100644

--- a/train.py

+++ b/train.py

@@ -302,7 +302,7 @@ def train(hyp, opt, device, tb_writer=None):

# Forward

with amp.autocast(enabled=cuda):

pred = model(imgs) # forward

- loss, loss_items = compute_loss(pred, targets.to(device)) # loss scaled by batch_size

+ loss, loss_items = compute_loss(pred, targets.to('cpu')) # loss scaled by batch_size

if rank != -1:

loss *= opt.world_size # gradient averaged between devices in DDP mode

if opt.quad:

diff --git a/utils/datasets.py b/utils/datasets.py

index ec597b6..de07ff1 100644

--- a/utils/datasets.py

+++ b/utils/datasets.py

@@ -408,7 +408,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing

x[:, 0] = 0

n = len(shapes) # number of images

- bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index

+ bi = np.floor(np.arange(n) / batch_size).astype(np.int64) # batch index

nb = bi[-1] + 1 # number of batches

self.batch = bi # batch index of image

self.n = n

@@ -436,7 +436,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing

elif mini > 1:

shapes[i] = [1, 1 / mini]

- self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride

+ self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int64) * stride

# Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)

self.imgs = [None] * n

@@ -648,7 +648,7 @@ def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):

hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))

dtype = img.dtype # uint8

- x = np.arange(0, 256, dtype=np.int16)

+ x = np.arange(0, 256, dtype=np.int64)

lut_hue = ((x * r[0]) % 180).astype(dtype)

lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)

lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

@@ -1034,7 +1034,7 @@ def extract_boxes(path='../coco128/'): # from utils.datasets import *; extract_

b = x[1:] * [w, h, w, h] # box

# b[2:] = b[2:].max() # rectangle to square

b[2:] = b[2:] * 1.2 + 3 # pad

- b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)

+ b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int64)

b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image

b[[1, 3]] = np.clip(b[[1, 3]], 0, h)

diff --git a/utils/general.py b/utils/general.py

index 5482629..4cc691b 100644

--- a/utils/general.py

+++ b/utils/general.py

@@ -219,7 +219,7 @@ def labels_to_class_weights(labels, nc=80):

return torch.Tensor()

labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO

- classes = labels[:, 0].astype(np.int) # labels = [class xywh]

+ classes = labels[:, 0].astype(np.int64) # labels = [class xywh]

weights = np.bincount(classes, minlength=nc) # occurrences per class

# Prepend gridpoint count (for uCE training)

@@ -234,7 +234,7 @@ def labels_to_class_weights(labels, nc=80):

def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):

# Produces image weights based on class_weights and image contents

- class_counts = np.array([np.bincount(x[:, 0].astype(np.int), minlength=nc) for x in labels])

+ class_counts = np.array([np.bincount(x[:, 0].astype(np.int64), minlength=nc) for x in labels])

image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1)

# index = random.choices(range(n), weights=image_weights, k=1) # weight image sample

return image_weights

diff --git a/utils/loss.py b/utils/loss.py

index 9e78df1..2c6d94b 100644

--- a/utils/loss.py

+++ b/utils/loss.py

@@ -164,7 +164,7 @@ class ComputeLoss:

# Build targets for compute_loss(), input targets(image,class,x,y,w,h)

na, nt = self.na, targets.shape[0] # number of anchors, targets

tcls, tbox, indices, anch = [], [], [], []

- gain = torch.ones(7, device=targets.device) # normalized to gridspace gain

+ gain = torch.ones(7, device=targets.device).long() # normalized to gridspace gain

ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)

targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices

diff --git a/train.py b/train.py index 787c243..ff3096e 100644 --- a/train.py +++ b/train.py @@ -302,7 +302,7 @@ def train(hyp, opt, device, tb_writer=None): # Forward with amp.autocast(enabled=cuda): pred = model(imgs) # forward - loss, loss_items = compute_loss(pred, targets.to(device)) # loss scaled by batch_size + loss, loss_items = compute_loss(pred, targets.to('cpu')) # loss scaled by batch_size if rank != -1: loss *= opt.world_size # gradient averaged between devices in DDP mode if opt.quad: diff --git a/utils/datasets.py b/utils/datasets.py index ec597b6..de07ff1 100644 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -408,7 +408,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing x[:, 0] = 0 n = len(shapes) # number of images - bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index + bi = np.floor(np.arange(n) / batch_size).astype(np.int64) # batch index nb = bi[-1] + 1 # number of batches self.batch = bi # batch index of image self.n = n @@ -436,7 +436,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing elif mini > 1: shapes[i] = [1, 1 / mini] - self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride + self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int64) * stride # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM) self.imgs = [None] * n @@ -648,7 +648,7 @@ def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5): hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV)) dtype = img.dtype # uint8 - x = np.arange(0, 256, dtype=np.int16) + x = np.arange(0, 256, dtype=np.int64) lut_hue = ((x * r[0]) % 180).astype(dtype) lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) lut_val = np.clip(x * r[2], 0, 255).astype(dtype) @@ -1034,7 +1034,7 @@ def extract_boxes(path='../coco128/'): # from utils.datasets import *; extract_ b = x[1:] * [w, h, w, h] # box # b[2:] = b[2:].max() # rectangle to square b[2:] = 
b[2:] * 1.2 + 3 # pad - b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) + b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int64) b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image b[[1, 3]] = np.clip(b[[1, 3]], 0, h) diff --git a/utils/general.py b/utils/general.py index 5482629..4cc691b 100644 --- a/utils/general.py +++ b/utils/general.py @@ -219,7 +219,7 @@ def labels_to_class_weights(labels, nc=80): return torch.Tensor() labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO - classes = labels[:, 0].astype(np.int) # labels = [class xywh] + classes = labels[:, 0].astype(np.int64) # labels = [class xywh] weights = np.bincount(classes, minlength=nc) # occurrences per class # Prepend gridpoint count (for uCE training) @@ -234,7 +234,7 @@ def labels_to_class_weights(labels, nc=80): def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)): # Produces image weights based on class_weights and image contents - class_counts = np.array([np.bincount(x[:, 0].astype(np.int), minlength=nc) for x in labels]) + class_counts = np.array([np.bincount(x[:, 0].astype(np.int64), minlength=nc) for x in labels]) image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1) # index = random.choices(range(n), weights=image_weights, k=1) # weight image sample return image_weights diff --git a/utils/loss.py b/utils/loss.py index 9e78df1..2c6d94b 100644 --- a/utils/loss.py +++ b/utils/loss.py @@ -164,7 +164,7 @@ class ComputeLoss: # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets tcls, tbox, indices, anch = [], [], [], [] - gain = torch.ones(7, device=targets.device) # normalized to gridspace gain + gain = torch.ones(7, device=targets.device).long() # normalized to gridspace gain ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, 
None]), 2) # append anchor indices

diff --git a/train.py b/train.py
index 787c243..ff3096e 100644
--- a/train.py
+++ b/train.py
@@ -302,7 +302,7 @@ def train(hyp, opt, device, tb_writer=None):
             # Forward
             with amp.autocast(enabled=cuda):
                 pred = model(imgs)  # forward
-                loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
+                loss, loss_items = compute_loss(pred, targets.to('cpu'))  # loss scaled by batch_size
                 if rank != -1:
                     loss *= opt.world_size  # gradient averaged between devices in DDP mode
                 if opt.quad:
diff --git a/utils/datasets.py b/utils/datasets.py
index ec597b6..de07ff1 100644
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -408,7 +408,7 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
                 x[:, 0] = 0
 
         n = len(shapes)  # number of images
-        bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
+        bi = np.floor(np.arange(n) / batch_size).astype(np.int64)  # batch index
         nb = bi[-1] + 1  # number of batches
         self.batch = bi  # batch index of image
         self.n = n
@@ -436,7 +436,7 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
                 elif mini > 1:
                     shapes[i] = [1, 1 / mini]
 
-            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride
+            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int64) * stride
 
         # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
         self.imgs = [None] * n
@@ -648,7 +648,7 @@ def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
     hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
     dtype = img.dtype  # uint8
 
-    x = np.arange(0, 256, dtype=np.int16)
+    x = np.arange(0, 256, dtype=np.int64)
     lut_hue = ((x * r[0]) % 180).astype(dtype)
     lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
     lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
@@ -1034,7 +1034,7 @@ def extract_boxes(path='../coco128/'):  # from utils.datasets import *; extract_
                     b = x[1:] * [w, h, w, h]  # box
                     # b[2:] = b[2:].max()  # rectangle to square
                     b[2:] = b[2:] * 1.2 + 3  # pad
-                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
+                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int64)
 
                     b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                     b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
diff --git a/utils/general.py b/utils/general.py
index 5482629..4cc691b 100644
--- a/utils/general.py
+++ b/utils/general.py
@@ -219,7 +219,7 @@ def labels_to_class_weights(labels, nc=80):
         return torch.Tensor()
 
     labels = np.concatenate(labels, 0)  # labels.shape = (866643, 5) for COCO
-    classes = labels[:, 0].astype(np.int)  # labels = [class xywh]
+    classes = labels[:, 0].astype(np.int64)  # labels = [class xywh]
     weights = np.bincount(classes, minlength=nc)  # occurrences per class
 
     # Prepend gridpoint count (for uCE training)
@@ -234,7 +234,7 @@ def labels_to_class_weights(labels, nc=80):
 
 def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
     # Produces image weights based on class_weights and image contents
-    class_counts = np.array([np.bincount(x[:, 0].astype(np.int), minlength=nc) for x in labels])
+    class_counts = np.array([np.bincount(x[:, 0].astype(np.int64), minlength=nc) for x in labels])
     image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1)
     # index = random.choices(range(n), weights=image_weights, k=1)  # weight image sample
     return image_weights
diff --git a/utils/loss.py b/utils/loss.py
index 9e78df1..2c6d94b 100644
--- a/utils/loss.py
+++ b/utils/loss.py
@@ -164,7 +164,7 @@ class ComputeLoss:
         # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
         na, nt = self.na, targets.shape[0]  # number of anchors, targets
         tcls, tbox, indices, anch = [], [], [], []
-        gain = torch.ones(7, device=targets.device)  # normalized to gridspace gain
+        gain = torch.ones(7, device=targets.device).long()  # normalized to gridspace gain
         ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
         targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  # append anchor indices

4 训练

cpu

python ./train.py --data ./custom.yaml --cfg ./v5Lite-s-custom.yaml --weights ./v5lite-s.pt --batch-size 64 --device cpu --epochs 100

python ./train.py --data ./custom.yaml --cfg ./v5Lite-s-custom.yaml --weights ./v5lite-s.pt --batch-size 64 --device cpu --epochs 100

gpu

python ./train.py --data ./custom.yaml --cfg ./v5Lite-s-custom.yaml --weights ./v5lite-s.pt --batch-size 64 --device 0 --epochs 100

python ./train.py --data ./custom.yaml --cfg ./v5Lite-s-custom.yaml --weights ./v5lite-s.pt --batch-size 64 --device 0 --epochs 100

以我的例子，效果如下：

2K训练数据，能做到 mAP@.5 = 0.67 / mAP@.5:.95 = 0.384，大概45轮之后就不再提升了
8K + 10%负向训练数据，3个Epoch就能做到 0.574 / 0.274（mAP@.5 / mAP@.5:.95），最终为 0.695 / 0.411，一个Epoch约47s（L4 GPU）
8K + 50%负向训练数据，（结果待补充）

5 模型转换

TODO

四号程序员

Keep It Simple and Stupid