YoloV5-Lite Object Detection: Fine-tuning + Model Conversion

In "YoloV5-Lite Object Detection: Installation and Inference" we covered installation and running inference with the pretrained weights. This post walks through training on custom data and converting the model (to ncnn).

1 Training Data Preparation

Arrange the data in YOLO format: a train split and a val split, with one .txt label file next to each .jpg image:

.
├── train
│   ├── 000000000049.jpg
│   ├── 000000000049.txt
......
│   ├── 000000581880.txt
│   ├── 000000581900.jpg
│   └── 000000581900.txt
└── val
    ├── 000000000139.jpg
    ├── 000000000139.txt
......
    ├── 000000581357.jpg
    └── 000000581357.txt

3 directories, 46201 files
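Each .txt label uses the standard YOLO format: one object per line, class_id x_center y_center width height, space-separated, with the box coordinates normalized to [0, 1] relative to the image size. Below is a minimal, illustrative sanity check (the script name is hypothetical; the example file comes from the listing above) that you can run over a label file before training:

# check_label.py -- minimal YOLO label sanity check (illustrative, not part of the repo)
from pathlib import Path

def check_label(txt_path, num_classes=1):
    """Return True if every line is 'cls xc yc w h' with coordinates normalized to [0, 1]."""
    ok = True
    for line_no, line in enumerate(Path(txt_path).read_text().splitlines(), 1):
        parts = line.split()
        try:
            cls_id, coords = int(parts[0]), [float(v) for v in parts[1:]]
        except (ValueError, IndexError):
            print(f"{txt_path}:{line_no}: unparsable line")
            ok = False
            continue
        if len(coords) != 4 or not 0 <= cls_id < num_classes or not all(0.0 <= v <= 1.0 for v in coords):
            print(f"{txt_path}:{line_no}: bad class id or un-normalized box")
            ok = False
    return ok

if __name__ == "__main__":
    # example file name taken from the listing above
    print(check_label("train/000000000049.txt", num_classes=1))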

2 Config File Changes

Copy the dataset yaml you want to train with:

cp ./data/coco128.yaml ./custom.yaml

Edit the config; the main changes are the dataset paths:

# path
train: coco_2017/train
val: coco_2017/val

# number of classes
nc: 1

# class names
names: [ 'person']
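Note that the layout from section 1 keeps every .txt label next to its .jpg in the same folder, and the train/val values above simply point at those two folders. A throwaway pairing check (paths assumed from the config above) makes sure no image is missing its label:

# pair_check.py -- confirm every image has a same-named .txt label (illustrative)
from pathlib import Path

for split in ("coco_2017/train", "coco_2017/val"):
    images = sorted(Path(split).glob("*.jpg"))
    missing = [p.name for p in images if not p.with_suffix(".txt").exists()]
    print(f"{split}: {len(images)} images, {len(missing)} without labels", missing[:5])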

Copy the model config:

cp ./models/v5Lite-s.yaml ./v5Lite-s-custom.yaml

Edit it; the main change is the number of classes:

nc: 1  # number of classes
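As a quick sanity check, you can confirm the dataset yaml and the model yaml agree on nc; in upstream YOLOv5 the value from the data yaml ultimately overrides the model yaml when the model is built, but keeping the two consistent avoids confusion. A small check using PyYAML (already a dependency of the repo):

# nc_check.py -- verify class counts are consistent across configs (illustrative)
import yaml

with open("./custom.yaml") as f:
    data_cfg = yaml.safe_load(f)
with open("./v5Lite-s-custom.yaml") as f:
    model_cfg = yaml.safe_load(f)

assert data_cfg["nc"] == len(data_cfg["names"]), "nc must equal the number of class names"
assert data_cfg["nc"] == model_cfg["nc"], "data yaml and model yaml disagree on nc"
print("nc =", data_cfg["nc"], "- configs are consistent")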

3 Code Fixes

There are a few version-compatibility bugs that need patching first:

diff --git a/train.py b/train.py
index 787c243..ff3096e 100644
--- a/train.py
+++ b/train.py
@@ -302,7 +302,7 @@ def train(hyp, opt, device, tb_writer=None):
             # Forward
             with amp.autocast(enabled=cuda):
                 pred = model(imgs)  # forward
-                loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
+                loss, loss_items = compute_loss(pred, targets.to('cpu'))  # loss scaled by batch_size
                 if rank != -1:
                     loss *= opt.world_size  # gradient averaged between devices in DDP mode
                 if opt.quad:
diff --git a/utils/datasets.py b/utils/datasets.py
index ec597b6..de07ff1 100644
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -408,7 +408,7 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
                 x[:, 0] = 0
 
         n = len(shapes)  # number of images
-        bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
+        bi = np.floor(np.arange(n) / batch_size).astype(np.int64)  # batch index
         nb = bi[-1] + 1  # number of batches
         self.batch = bi  # batch index of image
         self.n = n
@@ -436,7 +436,7 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
                 elif mini > 1:
                     shapes[i] = [1, 1 / mini]
 
-            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride
+            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int64) * stride
 
         # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
         self.imgs = [None] * n
@@ -648,7 +648,7 @@ def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
     hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
     dtype = img.dtype  # uint8
 
-    x = np.arange(0, 256, dtype=np.int16)
+    x = np.arange(0, 256, dtype=np.int64)
     lut_hue = ((x * r[0]) % 180).astype(dtype)
     lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
     lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
@@ -1034,7 +1034,7 @@ def extract_boxes(path='../coco128/'):  # from utils.datasets import *; extract_
                     b = x[1:] * [w, h, w, h]  # box
                     # b[2:] = b[2:].max()  # rectangle to square
                     b[2:] = b[2:] * 1.2 + 3  # pad
-                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
+                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int64)
 
                     b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                     b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
diff --git a/utils/general.py b/utils/general.py
index 5482629..4cc691b 100644
--- a/utils/general.py
+++ b/utils/general.py
@@ -219,7 +219,7 @@ def labels_to_class_weights(labels, nc=80):
         return torch.Tensor()
 
     labels = np.concatenate(labels, 0)  # labels.shape = (866643, 5) for COCO
-    classes = labels[:, 0].astype(np.int)  # labels = [class xywh]
+    classes = labels[:, 0].astype(np.int64)  # labels = [class xywh]
     weights = np.bincount(classes, minlength=nc)  # occurrences per class
 
     # Prepend gridpoint count (for uCE training)
@@ -234,7 +234,7 @@ def labels_to_class_weights(labels, nc=80):
 
 def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
     # Produces image weights based on class_weights and image contents
-    class_counts = np.array([np.bincount(x[:, 0].astype(np.int), minlength=nc) for x in labels])
+    class_counts = np.array([np.bincount(x[:, 0].astype(np.int64), minlength=nc) for x in labels])
     image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1)
     # index = random.choices(range(n), weights=image_weights, k=1)  # weight image sample
     return image_weights
diff --git a/utils/loss.py b/utils/loss.py
index 9e78df1..2c6d94b 100644
--- a/utils/loss.py
+++ b/utils/loss.py
@@ -164,7 +164,7 @@ class ComputeLoss:
         # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
         na, nt = self.na, targets.shape[0]  # number of anchors, targets
         tcls, tbox, indices, anch = [], [], [], []
-        gain = torch.ones(7, device=targets.device)  # normalized to gridspace gain
+        gain = torch.ones(7, device=targets.device).long()  # normalized to gridspace gain
         ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
         targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  # append anchor indices
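
Most of these hunks exist because the np.int alias was deprecated in NumPy 1.20 and removed in 1.24, so the original code crashes on a recent NumPy. If you would rather not patch every call site, a hacky but common alternative is to restore the alias once near the top of train.py, roughly like this sketch:

# compatibility shim for NumPy >= 1.24, where the np.int alias was removed;
# a hacky alternative to patching each call site (place near the top of train.py)
import numpy as np
if not hasattr(np, "int"):
    np.int = int  # restore the removed alias for legacy code paths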
 

4 Training

CPU:

python ./train.py --data ./custom.yaml --cfg ./v5Lite-s-custom.yaml --weights ./v5lite-s.pt --batch-size 64 --device cpu --epochs 100

GPU:

python ./train.py --data ./custom.yaml --cfg ./v5Lite-s-custom.yaml --weights ./v5lite-s.pt --batch-size 64 --device 0 --epochs 100
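
Assuming the fork keeps YOLOv5's defaults, checkpoints and training plots are written under runs/train/exp*/, with the best weights at runs/train/exp*/weights/best.pt; you can feed that file back into detect.py from the previous post to spot-check the fine-tuned model.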

In my case, the results were as follows:

  • With 2K training images: mAP@.5 = 0.67 / mAP@.5:.95 = 0.384; improvement plateaued after roughly 45 epochs.
  • With 8K images plus 10% negative (background-only) training data: 0.574 / 0.274 after only 3 epochs, ending at 0.695 / 0.411; one epoch took about 47 s (L4).
  • With 8K images plus 50% negative training data,

5 Model Conversion

TODO

 
