In "YoloV5-Lite Object Detection: Installation and Inference" we covered installation and running inference with the pretrained weights. This post covers training on custom data and model conversion (ncnn).
1 Training Data Preparation
The dataset uses the plain YOLO layout: each image sits next to a .txt label file with the same name, split into train/ and val/ (the listing below is the dataset root, referenced as coco_2017 in the config later):
.
├── train
│   ├── 000000000049.jpg
│   ├── 000000000049.txt
......
│   ├── 000000581880.txt
│   ├── 000000581900.jpg
│   └── 000000581900.txt
└── val
    ├── 000000000139.jpg
    ├── 000000000139.txt
......
    ├── 000000581357.jpg
    └── 000000581357.txt
3 directories, 46201 files
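Each .txt file is the YOLO-format label for the image of the same name: one object per line in the form class x_center y_center width height, with all coordinates normalized to [0, 1]. For a single-person dataset (class 0), a label line might look like this (values illustrative):
0 0.512 0.463 0.280 0.710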
2 Configuration File Changes
Copy the data yaml that will be used for training:
cd data
cp ./coco128.yaml ./custom.yaml
Edit the configuration; the main change is the dataset directories:
# path
train: coco_2017/train
val: coco_2017/val
# number of classes
nc: 1
# class names
names: [ 'person']
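Before going further it can help to confirm that every image actually has a matching label file; a minimal sketch in Python, assuming the coco_2017/train and coco_2017/val layout shown above:
from pathlib import Path

# Report images that are missing a same-name .txt label file
for split in ("coco_2017/train", "coco_2017/val"):
    imgs = sorted(Path(split).glob("*.jpg"))
    missing = [p.name for p in imgs if not p.with_suffix(".txt").exists()]
    print(f"{split}: {len(imgs)} images, {len(missing)} without labels")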
Copy the model configuration:
cp ./models/v5Lite-s.yaml ./v5Lite-s-custom.yaml
Edit it; the main change is the number of classes:
nc: 1 # number of classes
3 Code Changes
There are a few version-compatibility bugs to patch first:
diff --git a/train.py b/train.py
index 787c243..ff3096e 100644
--- a/train.py
+++ b/train.py
@@ -302,7 +302,7 @@ def train(hyp, opt, device, tb_writer=None):
# Forward
with amp.autocast(enabled=cuda):
pred = model(imgs) # forward
- loss, loss_items = compute_loss(pred, targets.to(device)) # loss scaled by batch_size
+ loss, loss_items = compute_loss(pred, targets.to('cpu')) # loss scaled by batch_size
if rank != -1:
loss *= opt.world_size # gradient averaged between devices in DDP mode
if opt.quad:
diff --git a/utils/datasets.py b/utils/datasets.py
index ec597b6..de07ff1 100644
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -408,7 +408,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing
x[:, 0] = 0
n = len(shapes) # number of images
- bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index
+ bi = np.floor(np.arange(n) / batch_size).astype(np.int64) # batch index
nb = bi[-1] + 1 # number of batches
self.batch = bi # batch index of image
self.n = n
@@ -436,7 +436,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing
elif mini > 1:
shapes[i] = [1, 1 / mini]
- self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride
+ self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int64) * stride
# Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
self.imgs = [None] * n
@@ -648,7 +648,7 @@ def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
dtype = img.dtype # uint8
- x = np.arange(0, 256, dtype=np.int16)
+ x = np.arange(0, 256, dtype=np.int64)
lut_hue = ((x * r[0]) % 180).astype(dtype)
lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
@@ -1034,7 +1034,7 @@ def extract_boxes(path='../coco128/'): # from utils.datasets import *; extract_
b = x[1:] * [w, h, w, h] # box
# b[2:] = b[2:].max() # rectangle to square
b[2:] = b[2:] * 1.2 + 3 # pad
- b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
+ b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int64)
b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image
b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
diff --git a/utils/general.py b/utils/general.py
index 5482629..4cc691b 100644
--- a/utils/general.py
+++ b/utils/general.py
@@ -219,7 +219,7 @@ def labels_to_class_weights(labels, nc=80):
return torch.Tensor()
labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO
- classes = labels[:, 0].astype(np.int) # labels = [class xywh]
+ classes = labels[:, 0].astype(np.int64) # labels = [class xywh]
weights = np.bincount(classes, minlength=nc) # occurrences per class
# Prepend gridpoint count (for uCE training)
@@ -234,7 +234,7 @@ def labels_to_class_weights(labels, nc=80):
def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
# Produces image weights based on class_weights and image contents
- class_counts = np.array([np.bincount(x[:, 0].astype(np.int), minlength=nc) for x in labels])
+ class_counts = np.array([np.bincount(x[:, 0].astype(np.int64), minlength=nc) for x in labels])
image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1)
# index = random.choices(range(n), weights=image_weights, k=1) # weight image sample
return image_weights
diff --git a/utils/loss.py b/utils/loss.py
index 9e78df1..2c6d94b 100644
--- a/utils/loss.py
+++ b/utils/loss.py
@@ -164,7 +164,7 @@ class ComputeLoss:
# Build targets for compute_loss(), input targets(image,class,x,y,w,h)
na, nt = self.na, targets.shape[0] # number of anchors, targets
tcls, tbox, indices, anch = [], [], [], []
- gain = torch.ones(7, device=targets.device) # normalized to gridspace gain
+ gain = torch.ones(7, device=targets.device).long() # normalized to gridspace gain
ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)
targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices
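Most of these changes are needed because np.int was deprecated in NumPy 1.20 and removed in 1.24; casting to np.int64 is the drop-in replacement. A quick check of the batch-index pattern from datasets.py, assuming a recent NumPy:
import numpy as np

# np.int no longer exists in NumPy >= 1.24; np.int64 produces the same batch indices
bi = np.floor(np.arange(10) / 4).astype(np.int64)
print(bi)  # [0 0 0 0 1 1 1 1 2 2]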
4 Training
CPU:
python ./train.py --data ./custom.yaml --cfg ./v5Lite-s-custom.yaml --weights ./v5lite-s.pt --batch-size 64 --device cpu --epochs 100
GPU:
python ./train.py --data ./custom.yaml --cfg ./v5Lite-s-custom.yaml --weights ./v5lite-s.pt --batch-size 64 --device 0 --epochs 100
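After training, the weights are saved under runs/train/exp*/weights/ (best.pt and last.pt); a quick visual check on a few images can be done with the repo's detect.py, for example (paths illustrative):
python detect.py --weights ./runs/train/exp/weights/best.pt --source ./test_images/ --conf-thres 0.45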
In my case, the results were as follows:
- 2K training images: mAP@.5 = 0.67 / mAP@.5:.95 = 0.384, and it stopped improving after roughly 45 epochs.
- 8K images + 10% negative training data: 0.574 / 0.274 after only 3 epochs, 0.695 / 0.411 at the end, about 47 s per epoch on an L4 (negative samples are background images; see the note after this list).
- 8K images + 50% negative training data:
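A note on the negative (background) training data above: in YOLOv5-style training these are simply extra images that contain none of the target classes, dropped into train/ with an empty label file (or no label file at all), for example (file names illustrative):
cp ./backgrounds/street_000123.jpg ./coco_2017/train/
touch ./coco_2017/train/street_000123.txt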
5 Model Conversion
TODO