YOLO v2 usage notes

Date: 2024-01-05 18:43:38

Everything below is based on YOLO v2. The current code base is v3; you can clone it and then git checkout back to a v2-era version.
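For example (a rough sketch; <v2-commit> is a placeholder for whatever v2-era commit you pick out of the log):

git clone https://github.com/AlexeyAB/darknet
cd darknet
git log --oneline
git checkout <v2-commit>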

After playing with YOLO for three or four months, I found its numbers rather unstable; YOLO is only good for small-scale experiments.

Weights trained with v2 give different results when used for prediction with v3.

My environment is Windows 10 + CUDA 9.0 + OpenCV 3.4.0 + VS2015.

First download the source from: https://github.com/AlexeyAB/darknet

After downloading, open darknet.vcxproj in a text editor and change the two occurrences of cuda9.1 to cuda9.0.

You also need to copy the two OpenCV DLLs into the x64 folder.

Open darknet.sln with VS2015 and build it; you will get darknet.exe under darknet-master\build\darknet\x64.

At this point you can already use a pretrained model to predict the classes it was trained on.
Of course, download the corresponding weights file first.
darknet.exe detector test cfg/combine9k.data yolo9000.cfg yolo9000.weights data/dog.jpg

First, prepare the training images!

Be sure to rename the files first! Otherwise adding, changing, or removing samples later is painful.

Then annotate them with the Windows build of labelImg,
modify the relevant configuration files, and start training.

Before training, download a darknet19_448.conv.23 file.

The training command is as follows:
darknet.exe detector train cfg/voc.data cfg/yolo-voc.cfg cfg/darknet19_448.conv.23
Prepare the .data and .cfg files, as well as the training dataset.

If training complains that it cannot find the txt files, just copy the txt files into the image folder.

At the end you get your own weights file, which can be used to predict your own classes.

2018-08-24 08:58:19

To summarize a fairly complete workflow, there are roughly the following steps:

1. Continuous image collection, and continuous renaming of the newly collected images.

The collected images may be named by seconds, by date, or by some other convention.

Later, during annotation and when testing after training, if some sample images turn out to be poor and need to be removed or skipped, names that are hard to look up make that a pain.

2. Annotation technique. The number of xml files actually annotated ends up smaller than the number of collected images, because some images have a poor angle or poor lighting.

3. Offline data augmentation.

YOLO has some built-in online augmentation, but without reading the code in depth it is hard to modify, and the provided augmentations are limited. So I wrote a simple offline augmentation.

4. Preprocessing the training data, editing the cfg file, and so on.

5. Training and testing.

6. Calling the model from OpenCV, in both C++ and Python, plus batch cropping of detections.

=====================================================

1. Renaming: rename.py

import os

def getFilenames(filepath):
    '''
    Return all file names in a folder, without extensions, ignoring subfolders.
    '''
    filenames = []
    for file in os.listdir(filepath):
        pt = os.path.join(filepath, file)
        if os.path.isfile(pt):
            filename = os.path.splitext(file)[0]
            filenames.append(filename)
    return filenames

filepath = "origin_img"
filenames = getFilenames(filepath)
print(filenames)

# Only rename images newly added to the folder (incremental renaming),
# so first find the largest number already used as a file name.
def get_max_num(filenames):
    max_num = 0
    for name in filenames:
        if name.isdigit() and int(name) < 10000:
            if int(name) > max_num:
                max_num = int(name)
    return max_num

print("max num:", get_max_num(filenames))
renameCount = get_max_num(filenames) + 1
#renameCount = 1

for file in os.listdir(filepath):
    if os.path.isfile(os.path.join(filepath, file)):      # regular files only
        filename = os.path.splitext(file)[0]              # name without extension
        if not filename.isdigit():                        # not numeric yet: rename it
            print("rename count 1:", renameCount)
            os.rename(os.path.join(filepath, file),
                      os.path.join(filepath, str('%03d' % renameCount) + ".jpeg"))
            renameCount += 1
        if filename.isdigit() and int(filename) > 10000:  # e.g. named by a timestamp
            print("rename count 2:", renameCount)
            os.rename(os.path.join(filepath, file),
                      os.path.join(filepath, str('%03d' % renameCount) + ".jpeg"))
            renameCount += 1

2. Annotation technique

With labelImg, think in terms of features: frame the most distinctive features of the target object. Images that differ too much from the conditions the program will actually run under should not be annotated at all. If you do annotate samples whose features are not very distinct, the sample count has to go up to compensate.

Fewer xml files get annotated than there are images, so the annotated images need to be pulled out into their own folder:

import os
import shutil

def getFilenames(filepath):
    '''Return all file names in a folder, without extensions.
    '''
    filelist = os.listdir(filepath)
    filenames = []
    for files in filelist:
        filename = os.path.splitext(files)[0]
        # print(files)
        # print(filename)
        filenames.append(filename)
    return filenames

xmlpath = 'xml'              # folder holding the finished annotation xml files
imgpath = 'img'              # folder used during annotation; holds more images than xml files
img_write_path = 'img_less'  # the annotated images get copied here

filenames = getFilenames(xmlpath)
for i in range(len(filenames)):
    filename = filenames[i]
    # print(filename)
    jpgpath = imgpath + "/" + str(filename) + ".jpeg"
    # print(jpgpath)
    jpg_wrt_path = img_write_path + "/" + str(filename) + ".jpg"  # note: reads .jpeg, writes .jpg
    shutil.copy(jpgpath, jpg_wrt_path)

3. Offline data augmentation

Some augmentations require editing the xml, such as horizontal flips, rotations, and crops; some do not, such as perturbing color or lighting.

I originally wrote both horizontal flip and random crop, but since YOLO resizes images to 416 anyway, random cropping turned out to be pretty useless.

When augmenting, be careful not to create a pile of artificial features for YOLO to learn; augment sensibly.

Only the horizontal flip is shown here.

util.py

import numpy as np
import cv2
import matplotlib.pyplot as plt
import random
import os

def showimg(img):
    channelNum = len(img.shape)
    if channelNum == 3:
        fig = plt.subplots(1), plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    if channelNum == 2:
        fig = plt.subplots(1), plt.imshow(img)

def scaleimg(img, scale=1.0):
    H, W, C = img.shape
    size = (int(scale * W), int(scale * H))
    img = cv2.resize(img, size, interpolation=cv2.INTER_AREA)
    del H, W, C, size, scale
    return img.copy()

# img = rotateimg(image, angle)
def rotateimg(image, angle, center=None, scale=1.0):
    # get the image size
    (h, w) = image.shape[:2]
    # if no rotation center is given, use the image center
    if center is None:
        center = (w / 2, h / 2)
    # perform the rotation; a positive angle rotates counter-clockwise,
    # and angle is in degrees, not radians
    M = cv2.getRotationMatrix2D(center, angle, scale)
    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC)
    # rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    return rotated

'''
1. Read an xml file and return its contents.
   Input:  CLASS_NAMES tuple, xml path
   Output: (boxes, H, W); boxes is a 2-D np array with 6 columns:
           id classid xmin xmax ymin ymax
           0  1       2    3    4    5
2. Write boxes / CLASS_NAMES / H / W into an xml file.
   Input:  boxes, CLASS_NAMES, H, W, xml path
   Output: an xml file on disk
3. Draw an image with its boxes, given img, boxes and class_names.
'''
import xml.etree.ElementTree as ET
import numpy as np

#CLASS_NAMES = ('person', 'dog')  # ids start at 0; order is optional here, but best kept consistent

# boxes columns: id classid xmin xmax ymin ymax
#                0  1       2    3    4    5
def xml2boxes(xmlpath, CLASS_NAMES):
    print("xmlpath:", xmlpath)
    cls_to_idx = dict(zip(CLASS_NAMES, range(len(CLASS_NAMES))))
    idx_to_cls = dict(zip(range(len(CLASS_NAMES)), CLASS_NAMES))
    # print(cls_to_idx)
    # print(idx_to_cls)
    annotations = ET.parse(xmlpath)
    # read H, W, C
    size = annotations.find('size')
    W = int(size.find('width').text)
    H = int(size.find('height').text)
    C = int(size.find('depth').text)
    # read the class and coordinates of every object
    bbox = list()
    count = 1
    for obj in annotations.iter('object'):
        line = []
        bndbox_anno = obj.find('bndbox')
        # xmin etc. are 1-based
        tmp = map(int, [bndbox_anno.find('xmin').text,
                        bndbox_anno.find('xmax').text,
                        bndbox_anno.find('ymin').text,
                        bndbox_anno.find('ymax').text])
        tmp = list(tmp)  # 1 x 4
        name = obj.find('name').text.lower().strip()
        line.append(count)
        line.append(cls_to_idx[name])
        line.append(tmp[0])
        line.append(tmp[1])
        line.append(tmp[2])
        line.append(tmp[3])
        count = count + 1
        # print(line)
        bbox.append(line)
    boxes = np.stack(bbox).astype(np.int32)
    return boxes, H, W

#boxes, H, W = xml2boxes("1.xml", CLASS_NAMES)
#print("boxes:\n", boxes)

# With a single class, CLASS_NAMES still needs an extra dummy string,
# e.g. CLASS_NAMES = ("apple", "xxxx"). This is a bug, not fixed yet.
from lxml.etree import Element, SubElement, tostring
from xml.dom.minidom import parseString

######################################################
# boxes2xml_labelImg(boxes, CLASS_NAMES, H, W, xmlpath, wrtin_img_folder_name, imgName, img_fullpath)
def boxes2xml_labelImg(boxes, CLASS_NAMES, H, W, xmlpath, wrtin_img_folder_name,
                       imgName, img_fullpath):
    '''
    Writes an xml that labelImg can open. Here CLASS_NAMES must be ordered:
    its indices have to match the class ids in the second column of boxes.
    '''
    cls_to_idx = dict(zip(CLASS_NAMES, range(len(CLASS_NAMES))))
    idx_to_cls = dict(zip(range(len(CLASS_NAMES)), CLASS_NAMES))

    node_annotation = Element('annotation')
    #################################################
    node_folder = SubElement(node_annotation, 'folder')
    node_filename = SubElement(node_annotation, 'filename')
    node_path = SubElement(node_annotation, 'path')
    node_source = SubElement(node_annotation, 'source')
    node_database = SubElement(node_source, 'database')
    node_folder.text = wrtin_img_folder_name  # fixed: assigned once, never changes
    node_filename.text = imgName              # image file name without extension
    node_path.text = img_fullpath             # changes with every file
    node_database.text = "Unknown"
    node_size = SubElement(node_annotation, 'size')
    #################################################
    # node_size
    node_width = SubElement(node_size, 'width')
    node_height = SubElement(node_size, 'height')
    node_depth = SubElement(node_size, 'depth')
    node_width.text = str(W)
    node_height.text = str(H)
    node_depth.text = str(3)  # assume color images
    #################################################
    node_segmented = SubElement(node_annotation, 'segmented')
    node_segmented.text = ""
    #################################################
    # one node_object per box; loop over the boxes
    for i in range(boxes.shape[0]):
        node_object = SubElement(node_annotation, 'object')
        classid = boxes[i, 1]
        # print(idx_to_cls[classid])
        node_name = SubElement(node_object, 'name')
        node_name.text = idx_to_cls[classid]
        node_pose = SubElement(node_object, 'pose')
        node_truncated = SubElement(node_object, 'truncated')
        node_Difficult = SubElement(node_object, 'Difficult')
        node_pose.text = "Unspecified"
        node_truncated.text = ""
        node_Difficult.text = ""
        node_bndbox = SubElement(node_object, 'bndbox')
        node_xmin = SubElement(node_bndbox, 'xmin')
        node_ymin = SubElement(node_bndbox, 'ymin')
        node_xmax = SubElement(node_bndbox, 'xmax')
        node_ymax = SubElement(node_bndbox, 'ymax')
        node_xmin.text = str(boxes[i, 2])
        node_xmax.text = str(boxes[i, 3])
        node_ymin.text = str(boxes[i, 4])
        node_ymax.text = str(boxes[i, 5])
    ###################
    xml = tostring(node_annotation, pretty_print=True)  # pretty-print with line breaks
    dom = parseString(xml)
    test_string = xml.decode('utf-8')
    #print('test:\n', test_string)
    with open(xmlpath, "w") as text_file:
        text_file.write(test_string)

######################################################
def drawboxes(imgpath, boxes, CLASS_NAMES):
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches
    import cv2
    cls_to_idx = dict(zip(CLASS_NAMES, range(len(CLASS_NAMES))))
    idx_to_cls = dict(zip(range(len(CLASS_NAMES)), CLASS_NAMES))
    if isinstance(imgpath, str):
        img = cv2.imread(imgpath)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    if isinstance(imgpath, np.ndarray):
        img = imgpath
    fig, ax = plt.subplots(1)
    for i in range(boxes.shape[0]):
        bndbox = list(boxes[i, :])
        x = bndbox[2]
        y = bndbox[4]
        w = bndbox[3] - bndbox[2]
        h = bndbox[5] - bndbox[4]
        rect = patches.Rectangle((x, y), w, h, linewidth=1, edgecolor='yellow', facecolor='none')
        ax.add_patch(rect)
        name = idx_to_cls[boxes[i, 1]]
        ax.text(x - 5, y - 5, name, style='italic', color='yellow', fontsize=12)
    ax.imshow(img)

#drawboxes("1.jpg", boxes, CLASS_NAMES)

##################################
def getFilenames(filepath):
    '''Return all file names in a folder, without extensions.
    '''
    filelist = os.listdir(filepath)
    filenames = []
    for files in filelist:
        filename = os.path.splitext(files)[0]
        # print(files)
        # print(filename)
        filenames.append(filename)
    return filenames

def fliplr_boxes(boxes, W):
    '''Flip boxes horizontally.'''
    boxes_copy = boxes.copy()
    xmin = boxes[:, 2].copy()
    xmax = boxes[:, 3].copy()
    boxes_copy[:, 3] = W - 1 - xmin  # note: column 3 gets W-1-xmin and column 2 gets W-1-xmax,
    boxes_copy[:, 2] = W - 1 - xmax  # not the other way around, otherwise xmin would exceed xmax
    return boxes_copy
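
A quick sanity check of the flip arithmetic (numbers are made up for illustration): with W = 100, a box spanning xmin = 10, xmax = 50 maps to xmin = 100 - 1 - 50 = 49 and xmax = 100 - 1 - 10 = 89.

import numpy as np
from util import fliplr_boxes  # assumes util.py above is importable

# one box, columns: id classid xmin xmax ymin ymax
boxes = np.array([[1, 0, 10, 50, 20, 60]], dtype=np.int32)
print(fliplr_boxes(boxes, 100))
# [[ 1  0 49 89 20 60]]  -- x columns mirrored, y columns untouched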

main.py

import os
import cv2
import numpy as np
import random
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from util import *

img_read_path = "img_less"
xml_read_path = "xml"
img_write_path = "fliped_img"  # flipped images are written here
xml_write_path = "fliped_xml"  # flipped xml files are written here

filenames = getFilenames(xml_read_path)
CLASS_NAMES = ('person', 'aa')  # known bug, too lazy to fix: a dummy second name is needed even with one class
count = 201                     # starting index for the flipped files
wrtin_img_folder_name = "fliped_img"

for i in range(len(filenames)):
    name = filenames[i]
    imgname = img_read_path + "/" + str(name) + ".jpg"
    img = cv2.imread(imgname)
    xmlname = xml_read_path + "/" + str(name) + ".xml"
    boxes, H, W = xml2boxes(xmlname, CLASS_NAMES)
    # print("xmlname:", xmlname)
    H, W, C = img.shape
    ##############################
    fliped_boxes = fliplr_boxes(boxes, W)
    fliped_img = cv2.flip(img, 1)
    ##############################
    FileName = str(count)
    jpgpath = img_write_path + "/" + FileName + ".jpg"
    cv2.imwrite(jpgpath, fliped_img)
    xmlpath = xml_write_path + "/" + FileName + ".xml"
    boxes2xml_labelImg(fliped_boxes, CLASS_NAMES, H, W, xmlpath,
                       wrtin_img_folder_name, FileName, jpgpath)
    count = count + 1

4. Preprocessing the training data and editing the cfg file

The following two scripts must be run; I copied them from elsewhere.

trans1.py

import os
import shutil

savepath = os.getcwd()
img_path = savepath + "/img_less"  # folder holding the training images
xml_path = savepath + "/xml"       # X images were annotated in total

val_num = 10  # size of the validation set; adjust as needed

# the 4 directories below get created
validateImage_path = savepath + "/validateImage"
trainImage_path = savepath + "/trainImage"
if os.path.exists(validateImage_path) == False:
    os.mkdir(validateImage_path)
if os.path.exists(trainImage_path) == False:
    os.mkdir(trainImage_path)

validateImageXML_path = savepath + "/validateImageXML"
trainImageXML_path = savepath + "/trainImageXML"
if os.path.exists(validateImageXML_path) == False:
    os.mkdir(validateImageXML_path)
if os.path.exists(trainImageXML_path) == False:
    os.mkdir(trainImageXML_path)

#=================================================
filelist = os.listdir(xml_path)  # the xml folder determines the sample count
count = 0
for files in filelist:
    filename = os.path.splitext(files)[0]  # file name
    origin_jpg_name = os.path.join(img_path, filename + '.jpg')
    validateImage_jpg_name = os.path.join(validateImage_path, filename + '.jpg')
    trainImage_jpg_name = os.path.join(trainImage_path, filename + '.jpg')
    # print(validateImage_jpg_name)
    if count < val_num:
        shutil.copy(origin_jpg_name, validateImage_jpg_name)  # copy validate image
        xml_olddir = os.path.join(xml_path, filename + ".xml")
        xml_newdir = os.path.join(validateImageXML_path, filename + ".xml")
        shutil.copyfile(xml_olddir, xml_newdir)               # copy validate xml
    else:
        shutil.copy(origin_jpg_name, trainImage_jpg_name)
        xml_olddir = os.path.join(xml_path, filename + ".xml")
        xml_newdir = os.path.join(trainImageXML_path, filename + ".xml")
        shutil.copyfile(xml_olddir, xml_newdir)
    count = count + 1

validate_txtpath = savepath + "/validateImageId.txt"
train_txtpath = savepath + "/trainImageId.txt"

def listname(path, idtxtpath):
    filelist = os.listdir(path)  # everything in the folder, including subfolders
    f = open(idtxtpath, 'w')
    for files in filelist:       # iterate over all files
        Olddir = os.path.join(path, files)  # original path
        if os.path.isdir(Olddir):           # skip folders
            continue
        filename = os.path.splitext(files)[0]  # file name
        f.write(filename)
        f.write('\n')
    f.close()

listname(validateImage_path, validate_txtpath)
listname(trainImage_path, train_txtpath)

trans2.py

import xml.etree.ElementTree as ET
import pickle
import string
import os
import shutil
from os import listdir, getcwd
from os.path import join

sets = [('', 'train')]
classes = ["person"]

def convert(size, box):
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return (x, y, w, h)

def convert_annotation(image_id, flag, savepath):
    if flag == 0:
        in_file = open(savepath + '/trainImageXML/%s.xml' % (image_id))
        labeltxt = savepath + '/trainImageLabelTxt'
        if os.path.exists(labeltxt) == False:
            os.mkdir(labeltxt)
        out_file = open(savepath + '/trainImageLabelTxt/%s.txt' % (image_id), 'w')
        tree = ET.parse(in_file)
        root = tree.getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)
    elif flag == 1:
        in_file = open(savepath + '/validateImageXML/%s.xml' % (image_id))
        labeltxt = savepath + '/validateImageLabelTxt'
        if os.path.exists(labeltxt) == False:
            os.mkdir(labeltxt)
        out_file = open(savepath + '/validateImageLabelTxt/%s.txt' % (image_id), 'w')
        tree = ET.parse(in_file)
        root = tree.getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

    for obj in root.iter('object'):
        # difficult = obj.find('difficult').text
        cls = obj.find('name').text
        # if cls not in classes or int(difficult) == 1:
        #     continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

wd = getcwd()

for year, image_set in sets:
    savepath = os.getcwd()
    idtxt = savepath + "/validateImageId.txt"
    pathtxt = savepath + "/validateImagePath.txt"
    image_ids = open(idtxt).read().strip().split()
    list_file = open(pathtxt, 'w')
    s = '\xef\xbb\xbf'  # UTF-8 BOM bytes; strip them if an id starts with one
    for image_id in image_ids:
        nPos = image_id.find(s)
        if nPos >= 0:
            image_id = image_id[3:]
        list_file.write('%s/validateImage/%s.jpg\n' % (wd, image_id))
        print(image_id)
        convert_annotation(image_id, 1, savepath)
    list_file.close()

    idtxt = savepath + "/trainImageId.txt"
    pathtxt = savepath + "/trainImagePath.txt"
    image_ids = open(idtxt).read().strip().split()
    list_file = open(pathtxt, 'w')
    s = '\xef\xbb\xbf'
    for image_id in image_ids:
        nPos = image_id.find(s)
        if nPos >= 0:
            image_id = image_id[3:]
        list_file.write('%s/trainImage/%s.jpg\n' % (wd, image_id))
        print(image_id)
        convert_annotation(image_id, 0, savepath)
    list_file.close()
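
To make the generated label format concrete, here is a worked example of what convert() computes (all numbers are illustrative): a box with xmin=100, xmax=300, ymin=50, ymax=250 in a 640x480 image.

# mirrors convert((w, h), (xmin, xmax, ymin, ymax)) with illustrative numbers
size = (640, 480)                      # image width, height
box = (100.0, 300.0, 50.0, 250.0)      # xmin, xmax, ymin, ymax
x = (box[0] + box[1]) / 2.0 / size[0]  # 0.3125   normalized center x
y = (box[2] + box[3]) / 2.0 / size[1]  # 0.3125   normalized center y
w = (box[1] - box[0]) / size[0]        # 0.3125   normalized width
h = (box[3] - box[2]) / size[1]        # ~0.41667 normalized height
print(0, x, y, w, h)  # label line for class 0: "0 0.3125 0.3125 0.3125 0.4166..."

So each line of a label txt file is: classid x_center y_center width height, all normalized to [0, 1].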

For training: once compiled, you have darknet.exe on Windows or the darknet executable on Ubuntu.

Then, in the same directory as darknet.exe, create a folder, say train_person for pedestrian detection. Inside it create a backup folder and a data folder. Copy the augmented image folder img_less and the xml folder into data, put trans1.py and trans2.py into data as well, run trans1.py first and then trans2.py, and finally copy the generated txt files into the training image directory.

As for editing the cfg file, see other blog posts. I will just mention the anchors: faster-rcnn uses 9 anchors with fixed ratios, whereas YOLO clusters the annotated boxes of the training set. I only changed mine after verifying that it actually helped.

# coding=utf-8
# k-means++ for YOLOv2 anchors
# computes the anchor sizes YOLOv2 needs via k-means++
import numpy as np

# Box class describing the coordinates of a bounding box
class Box():
    def __init__(self, x, y, w, h):
        self.x = x
        self.y = y
        self.w = w
        self.h = h

# overlap of two boxes along one axis
# x1: center of box1 on that axis; len1: length of box1 on that axis
# x2: center of box2 on that axis; len2: length of box2 on that axis
# returns the overlap length on that axis
def overlap(x1, len1, x2, len2):
    len1_half = len1 / 2
    len2_half = len2 / 2
    left = max(x1 - len1_half, x2 - len2_half)
    right = min(x1 + len1_half, x2 + len2_half)
    return right - left

# intersection area of Box a and Box b
def box_intersection(a, b):
    w = overlap(a.x, a.w, b.x, b.w)
    h = overlap(a.y, a.h, b.y, b.h)
    if w < 0 or h < 0:
        return 0
    area = w * h
    return area

# union area of Box a and Box b
def box_union(a, b):
    i = box_intersection(a, b)
    #print a.w,a.h,b.w,b.h
    u = a.w * a.h + b.w * b.h - i
    return u

# IoU of Box a and Box b
def box_iou(a, b):
    #print box_union(a, b)
    return box_intersection(a, b) / box_union(a, b)

# initialize centroids with k-means++ to reduce the influence of random init
# boxes: list of Box objects for all bounding boxes
# n_anchors: the k of k-means
# returns the n_anchors initial centroids
def init_centroids(boxes, n_anchors):
    centroids = []
    boxes_num = len(boxes)
    centroid_index = np.random.choice(boxes_num, 1)
    centroids.append(boxes[centroid_index[0]])  # index with [0]: a 1-element array cannot index a list
    print(centroids[0].w, centroids[0].h)
    for centroid_index in range(0, n_anchors - 1):
        sum_distance = 0
        distance_thresh = 0
        distance_list = []
        cur_sum = 0
        for box in boxes:
            min_distance = 1
            for centroid_i, centroid in enumerate(centroids):
                distance = (1 - box_iou(box, centroid))
                if distance < min_distance:
                    min_distance = distance
            sum_distance += min_distance
            distance_list.append(min_distance)
        distance_thresh = sum_distance * np.random.random()
        for i in range(0, boxes_num):
            cur_sum += distance_list[i]
            if cur_sum > distance_thresh:
                centroids.append(boxes[i])
                print(boxes[i].w, boxes[i].h)
                break
    return centroids

# run one k-means step to compute new centroids
# boxes: list of Box objects for all bounding boxes
# n_anchors: the k of k-means
# centroids: the current cluster centers
# returns new_centroids (the recomputed centers), groups (the boxes in each
# of the n_anchors clusters) and loss (the summed distance of every box to
# its nearest centroid)
def do_kmeans(n_anchors, boxes, centroids):
    loss = 0
    groups = []
    new_centroids = []
    for i in range(n_anchors):
        groups.append([])
        new_centroids.append(Box(0, 0, 0, 0))
    for box in boxes:
        min_distance = 1
        group_index = 0
        for centroid_index, centroid in enumerate(centroids):
            distance = (1 - box_iou(box, centroid))
            if distance < min_distance:
                min_distance = distance
                group_index = centroid_index
        groups[group_index].append(box)
        loss += min_distance
        new_centroids[group_index].w += box.w
        new_centroids[group_index].h += box.h
    for i in range(n_anchors):
        new_centroids[i].w /= len(groups[i])
        new_centroids[i].h /= len(groups[i])
    return new_centroids, groups, loss

# compute n_anchors centroids for the given bounding boxes
# label_path: path of the training-list file
# n_anchors: number of anchors
# loss_convergence: smallest allowed change of the loss
# grid_size * grid_size is the number of grid cells
# iterations_num: maximum number of iterations
# plus = 1 enables k-means++ initialization of the centroids
def compute_centroids(label_path, n_anchors, loss_convergence, grid_size, iterations_num, plus):
    boxes = []
    label_files = []
    f = open(label_path)
    for line in f:
        label_path = line.rstrip().replace('images', 'labels')
        label_path = label_path.replace('JPEGImages', 'labels')
        label_path = label_path.replace('.jpg', '.txt')
        label_path = label_path.replace('.JPEG', '.txt')
        label_files.append(label_path)
    f.close()

    for label_file in label_files:
        f = open(label_file)
        for line in f:
            temp = line.strip().split(" ")
            if len(temp) > 1:
                boxes.append(Box(0, 0, float(temp[3]), float(temp[4])))
                print(temp[3], temp[4])
                if float(temp[3]) < 0:
                    print(label_file)
    print('done')

    if plus:
        centroids = init_centroids(boxes, n_anchors)
    else:
        centroid_indices = np.random.choice(len(boxes), n_anchors)
        centroids = []
        for centroid_index in centroid_indices:
            centroids.append(boxes[centroid_index])

    # iterate k-means
    centroids, groups, old_loss = do_kmeans(n_anchors, boxes, centroids)
    iterations = 1
    while (True):
        centroids, groups, loss = do_kmeans(n_anchors, boxes, centroids)
        iterations = iterations + 1
        print("loss = %f" % loss)
        if abs(old_loss - loss) < loss_convergence or iterations > iterations_num:
            break
        old_loss = loss
        for centroid in centroids:
            print(centroid.w * grid_size, centroid.h * grid_size)

    # print result
    for centroid in centroids:
        #print("k-means result:\t ", centroid.w * grid_size, ",", centroid.h * grid_size)
        #str('%.03f'%maxVal)
        print("k-means result:\t ", str('%.06f' % (centroid.w * grid_size)), ",",
              str('%.06f' % (centroid.h * grid_size)))

label_path = "xx/train_person/data/trainImagePath.txt"
n_anchors = 5
loss_convergence = 1e-3
grid_size = 13
iterations_num = 10000000
plus = 0
compute_centroids(label_path, n_anchors, loss_convergence, grid_size, iterations_num, plus)
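
The five printed w,h pairs (in units of the 13x13 grid) replace the anchors line in the [region] section of the v2 cfg. A sketch of what that section looks like; the anchor values below are placeholders, not computed results, and classes/num have to match your own setup:

[region]
anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52
classes=1
num=5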

5. Training and testing

Just use the command line. If there is enough memory, open a second command-line window during training so you can occasionally check the prediction quality while training runs.
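
For example, reusing the file names from the VOC example earlier (backup/yolo-voc_final.weights assumes darknet's default naming for weights written into the backup folder):

darknet.exe detector train cfg/voc.data cfg/yolo-voc.cfg cfg/darknet19_448.conv.23
darknet.exe detector test cfg/voc.data cfg/yolo-voc.cfg backup/yolo-voc_final.weights data/dog.jpg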

6. Calling from OpenCV

C++ version

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>

#include <fstream>
#include <iostream>
#include <algorithm>
#include <cstdlib>

using namespace std;
using namespace cv;
using namespace cv::dnn;

//char* jpgpath = "";
//char* cfgpath = "";
//char* weightspath = "";
//char* namespath = "";
//Mat boxes = yoloMultiPredict(jpgpath, cfgpath, weightspath, namespath);
//cout << "boxes:\n" << boxes << endl;

// returned rows: class prob xmin xmax ymin ymax
Mat yoloMultiPredict(char* jpgpath, char* cfgpath, char* weightspath, char* namespath)
{
    Mat boxes = Mat::zeros(0, 6, CV_16SC1);  // 6 columns: class prob xmin xmax ymin ymax
    Mat frame = imread(jpgpath);

    dnn::Net net = readNetFromDarknet(cfgpath, weightspath);
    if (net.empty())
    {
        printf("Could not load net...\n");
    }

    // read the class names
    {
        ifstream classNamesFile(namespath);
        vector<string> classNamesVec;
        if (classNamesFile.is_open())
        {
            string className = "";
            while (std::getline(classNamesFile, className))
                classNamesVec.push_back(className);
        }
        for (int i = 0; i < classNamesVec.size(); i++)
            cout << i << "\t" << classNamesVec[i] << endl;
        cout << endl;
    }

    // 416x416 matches the network input size in the cfg
    Mat inputBlob = blobFromImage(frame, 1 / 255.F, Size(416, 416), Scalar(), true, false);
    net.setInput(inputBlob, "data");

    // detect
    Mat detectionMat = net.forward("detection_out");
    //cout << "forward" << endl;

    // collect the results; each row is x y w h objectness followed by the class scores
    for (int i = 0; i < detectionMat.rows; i++)
    {
        const int probability_index = 5;
        const int probability_size = detectionMat.cols - probability_index;
        float *prob_array_ptr = &detectionMat.at<float>(i, probability_index);
        size_t objectClass = max_element(prob_array_ptr, prob_array_ptr + probability_size) - prob_array_ptr;
        float confidence = detectionMat.at<float>(i, (int)objectClass + probability_index);
        if (confidence > 0.24)
        {
            float x = detectionMat.at<float>(i, 0);
            float y = detectionMat.at<float>(i, 1);
            float width = detectionMat.at<float>(i, 2);
            float height = detectionMat.at<float>(i, 3);
            int xmin = static_cast<int>((x - width / 2) * frame.cols);
            int xmax = static_cast<int>((x + width / 2) * frame.cols);
            int ymin = static_cast<int>((y - height / 2) * frame.rows);
            int ymax = static_cast<int>((y + height / 2) * frame.rows);
            // clip
            if (xmin < 0)
                xmin = 0;
            if (xmax > frame.cols)
                xmax = frame.cols - 1;
            if (ymin < 0)
                ymin = 0;
            if (ymax > frame.rows)
                ymax = frame.rows - 1;

            //rectangle(frame, cvPoint(xmin, ymin), cvPoint(xmax, ymax), Scalar(0, 0, 255), 4, 1, 0);
            //cout << "x y w h\t" << x << "\t" << y << "\t" << width << "\t" << height << endl;

            // class prob xmin xmax ymin ymax; the probability is stored as a
            // percentage here (assumed scale) so it fits into a short
            Mat L = (Mat_<short>(1, 6) << (short)objectClass, (short)(confidence * 100), xmin, xmax, ymin, ymax);
            //cout << "L:" << L << endl;
            boxes.push_back(L);
        }
    }
    return boxes;
}

OpenCV builds from after 2018-04-18 can already load YOLO v3 training files.

Predictions made directly from the command line and predictions from the same config files loaded into OpenCV differ, sometimes by a lot. Not something to rely on.

Whatever framework you train in, use the model in that same framework; otherwise numerical consistency is not guaranteed.

Python version

Adapted from the example under OpenCV's samples/dnn.

import cv2
import numpy as np
import os

cwd = os.path.split(os.path.realpath(__file__))[0]

def darknetPredict(jpgpathOrMat, cfgpath, wtspath):
    net = cv2.dnn.readNetFromDarknet(cfgpath, wtspath)
    confThreshold = 0.24
    nmsThreshold = 0.4

    def getOutputsNames(net):
        layersNames = net.getLayerNames()
        return [layersNames[i[0] - 1] for i in net.getUnconnectedOutLayers()]

    if isinstance(jpgpathOrMat, str):
        frame = cv2.imread(jpgpathOrMat)
    if isinstance(jpgpathOrMat, np.ndarray):
        frame = jpgpathOrMat

    frameHeight = frame.shape[0]
    frameWidth = frame.shape[1]

    # Create a 4D blob from a frame.
    inpW = 416
    inpH = 416
    blob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inpW, inpH), (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)

    # Run the model
    outs = net.forward(getOutputsNames(net))

    classIds = []
    confidences = []
    boxes = []
    for out in outs:
        # print('out:', out)
        for detection in out:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > confThreshold:
                center_x = int(detection[0] * frameWidth)
                center_y = int(detection[1] * frameHeight)
                width = int(detection[2] * frameWidth)
                height = int(detection[3] * frameHeight)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds.append(classId)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])

    rst_boxes = []
    indices = cv2.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    for i in indices:
        i = i[0]
        box = boxes[i]
        # box columns: left(xmin) top(ymin) w h
        #              0          1         2 3
        left = box[0]
        top = box[1]
        width = box[2]
        height = box[3]
        # print("confidences:", confidences[i])
        xmin, ymin, xmax, ymax = [left, top, left + width, top + height]
        xmin = np.clip(xmin, 0, frameWidth - 1)
        xmax = np.clip(xmax, 0, frameWidth - 1)
        ymin = np.clip(ymin, 0, frameHeight - 1)
        ymax = np.clip(ymax, 0, frameHeight - 1)
        # result columns: classid prob xmin ymin xmax ymax
        #                 0       1    2    3    4    5
        line = [classIds[i], confidences[i], xmin, ymin, xmax, ymax]
        rst_boxes.append(line)

    rst_boxes = np.asarray(rst_boxes)
    return rst_boxes
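
A minimal usage sketch (the three paths are placeholders for your own files):

# minimal usage sketch; all three paths below are placeholders
boxes = darknetPredict("test.jpg", "yolo-voc.cfg", "yolo-voc_final.weights")
for classid, prob, xmin, ymin, xmax, ymax in boxes:
    print(int(classid), prob, int(xmin), int(ymin), int(xmax), int(ymax))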

That's about it.