k-means需要有数据,中心点个数是需要人为指定的,位置可以随机初始化,但是还需要度量到聚类中心的距离。这里怎么度量这个距离是很关键的。
距离度量如果使用标准的欧氏距离,大盒子会比小盒子产生更多的错误。例因此这里使用其他的距离度量公式。聚类的目的是anchor boxes和临近的ground truth有更大的IOU值,这和anchor box的尺寸没有直接关系。自定义的距离度量公式:
到聚类中心的距离越小越好,但IOU值是越大越好,所以使用 1 – IOU,这样就保证距离越小,IOU值越大。
代码实现主要是AlexeyAB/darknet中scripts/gen_anchors.py,这里根据yolov2,yolov3的版本不同进行部分修改。yolov2的配置文件yolov2.cfg需要的anchors是相对特征图的,值很小基本都小于13;yolov3的配置文件yolov3.cfg需要的3个anchors是相对于原图来说的,相对都比较大。还有输入图片的大小(32的倍数)对于输出也是有影响的。
例:
yolov2.cfg中[region] anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828
yolov3.cfg中[region] anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
from os import listdir from os.path import isfile, join import argparse #import cv2 import numpy as np import sys import os import shutil import random import math def IOU(x,centroids): ''' :param x: 某一个ground truth的w,h :param centroids: anchor的w,h的集合[(w,h),(),...],共k个 :return: 单个ground truth box与所有k个anchor box的IoU值集合 ''' IoUs = [] w, h = x # ground truth的w,h for centroid in centroids: c_w,c_h = centroid #anchor的w,h if c_w>=w and c_h>=h: #anchor包围ground truth iou = w*h/(c_w*c_h) elif c_w>=w and c_h<=h: #anchor宽矮 iou = w*c_h/(w*h + (c_w-w)*c_h) elif c_w<=w and c_h>=h: #anchor瘦长 iou = c_w*h/(w*h + c_w*(c_h-h)) else: #ground truth包围anchor means both w,h are bigger than c_w and c_h respectively iou = (c_w*c_h)/(w*h) IoUs.append(iou) # will become (k,) shape return np.array(IoUs) def avg_IOU(X,centroids): ''' :param X: ground truth的w,h的集合[(w,h),(),...] :param centroids: anchor的w,h的集合[(w,h),(),...],共k个 ''' n,d = X.shape sum = 0. for i in range(X.shape[0]): sum+= max(IOU(X[i],centroids)) #返回一个ground truth与所有anchor的IoU中的最大值 return sum/n #对所有ground truth求平均 def write_anchors_to_file(centroids,X,anchor_file,input_shape,yolo_version): ''' :param centroids: anchor的w,h的集合[(w,h),(),...],共k个 :param X: ground truth的w,h的集合[(w,h),(),...] :param anchor_file: anchor和平均IoU的输出路径 ''' f = open(anchor_file,'w') anchors = centroids.copy() print(anchors.shape) if yolo_version=='yolov2': for i in range(anchors.shape[0]): #yolo中对图片的缩放倍数为32倍,所以这里除以32, # 如果网络架构有改变,根据实际的缩放倍数来 #求出anchor相对于缩放32倍以后的特征图的实际大小(yolov2) anchors[i][0]*=input_shape/32. anchors[i][1]*=input_shape/32. elif yolo_version=='yolov3': for i in range(anchors.shape[0]): #求出yolov3相对于原图的实际大小 anchors[i][0]*=input_shape anchors[i][1]*=input_shape else: print("the yolo version is not right!") exit(-1) widths = anchors[:,0] sorted_indices = np.argsort(widths) print('Anchors = ', anchors[sorted_indices]) for i in sorted_indices[:-1]: f.write('%0.2f,%0.2f, '%(anchors[i,0],anchors[i,1])) #there should not be comma after last anchor, that's why f.write('%0.2f,%0.2f\n'%(anchors[sorted_indices[-1:],0],anchors[sorted_indices[-1:],1])) f.write('%f\n'%(avg_IOU(X,centroids))) print() def kmeans(X,centroids,eps,anchor_file,input_shape,yolo_version): N = X.shape[0] #ground truth的个数 iterations = 0 print("centroids.shape",centroids) k,dim = centroids.shape #anchor的个数k以及w,h两维,dim默认等于2 prev_assignments = np.ones(N)*(-1) #对每个ground truth分配初始标签 iter = 0 old_D = np.zeros((N,k)) #初始化每个ground truth对每个anchor的IoU while True: D = [] iter+=1 for i in range(N): d = 1 - IOU(X[i],centroids) D.append(d) D = np.array(D) # D.shape = (N,k) 得到每个ground truth对每个anchor的IoU print("iter {}: dists = {}".format(iter,np.sum(np.abs(old_D-D)))) #计算每次迭代和前一次IoU的变化值 #assign samples to centroids assignments = np.argmin(D,axis=1) #将每个ground truth分配给距离d最小的anchor序号 if (assignments == prev_assignments).all() : #如果前一次分配的结果和这次的结果相同,就输出anchor以及平均IoU print("Centroids = ",centroids) write_anchors_to_file(centroids,X,anchor_file,input_shape,yolo_version) return #calculate new centroids centroid_sums=np.zeros((k,dim),np.float) #初始化以便对每个簇的w,h求和 for i in range(N): centroid_sums[assignments[i]]+=X[i] #将每个簇中的ground truth的w和h分别累加 for j in range(k): #对簇中的w,h求平均 centroids[j] = centroid_sums[j]/(np.sum(assignments==j)+1) prev_assignments = assignments.copy() old_D = D.copy() def main(argv): parser = argparse.ArgumentParser() parser.add_argument('-filelist', default = r'E:\BaiduNetdiskDownload\darknetHG8245\scripts\train.txt', help='path to filelist\n' ) parser.add_argument('-output_dir', default = r'E:\BaiduNetdiskDownload\darknetHG8245', type = str, help='Output anchor directory\n' ) parser.add_argument('-num_clusters', default = 0, type = int, help='number of clusters\n' ) ''' 需要注意的是yolov2输出的值比较小是相对特征图来说的, yolov3输出值较大是相对原图来说的, 所以yolov2和yolov3的输出是有区别的 ''' parser.add_argument('-yolo_version', default='yolov2', type=str, help='yolov2 or yolov3\n') parser.add_argument('-yolo_input_shape', default=416, type=int, help='input images shape,multiples of 32. etc. 416*416\n') args = parser.parse_args() if not os.path.exists(args.output_dir): os.mkdir(args.output_dir) f = open(args.filelist) lines = [line.rstrip('\n') for line in f.readlines()] annotation_dims = [] for line in lines: line = line.replace('JPEGImages','labels') line = line.replace('.jpg','.txt') line = line.replace('.png','.txt') print(line) f2 = open(line) for line in f2.readlines(): line = line.rstrip('\n') w,h = line.split(' ')[3:] #print(w,h) annotation_dims.append((float(w),float(h))) annotation_dims = np.array(annotation_dims) #保存所有ground truth框的(w,h) eps = 0.005 if args.num_clusters == 0: for num_clusters in range(1,11): #we make 1 through 10 clusters anchor_file = join( args.output_dir,'anchors%d.txt'%(num_clusters)) indices = [ random.randrange(annotation_dims.shape[0]) for i in range(num_clusters)] centroids = annotation_dims[indices] kmeans(annotation_dims,centroids,eps,anchor_file,args.yolo_input_shape,args.yolo_version) print('centroids.shape', centroids.shape) else: anchor_file = join( args.output_dir,'anchors%d.txt'%(args.num_clusters)) indices = [ random.randrange(annotation_dims.shape[0]) for i in range(args.num_clusters)] centroids = annotation_dims[indices] kmeans(annotation_dims,centroids,eps,anchor_file,args.yolo_input_shape,args.yolo_version) print('centroids.shape', centroids.shape) if __name__=="__main__": main(sys.argv) |
这是其中的yolov3的结果