# Source: GitHub, https://github.com/AITTSMD/MTCNN-Tensorflow
# This stage's code is taken from MTCNN-Tensorflow-master/prepare_data/gen_12net_data.py in the main Git source tree.
#coding:utf-8
import os #满足不同系统路径适应要求
import cv2
import numpy as np
import numpy.random as npr #简化矩阵模块中随机函数表示为npr
from prepare_data.utils import IoU #从...中加载IOU模块
## Input locations and output directories.
anno_file = "wider_face_train.txt"  # annotation list that is read line by line
im_dir = ".../DATA/WIDER_train/images"  # directory the image paths are joined onto
pos_save_dir = ".../DATA/12/positive"  # where positive crops are written
part_save_dir = ".../DATA/12/part"  # where part crops are written
neg_save_dir = '.../DATA/12/negative'  # where negative crops are written
save_dir = ".../DATA/12"  # parent directory that also holds the *_12.txt list files

## Create every output directory that does not exist yet.
## os.makedirs also creates missing parents (os.mkdir would raise when
## ".../DATA" is absent) and exist_ok=True removes the check-then-create race.
for _out_dir in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
    os.makedirs(_out_dir, exist_ok=True)
## Read the annotation list first: if it is missing or unreadable the script
## stops here, before the output list files below are truncated by mode 'w'.
with open(anno_file, 'r') as f:
    # One list element per line of the annotation file.
    annotations = f.readlines()
num = len(annotations)
print("%d pics in total" % num)

## List files that record the saved crops; os.path.join builds the paths
## inside save_dir and mode 'w' starts each file empty.
f1 = open(os.path.join(save_dir, 'pos_12.txt'), 'w')
f2 = open(os.path.join(save_dir, 'neg_12.txt'), 'w')
f3 = open(os.path.join(save_dir, 'part_12.txt'), 'w')

## Running counters, all starting at zero.
p_idx = 0  # positive
n_idx = 0  # negative
d_idx = 0  # don't care
idx = 0  # images visited so far
box_idx = 0  # boxes processed so far
# Generate 12x12 negative, positive and part crops for every annotated image.
# The whole loop runs inside try/finally so that the three list files
# (f1: positives, f2: negatives, f3: parts) are closed even when an image
# raises part-way through the run.
try:
    for annotation in annotations:
        # Drop surrounding whitespace/newline and split the line on single spaces.
        annotation = annotation.strip().split(' ')
        # First field is the image path (without the '.jpg' extension).
        im_path = annotation[0]
        # Remaining fields are box coordinates, converted to float ...
        bbox = list(map(float, annotation[1:]))
        # ... and grouped four per row; -1 lets numpy infer the number of boxes.
        boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)
        # load image
        img = cv2.imread(os.path.join(im_dir, im_path + '.jpg'))
        idx += 1
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None
            # rather than raising; skip it instead of crashing on img.shape.
            print("skip %s: image could not be read" % im_path)
            continue

        # img.shape is (rows, cols, channels); only height and width are used.
        height, width = img.shape[:2]

        # Exclusive upper bound for the side length of negative crops.
        # Floor division keeps it an integer, matching the truncation randint
        # would otherwise apply to a float bound.
        max_neg_size = min(width, height) // 2
        if max_neg_size <= 12:
            # randint(12, high) needs high > 12, so such images cannot be sampled.
            print("skip %s: image too small (%sx%s)" % (im_path, width, height))
            continue

        # Keep cropping random squares until 50 negatives were saved for this image.
        # NOTE: there is no attempt limit; the loop only ends once 50 crops pass
        # the IoU test below.
        neg_num = 0
        while neg_num < 50:
            # Random side length in [12, max_neg_size); randint's upper bound is exclusive.
            size = npr.randint(12, max_neg_size)
            # Random top-left corner such that the square stays inside the image.
            nx = npr.randint(0, width - size)
            ny = npr.randint(0, height - size)
            crop_box = np.array([nx, ny, nx + size, ny + size])
            # Overlap of the candidate with the ground-truth boxes.
            Iou = IoU(crop_box, boxes)

            if np.max(Iou) < 0.3:
                # Largest IoU is below 0.3: keep the crop as a negative.
                # Cropping and resizing happen only for accepted candidates.
                cropped_im = img[ny: ny + size, nx: nx + size, :]
                resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
                save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                f2.write(".../DATA/12/negative/%s.jpg" % n_idx + ' 0\n')
                cv2.imwrite(save_file, resized_im)
                n_idx += 1
                neg_num += 1

        # for every bounding box
        for box in boxes:
            # box (x_left, y_top, x_right, y_bottom)
            x1, y1, x2, y2 = box
            # gt's width and height
            w = x2 - x1 + 1
            h = y2 - y1 + 1

            # Ignore small faces and faces whose top-left corner is outside the
            # image, in case the ground-truth boxes of small faces are inaccurate.
            if max(w, h) < 20 or x1 < 0 or y1 < 0:
                continue

            # Try 5 more crops that start near this box; those whose largest
            # IoU is still below 0.3 are saved as additional negatives.
            for i in range(5):
                # size of the image to be cropped
                size = npr.randint(12, max_neg_size)
                # delta_x and delta_y are offsets of (x1, y1).
                # The lower bound max(-size, -x1) keeps x1 + delta_x >= 0; the
                # (exclusive) upper bound w keeps the crop's corner left of x2.
                # Bounds are cast to int explicitly (truncation toward zero).
                delta_x = npr.randint(int(max(-size, -x1)), int(w))
                delta_y = npr.randint(int(max(-size, -y1)), int(h))
                # Clamping at 0 is a safety net on top of the bounds above.
                nx1 = int(max(0, x1 + delta_x))
                ny1 = int(max(0, y1 + delta_y))
                # Skip the attempt if the bottom-right corner leaves the image
                # (width/height here are the image's, not the box's).
                if nx1 + size > width or ny1 + size > height:
                    continue
                crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size])
                Iou = IoU(crop_box, boxes)

                if np.max(Iou) < 0.3:
                    # Largest IoU is below 0.3: keep as negative.
                    cropped_im = img[ny1: ny1 + size, nx1: nx1 + size, :]
                    # resize cropped image to be 12 * 12
                    resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write(".../DATA/12/negative/%s.jpg" % n_idx + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1

            # Positive/part sampling draws offsets from
            # randint(int(-0.2 * side), int(0.2 * side)); for a side below 5
            # both bounds truncate to 0 and randint raises. Check both sides
            # (not only w) once, before the loop.
            if min(w, h) < 5:
                print("skip positive/part sampling for degenerate box (w=%s, h=%s)" % (w, h))
                box_idx += 1
                continue

            # Generate positive and part samples around the box.
            for i in range(20):
                # Side length in [0.8 * min(w, h), ceil(1.25 * max(w, h))).
                size = npr.randint(int(min(w, h) * 0.8), int(np.ceil(1.25 * max(w, h))))

                # delta here is the offset of the box centre, up to 20% of w / h.
                delta_x = npr.randint(int(-w * 0.2), int(w * 0.2))
                delta_y = npr.randint(int(-h * 0.2), int(h * 0.2))

                # x1 + w / 2 is the box centre; add the offset, then subtract
                # size / 2 to get the crop's top-left corner, clamped at 0.
                nx1 = int(max(x1 + w / 2 + delta_x - size / 2, 0))
                ny1 = int(max(y1 + h / 2 + delta_y - size / 2, 0))
                # bottom-right corner of the crop
                nx2 = nx1 + size
                ny2 = ny1 + size

                # Drop crops that extend past the image border.
                if nx2 > width or ny2 > height:
                    continue
                crop_box = np.array([nx1, ny1, nx2, ny2])
                # Bounding-box regression targets: offsets between the
                # ground-truth corners and the crop corners, in units of size.
                offset_x1 = (x1 - nx1) / float(size)
                offset_y1 = (y1 - ny1) / float(size)
                offset_x2 = (x2 - nx2) / float(size)
                offset_y2 = (y2 - ny2) / float(size)
                # crop img
                cropped_im = img[ny1: ny2, nx1: nx2, :]
                # resize img
                resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)

                # Reshape this box to a single row so IoU is computed against it only.
                box_ = box.reshape(1, -1)
                iou = IoU(crop_box, box_)
                if iou >= 0.65:
                    # positive sample: label 1 followed by the four offsets
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write(".../DATA/12/positive/%s.jpg" % p_idx + ' 1 %.2f %.2f %.2f %.2f\n' % (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif iou >= 0.4:
                    # part sample: label -1 followed by the four offsets
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write(".../DATA/12/part/%s.jpg" % d_idx + ' -1 %.2f %.2f %.2f %.2f\n' % (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
            box_idx += 1

        # Progress report every 100 images.
        if idx % 100 == 0:
            print("%s images done, pos: %s part: %s neg: %s" % (idx, p_idx, d_idx, n_idx))
finally:
    f1.close()
    f2.close()
    f3.close()