faster rcnn源碼解讀（五）之layer（網絡裡的input-data）

轉載自：faster rcnn源碼解讀（五）之layer（網絡裡的input-data） - 野孩子的專欄 - 部落格頻道 - CSDN.NET

http://blog.csdn.net/u010668907/article/details/51945844

faster rcnn用python版本的https://github.com/rbgirshick/py-faster-rcnn

layer源碼位址：https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/roi_data_layer/layer.py

源碼：

[python] view plain copy print ?

faster rcnn源碼解讀（五）之layer（網絡裡的input-data）

# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
"""The data layer used during training to train a Fast R-CNN network.
RoIDataLayer implements a Caffe Python layer.
"""
import caffe
from fast_rcnn.config import cfg
from roi_data_layer.minibatch import get_minibatch
import numpy as np
import yaml
from multiprocessing import Process, Queue
class RoIDataLayer(caffe.Layer):
    """Fast R-CNN data layer used for training.

    Implements the Caffe Python-layer hooks (``setup``, ``reshape``,
    ``forward``, ``backward``). Each ``forward`` call fetches one minibatch
    (a dict mapping blob name -> array) and copies every entry into the top
    blob registered under that name in ``self._name_to_top_map``.
    """

    def _shuffle_roidb_inds(self):
        """Randomly permute the training roidb.

        Stores the shuffled roidb indices in ``self._perm`` and resets the
        read cursor ``self._cur`` to 0.

        When ``cfg.TRAIN.ASPECT_GROUPING`` is set, indices are arranged in
        consecutive pairs drawn from the sequence "all wide images, then all
        tall images" (each group shuffled), and the pairs themselves are
        then shuffled. If the roidb has an odd number of entries, the single
        unpaired index is appended after the shuffled pairs instead of
        making the reshape fail.
        """
        if cfg.TRAIN.ASPECT_GROUPING:
            widths = np.array([r['width'] for r in self._roidb])
            heights = np.array([r['height'] for r in self._roidb])
            horz = (widths >= heights)
            vert = np.logical_not(horz)
            horz_inds = np.where(horz)[0]
            vert_inds = np.where(vert)[0]
            inds = np.hstack((
                np.random.permutation(horz_inds),
                np.random.permutation(vert_inds)))
            # Only an even number of indices can be reshaped into pairs;
            # keep any trailing odd index aside and re-attach it afterwards.
            num_paired = (len(inds) // 2) * 2
            leftover = inds[num_paired:]
            pairs = np.reshape(inds[:num_paired], (-1, 2))
            row_perm = np.random.permutation(np.arange(pairs.shape[0]))
            shuffled_pairs = np.reshape(pairs[row_perm, :], (-1,))
            # Shuffled roidb indices: this is where the shuffle order lives.
            self._perm = np.hstack((shuffled_pairs, leftover))
        else:
            self._perm = np.random.permutation(np.arange(len(self._roidb)))
        self._cur = 0

    def _get_next_minibatch_inds(self):
        """Return the roidb indices for the next minibatch.

        Reshuffles first when fewer than ``cfg.TRAIN.IMS_PER_BATCH`` + 1
        entries remain past the cursor (note the ``>=``: a batch that would
        end exactly at the end of the permutation also triggers a reshuffle).
        """
        if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb):
            self._shuffle_roidb_inds()

        db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH]
        # _cur acts as a cursor into _perm; it advances by one batch each
        # time a batch of indices is taken.
        self._cur += cfg.TRAIN.IMS_PER_BATCH
        return db_inds

    def _get_next_minibatch(self):
        """Return the blobs to be used for the next minibatch.

        If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a
        separate process and made available through self._blob_queue.
        Otherwise the next batch of roidb entries is selected here and
        passed to ``get_minibatch``.
        """
        if cfg.TRAIN.USE_PREFETCH:
            return self._blob_queue.get()
        else:
            db_inds = self._get_next_minibatch_inds()
            # roidb entries belonging to this minibatch only
            minibatch_db = [self._roidb[i] for i in db_inds]
            return get_minibatch(minibatch_db, self._num_classes)

    def set_roidb(self, roidb):
        """Set the roidb to be used by this layer during training.

        Also performs the initial shuffle. When cfg.TRAIN.USE_PREFETCH is
        set, starts a ``BlobFetcher`` process feeding ``self._blob_queue``
        and registers an ``atexit`` hook that terminates and joins it.
        Reads ``self._num_classes``, which is assigned in ``setup``.
        """
        self._roidb = roidb
        self._shuffle_roidb_inds()
        if cfg.TRAIN.USE_PREFETCH:
            self._blob_queue = Queue(10)
            self._prefetch_process = BlobFetcher(self._blob_queue,
                                                 self._roidb,
                                                 self._num_classes)
            self._prefetch_process.start()

            # Terminate the child process when the parent exits
            def cleanup():
                # Single-argument print() behaves the same on Python 2 and 3.
                print('Terminating BlobFetcher')
                self._prefetch_process.terminate()
                self._prefetch_process.join()
            import atexit
            atexit.register(cleanup)

    def setup(self, bottom, top):
        """Setup the RoIDataLayer.

        Parses the layer's YAML parameter string for ``num_classes``, gives
        every top blob an initial shape, and records the blob-name ->
        top-index mapping in ``self._name_to_top_map``.
        """
        # Parse the layer parameter string, which must be valid YAML.
        # safe_load is used instead of yaml.load: it only builds plain
        # Python objects, and newer PyYAML releases reject yaml.load()
        # without an explicit Loader.
        layer_params = yaml.safe_load(self.param_str_)

        self._num_classes = layer_params['num_classes']

        # Maps blob name -> index into `top`,
        # e.g. {'gt_boxes': 2, 'data': 0, 'im_info': 1} in the RPN case.
        self._name_to_top_map = {}

        # data blob: holds a batch of N images, each with 3 channels
        idx = 0
        top[idx].reshape(cfg.TRAIN.IMS_PER_BATCH, 3,
                         max(cfg.TRAIN.SCALES), cfg.TRAIN.MAX_SIZE)
        self._name_to_top_map['data'] = idx
        idx += 1

        if cfg.TRAIN.HAS_RPN:
            top[idx].reshape(1, 3)
            self._name_to_top_map['im_info'] = idx
            idx += 1

            top[idx].reshape(1, 4)
            self._name_to_top_map['gt_boxes'] = idx
            idx += 1
        else:  # not using RPN
            # rois blob: holds R regions of interest, each is a 5-tuple
            # (n, x1, y1, x2, y2) specifying an image batch index n and a
            # rectangle (x1, y1, x2, y2)
            top[idx].reshape(1, 5)
            self._name_to_top_map['rois'] = idx
            idx += 1

            # labels blob: R categorical labels in [0, ..., K] for K
            # foreground classes plus background
            top[idx].reshape(1)
            self._name_to_top_map['labels'] = idx
            idx += 1

            if cfg.TRAIN.BBOX_REG:
                # bbox_targets blob: R bounding-box regression targets with 4
                # targets per class
                top[idx].reshape(1, self._num_classes * 4)
                self._name_to_top_map['bbox_targets'] = idx
                idx += 1

                # bbox_inside_weights blob: At most 4 targets per roi are
                # active; this binary vector specifies the subset of active
                # targets
                top[idx].reshape(1, self._num_classes * 4)
                self._name_to_top_map['bbox_inside_weights'] = idx
                idx += 1

                top[idx].reshape(1, self._num_classes * 4)
                self._name_to_top_map['bbox_outside_weights'] = idx
                idx += 1

        # Formatted into one string so the output is the same on Python 2
        # and Python 3.
        print('RoiDataLayer: name_to_top: {}'.format(self._name_to_top_map))
        assert len(top) == len(self._name_to_top_map)

    def forward(self, bottom, top):
        """Get blobs and copy them into this layer's top blob vector."""
        blobs = self._get_next_minibatch()

        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for blob_name, blob in blobs.items():
            top_ind = self._name_to_top_map[blob_name]
            # Reshape net's input blobs
            top[top_ind].reshape(*(blob.shape))
            # Copy data into net's input blobs
            top[top_ind].data[...] = blob.astype(np.float32, copy=False)

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass
class BlobFetcher(Process):
    """Experimental class for prefetching blobs in a separate process.

    Once started, ``run`` loops forever: it picks the next batch of roidb
    indices, builds the blobs with ``get_minibatch`` and puts them on the
    queue supplied at construction time.
    """

    def __init__(self, queue, roidb, num_classes):
        """Store the work queue and roidb, shuffle once, then seed NumPy.

        Args:
            queue: object with a ``put`` method that receives each blob dict.
            roidb: indexable collection of roidb entries.
            num_classes: forwarded unchanged to ``get_minibatch``.
        """
        super(BlobFetcher, self).__init__()
        self._queue = queue
        self._roidb = roidb
        self._num_classes = num_classes
        self._perm = None
        self._cur = 0
        self._shuffle_roidb_inds()
        # fix the random seed for reproducibility
        # NOTE(review): the seed is applied after the initial shuffle above,
        # so that first permutation uses whatever RNG state the caller had --
        # confirm this ordering is intended.
        np.random.seed(cfg.RNG_SEED)

    def _shuffle_roidb_inds(self):
        """Randomly permute the training roidb.

        Stores a uniform random permutation of ``range(len(self._roidb))``
        in ``self._perm`` and resets the cursor ``self._cur`` to 0. Unlike
        ``RoIDataLayer._shuffle_roidb_inds`` this does not consult
        ``cfg.TRAIN.ASPECT_GROUPING``.
        """
        # TODO(rbg): remove duplicated code
        self._perm = np.random.permutation(np.arange(len(self._roidb)))
        self._cur = 0

    def _get_next_minibatch_inds(self):
        """Return the roidb indices for the next minibatch.

        Reshuffles first when the next batch would reach or pass the end of
        the roidb, then returns ``cfg.TRAIN.IMS_PER_BATCH`` indices from the
        permutation and advances the cursor by the same amount.
        """
        # TODO(rbg): remove duplicated code
        if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb):
            self._shuffle_roidb_inds()

        db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH]
        self._cur += cfg.TRAIN.IMS_PER_BATCH
        return db_inds

    def run(self):
        """Process entry point: produce minibatch blobs indefinitely."""
        # Single-argument print() behaves the same on Python 2 and 3.
        print('BlobFetcher started')
        while True:
            db_inds = self._get_next_minibatch_inds()
            minibatch_db = [self._roidb[i] for i in db_inds]
            blobs = get_minibatch(minibatch_db, self._num_classes)
            # presumably blocks while a bounded queue is full -- depends on
            # the queue object supplied by the caller
            self._queue.put(blobs)

下面的roidb都隻是一次batch的

3.1 setup在caffe.SGDSolver時調用；setup的top（list猜測是c++的vector）的每個項是caffe._caffe.Blob

（猜測：輸出的 top shape 就是上面的 top，在 setup 中被 reshape：top[0] 為 1 3 [600] 1000；top[1] 為 1 3；top[2] 為 1 4。）（疑問：在 forward 中 blob 的資料 shape 會被重置，有時大小甚至會不定。）

3.2 name_to_top: {'gt_boxes': 2, 'data': 0, 'im_info': 1}字典的value值是top的對應索引

3.3 solver.step(1)會調用layer的reshape、forward

3.4 self._perm：把roidb的索引打亂，造成圖檔的shuffle，打亂的索引存儲的地方

3.5 cfg.TRAIN.IMS_PER_BATCH：（猜測，每次取圖檔的數量）

3.6 self._cur：相當于一個指向_perm的指針，每次取走圖檔後，他會跟着變化

3.7 db_inds：本次取得圖檔的索引

3.8 def _get_next_minibatch_inds(self)：取得本次圖檔的索引，即db_inds

3.9 minibatch_db：本次的roidb

3.10 _num_classes：網絡裡的類别數值21

3.11 forward（）：得到blob并處理放進top

solver.step(1)-》reshape-》forward-》_get_next_minibatch-》_get_next_minibatch_inds-》(前面在layer.py裡，現在進入minibatch組建真正的blob)get_minibatch

faster rcnn源碼解讀（五）之layer（網絡裡的input-data）

繼續閱讀

FPN（feature pyramid networks）論文筆記

RCNN SPP_net

目标檢測基礎知識（1）--目标檢測任務描述目标檢測基礎知識（1）–目标檢測任務描述0.參考部落格1.目标檢測項目結構2.目标檢測算法分類3.目标檢測的任務4.目标定位的簡單實作思路5.分類與目标檢測的差別

目标檢測基礎知識（2）--R-CNN目标檢測基礎知識（2）–R-CNN0.參考部落格1.背景2.滑動視窗3.R-CNN模型4.候選區域選擇5.非極大值抑制（NMS）6.修正候選區域（邊框回歸）7.目标檢測評估名額8.R-CNN過程總結

faster rcnn源碼了解（二）之AnchorTargetLayer（網絡中的rpn_data）

faster-RCNN環境配置（Ubuntu14.04）Ubuntu14.04(64位)+Cuda7.5+Cudnn7.0+Caffe+Matlab(Linux版)

【目标檢測】RCNN算法詳解思想候選區域生成特征提取類别判斷位置精修結果

Fast RCNN 訓練自己資料集 (2修改資料讀取接口)

py-faster-rcnn源碼解讀系列