天天看點

Faster R-CNN tf版 _region_proposal函數

文章目錄

    • Faster R-CNN tf代碼解析
    • 訓練階段
      • _region_proposal函數

Faster R-CNN tf代碼解析

訓練階段

_region_proposal函數

_region_proposal流程圖如下:

各個變量的含義及次元如下表所示:

Name Info Dim
_num_anchors 多尺度多ratio anchors的數目 9
rpn RPN特征層 n*w*h*cfg.RPN_CHANNELS
rpn_cls_score anchors分類特征 n*h*w*(9*2)
rpn_cls_score_reshape rpn_cls_score形變 n*(h*9)*w*2
rpn_cls_prob_reshape anchors的類别機率 n*(h*9)*w*2
rpn_cls_pred anchors的類别 (n*h*9*w)*1
rpn_cls_prob anchors的類别機率 n*h*w*(9*2)
rpn_bbox_pred anchors的回歸位置偏移 n*h*w*(9*4)
rois 候選ROI區域(類别+坐标) n*256*5
roi_scores 初選(post_nms_topN)rois在rpn_cls_score中對應值 n*post_nms_topN*1
def _region_proposal(self, net_conv, is_training, initializer): 
    """Build the RPN head: score and regress anchors, then select ROIs.

    net_conv: backbone feature map fed into the RPN (NHWC; the table in the
        surrounding article assumes batch size n == 1 -- TODO confirm).
    is_training: Python bool selecting the training vs. test proposal path.
    initializer: weights initializer shared by the three RPN conv layers.

    Returns the selected rois tensor and records every intermediate RPN
    tensor in self._predictions.
    """
    # Shared 3x3 conv producing the RPN feature map (cfg.RPN_CHANNELS deep).
    rpn = slim.conv2d(net_conv, cfg.RPN_CHANNELS, [3, 3], trainable=is_training, weights_initializer=initializer, scope="rpn_conv/3x3") 
    self._act_summaries.append(rpn)
    
    # Anchor classification: 1x1 conv -> 2 scores (bg/fg) per anchor, n*h*w*(9*2).
    rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training, weights_initializer=initializer, padding='VALID', activation_fn=None, scope='rpn_cls_score')    
    # Reshape n*h*w*(9*2) -> n*(h*9)*w*2 so softmax runs over the 2-class dim.
    rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape') 
    # Per-anchor class probabilities (softmax over the last dim).
    rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")  
    # Hard 0/1 class prediction per anchor: shape (n*h*9*w).
    rpn_cls_pred = tf.argmax(tf.reshape(rpn_cls_score_reshape, [-1, 2]), axis=1, name="rpn_cls_pred")     
    # Reshape probabilities back to n*h*w*(9*2).
    rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")  
    
    ## Bounding-box regression: 1x1 conv -> 4 offsets per anchor, n*h*w*(9*4).
    rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training, weights_initializer=initializer, padding='VALID', activation_fn=None, scope='rpn_bbox_pred')
    
    if is_training:
        # Positions (including the all-zero batch_inds column) and foreground
        # scores of the post_nms_topN=2000 proposals kept after NMS.
        rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        
        # rpn_labels: for each anchor position, positive / negative / ignore.
        rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
        # Try to have a deterministic order for the computing graph, for reproducibility
        with tf.control_dependencies([rpn_labels]):  
            # From the post_nms_topN proposals and their foreground scores,
            # sample the 256 training rois (the all-zero first column is
            # replaced by each roi's assigned class).
            rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")  
    else:
        if cfg.TEST.MODE == 'nms':
            # Test path: per-position anchor probabilities + regression offsets
            # yield post_nms_topN=300 proposals after NMS.
            rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        elif cfg.TEST.MODE == 'top':
            rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        else:
            raise NotImplementedError

    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_cls_pred"] = rpn_cls_pred
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["rois"] = rois

    return rois  # class (first column: per-roi class when training, all zeros at test time) + 4 coordinates

def _reshape_layer(self, bottom, num_dim, name):
    """Caffe-style channel reshape of an NHWC tensor.

    Transposes to NCHW, reshapes the channel axis to `num_dim` (folding the
    remainder into the height axis, batch hard-coded to 1), then transposes
    back to NHWC: 1*h*w*(num_dim*9) -> 1*(9*h)*w*num_dim.
    """
    shape = tf.shape(bottom)
    with tf.variable_scope(name):
        # NHWC (TF layout) -> NCHW (caffe layout)
        nchw = tf.transpose(bottom, [0, 3, 1, 2])
        # 1*(num_dim*9)*h*w ==> 1*num_dim*(9*h)*w, keeping the original width
        target_shape = tf.concat(axis=0, values=[[1, num_dim, -1], [shape[2]]])
        folded = tf.reshape(nchw, target_shape)
        # NCHW -> NHWC: 1*num_dim*(9*h)*w --> 1*(9*h)*w*num_dim
        return tf.transpose(folded, [0, 2, 3, 1])


def _softmax_layer(self, bottom, name):
    """Softmax over the last dimension.

    For the RPN reshape path ('rpn_cls_prob_reshape...') the input is first
    flattened to 2-D so the softmax is computed per anchor pair, then the
    original shape is restored; all other inputs get a plain softmax.
    """
    if not name.startswith('rpn_cls_prob_reshape'):
        return tf.nn.softmax(bottom, name=name)
    # bottom: 1*(h*9)*w*2 — merge every leading dim, keep only the class dim:
    # 1*(h*9)*w*2 ==> (1*h*9*w)*2
    shape = tf.shape(bottom)
    flat = tf.reshape(bottom, [-1, shape[-1]])
    # Per-row (per-anchor) probabilities.
    probs = tf.nn.softmax(flat, name=name)
    # Restore the original layout: (1*h*9*w)*2 ==> 1*(h*9)*w*2
    return tf.reshape(probs, shape)
           

參考網址:

https://www.cnblogs.com/darkknightzh/p/10043864.html

論文:https://arxiv.org/abs/1506.01497

tf的第三方faster rcnn:https://github.com/endernewton/tf-faster-rcnn

IOU:https://www.cnblogs.com/darkknightzh/p/9043395.html

繼續閱讀