天天看点

redis sentinel哨兵机制的源码分析

        sentinel是redis的高可用性解决方案:由一个或多个sentinel实例组成sentinel系统监视多个master以及master的slave,

并在被监视的master进入下线状态时,自动将下线master的某个slave升级为master,然后新的master代替下线的master处

理命令请求。

redis sentinel哨兵机制的源码分析

  一、下线状态监测

 (1)、主观下线监测

        在默认情况下,sentinel会以每秒一次的频率向所有与它创建了命令连接的实例(master、slave、sentinel)发送ping,

如果实例在down_after_period时间内,没有返回正确回复,标记为SRI_S_DOWN,反之,标记为正常状态。

       由定时任务serverCron定期监听所有连接实例

/* Periodic server timer callback (article excerpt; …… marks elided code).
 * Only the part relevant to sentinel mode is shown. */
int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
    ……
    run_with_period(100) {
        // When running as a sentinel server, drive the sentinel timer
        if (server.sentinel_mode) sentinelTimer();
    }
    ……
}
/* Sentinel timer entry point (article excerpt; …… marks elided code).
 * The visible step hands the dictionary of monitored masters to the
 * per-dictionary handler. */
void sentinelTimer(void) {
    ……
    sentinelHandleDictOfRedisInstances(sentinel.masters);
    ……
}// The function that follows checks every connected instance
/* Runs the per-instance handler on every instance stored in `instances`.
 * For entries flagged SRI_MASTER it also recurses into that master's
 * `slaves` and `sentinels` dictionaries, so one call on the masters table
 * reaches every connected instance.
 * (Article excerpt; …… marks elided code, including the iterator setup.) */
void sentinelHandleDictOfRedisInstances(dict *instances) {
    ……// On the top-level call `instances` is sentinelState->masters
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *ri = dictGetVal(de);
        sentinelHandleRedisInstance(ri);
        if (ri->flags & SRI_MASTER) {
            sentinelHandleDictOfRedisInstances(ri->slaves);
            sentinelHandleDictOfRedisInstances(ri->sentinels);
            ……
        }
    }
    ……
}
/* Per-instance periodic handler (article excerpt showing the monitoring
 * steps; …… marks elided code). */
void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
    ……// Send PING to this instance
    sentinelSendPeriodicCommands(ri);
    // Check whether the instance is subjectively down
    sentinelCheckSubjectivelyDown(ri);
    ……
}// The function that follows performs the subjective-down check
/* Sets or clears the SRI_S_DOWN (subjectively down) flag on `ri`.
 *
 * The instance is considered down when either
 *   - the pending PING has been unanswered for more than down_after_period, or
 *   - it is flagged as a master but has reported the slave role for longer
 *     than down_after_period + 2 * SENTINEL_INFO_PERIOD.
 * Otherwise any existing S_DOWN state is removed. */
void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
    /* act_ping_time is reset to 0 once a PONG is received, so a non-zero
     * value means a PING is still waiting for its reply. */
    mstime_t waited = 0;
    if (ri->link->act_ping_time != 0) {
        waited = mstime() - ri->link->act_ping_time;
    }

    int looks_down =
        waited > ri->down_after_period ||
        ((ri->flags & SRI_MASTER) &&
         ri->role_reported == SRI_SLAVE &&
         mstime() - ri->role_reported_time >
             (ri->down_after_period + SENTINEL_INFO_PERIOD * 2));

    if (looks_down) {
        /* Newly down: raise the flag and remember when it happened. */
        if (!(ri->flags & SRI_S_DOWN)) {
            ri->flags |= SRI_S_DOWN;
            ri->s_down_since_time = mstime();
        }
        return;
    }

    /* Instance is responsive again: drop the down state. */
    if (ri->flags & SRI_S_DOWN) {
        ri->flags &= ~(SRI_S_DOWN | SRI_SCRIPT_KILL_SENT);
    }
}
           

    (2)、客观下线监测

     当sentinel将master判断为主观下线之后,为了确认master是否真的下线,sentinel向监视master的sentinels询问

下线状态。在sentinel系统中有足够多的sentinel认为master下线了,sentinel将master标记为SRI_O_DOWN。(客观

下线监测只针对master,不针对slave和sentinel)

/* Per-instance periodic handler (article excerpt showing the checks that
 * run only for masters; …… marks elided code). */
void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
    if (ri->flags & SRI_MASTER) {
        sentinelCheckObjectivelyDown(ri);// objective-down check
        ……// Ask the other sentinels for their view of the master's down state
        sentinelAskMasterStateToOtherSentinels(ri,SENTINEL_NO_FLAGS);
    }
}// The function that follows asks the other sentinels about the master's down state
/* Sends "SENTINEL is-master-down-by-addr" to the sentinels stored in
 * master->sentinels, registering sentinelReceiveIsMasterDownReply as the
 * reply callback. (Article excerpt; …… marks elided code, including the
 * declarations of di, de, retval and port.) */
void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int flags) {
    ……
    di = dictGetIterator(master->sentinels);
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *ri = dictGetVal(de);
        ……// Other sentinels are only queried while the master is subjectively down
        if ((master->flags & SRI_S_DOWN) == 0) continue;
        …… /* While failover_state is SENTINEL_FAILOVER_STATE_NONE no failover
        has started yet: the last argument is then "*" and the command is a
        plain down-state query. Otherwise this sentinel's own id is sent. */
        retval = redisAsyncCommand(ri->link->cc,
          sentinelReceiveIsMasterDownReply, ri,
          "SENTINEL is-master-down-by-addr %s %s %llu %s",
           master->addr->ip, port,sentinel.current_epoch,
           (master->failover_state > SENTINEL_FAILOVER_STATE_NONE) ?sentinel.myid : "*");
        // Count the command as pending on this link when it was queued successfully
        if (retval == C_OK) ri->link->pending_commands++;
    }
}// The function that follows receives the reply to the down-state query
/* Reply callback for the is-master-down-by-addr query: records on the
 * replying sentinel's instance whether it considers the master down.
 * (Article excerpt; …… marks elided code, including how ri is obtained.) */
void sentinelReceiveIsMasterDownReply(redisAsyncContext *c, void *reply, void *privdata) {
    ……
    // The reply carries three fields: down_state, leader_runid, leader_epoch.
    // A plain state query only needs down_state.
    r = reply;// down_state == 1 means the replying sentinel considers the master down
    if (r->element[0]->integer == 1) {
        ri->flags |= SRI_MASTER_DOWN;
    } else {
        ri->flags &= ~SRI_MASTER_DOWN;
    }
    ……
}// The function that follows performs the objective-down check
/* Sets or clears the SRI_O_DOWN (objectively down) flag on `master`.
 *
 * The check only runs while this sentinel already flags the master as
 * SRI_S_DOWN. It then counts itself plus every known sentinel whose
 * instance carries SRI_MASTER_DOWN; when that count reaches master->quorum
 * the master is objectively down. */
void sentinelCheckObjectivelyDown(sentinelRedisInstance *master) {
    dictIterator *di;
    dictEntry *de;
    unsigned int quorum = 0, odown = 0;

    if (master->flags & SRI_S_DOWN) {
        quorum = 1; /* this sentinel's own S_DOWN verdict is the first vote */
        /* Add one vote per other sentinel that reported the master down. */
        di = dictGetIterator(master->sentinels);
        while((de = dictNext(di)) != NULL) {
            sentinelRedisInstance *ri = dictGetVal(de);
            if (ri->flags & SRI_MASTER_DOWN) quorum++;
        }
        dictReleaseIterator(di); /* every dictGetIterator needs a matching release */
        /* Objectively down once the votes are greater than OR EQUAL TO the
         * configured quorum. */
        if (quorum >= master->quorum) odown = 1;
    }

    if (odown) {
        /* Entering O_DOWN: raise the flag and record the transition time. */
        if ((master->flags & SRI_O_DOWN) == 0) {
            master->flags |= SRI_O_DOWN;
            master->o_down_since_time = mstime();
        }
    } else {
        /* Not enough votes: clear a previously raised O_DOWN flag. */
        if (master->flags & SRI_O_DOWN) {
            master->flags &= ~SRI_O_DOWN;
        }
    }
}
           

  二、故障转移

   (1)、领头的选举

     每个sentinel都会向其他监视master的sentinel发送请求,请求成为目标sentinel的局部领头sentinel,目标sentinel则会

返回自己设置好的局部领头给源sentinel。sentinel设置局部领头sentinel的规则是先到先得:最先向目标sentinel发

送设置请求的源sentinel将会成为目标sentinel的局部领头sentinel。得到半数以上sentinel支持的sentinel将成为领头,

如果没有超过半数,在下个配置纪元再进行领头选举。

     目标sentinel接收成为局部领头sentinel的请求,进行领头选举

/* Handles a request from the sentinel identified by `req_runid` to become
 * this sentinel's local leader for `master` in epoch `req_epoch`.
 * Stores the epoch of the recorded leader in *leader_epoch and returns
 * sdsnew() of the recorded leader runid, or NULL when no leader is recorded.
 * (Article excerpt; …… marks elided code.) */
char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char *req_runid, 
				uint64_t *leader_epoch) {
    ……/* The requester becomes the local leader only when the request epoch
     is newer than the epoch of the leader already recorded for this master
     and is not older than this sentinel's current epoch. The recorded leader
     epoch is then updated so that, per the original note, another leader
     request in the same epoch is refused. */
    if (master->leader_epoch < req_epoch && sentinel.current_epoch <= req_epoch)
    {
        sdsfree(master->leader);
        master->leader = sdsnew(req_runid);
        master->leader_epoch = sentinel.current_epoch;
    }
    ……
    *leader_epoch = master->leader_epoch;
    return master->leader ? sdsnew(master->leader) : NULL;
}
           

   (2)、故障转移

     领头选举完成后,领头sentinel对下线的master执行故障转移操作,该操作包含以下三个步骤:

     1)在下线master的从节点中选出一个slave,将其转换成master;

     2)让master的slave改为复制新master;

     3)将下线master设置为新master的从节点,当其重新上线时,成为新master的slave。

     其中新master的选择规则是:领头sentinel先删除列表中处于非正常状态的节点,再根据slave的slave_priority,slave_repl_offset,

runid对列表中剩余的slaves进行排序:优先选择slave_priority数值最小(即优先级最高)的slave;slave_priority相同时选择slave_repl_offset最大的;slave_repl_offset

相同时选择runid最小的。

/* Collects the slaves of `master` that are eligible for promotion and sorts
 * them with compareSlavesForPromotion.
 * (Article excerpt; …… marks elided code, including the declarations of
 * di, de, instances, info_validity_time and max_master_down_time, and the
 * final selection/return.) */
sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master) {
    sentinelRedisInstance **instance =zmalloc(sizeof(instance[0])*dictSize(master->slaves));
    ……
    di = dictGetIterator(master->slaves);
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *slave = dictGetVal(de);

        /* Skip slaves flagged as subjectively or objectively down. */
        if (slave->flags & (SRI_S_DOWN|SRI_O_DOWN)) continue;
        /* Skip slaves whose link is disconnected. */
        if (slave->link->disconnected) continue;
        /* Skip slaves whose link was last available more than
         * SENTINEL_PING_PERIOD*5 ago. */
        if (mstime() - slave->link->last_avail_time > SENTINEL_PING_PERIOD*5) continue;
        /* Skip slaves with slave_priority 0. */
        if (slave->slave_priority == 0) continue;
        /* Skip slaves whose INFO data is older than info_validity_time. */
        if (mstime() - slave->info_refresh > info_validity_time) continue;
        /* Skip slaves whose link to the master has been down for longer
         * than max_master_down_time. */
        if (slave->master_link_down_time > max_master_down_time) continue;
        instance[instances++] = slave;
    }
    ……
    qsort(instance,instances,sizeof(sentinelRedisInstance*),compareSlavesForPromotion);
    ……
}// The function that follows defines the slave ordering used by the sort above
/* qsort() comparator over an array of sentinelRedisInstance pointers.
 *
 * Ordering, best candidate first:
 *   1. smaller slave_priority value;
 *   2. larger slave_repl_offset;
 *   3. smaller runid under case-insensitive comparison, with a NULL runid
 *      sorting after any non-NULL runid.
 * Returns <0, 0 or >0 following the usual comparator convention. */
int compareSlavesForPromotion(const void *a, const void *b) {
    sentinelRedisInstance **sa = (sentinelRedisInstance **)a,
                          **sb = (sentinelRedisInstance **)b;
    char *sa_runid, *sb_runid;

    /* 1. Priority: ascending by numeric value. */
    if ((*sa)->slave_priority != (*sb)->slave_priority)
        return (*sa)->slave_priority - (*sb)->slave_priority;

    /* 2. Replication offset: the slave that processed more data sorts first. */
    if ((*sa)->slave_repl_offset > (*sb)->slave_repl_offset) {
        return -1; /* a < b */
    } else if ((*sa)->slave_repl_offset < (*sb)->slave_repl_offset) {
        return 1; /* a > b */
    }

    /* 3. Run id as the final tie-breaker; a missing run id sorts last. */
    sa_runid = (*sa)->runid;
    sb_runid = (*sb)->runid;
    if (sa_runid == NULL && sb_runid == NULL) return 0;
    else if (sa_runid == NULL) return 1;  /* a > b */
    else if (sb_runid == NULL) return -1; /* a < b */
    return strcasecmp(sa_runid, sb_runid);
}
           

    故障转移的代码实现过程是一个状态机(sentinelFailoverStateMachine函数)如下图所示:

redis sentinel哨兵机制的源码分析

    故障转移状态

/* Failover state machine states, in the order they are traversed. */
#define SENTINEL_FAILOVER_STATE_NONE 0  /* No failover in progress. */
#define SENTINEL_FAILOVER_STATE_WAIT_START 1  /* The leader sentinel takes over the failover. */
#define SENTINEL_FAILOVER_STATE_SELECT_SLAVE 2 /* Select the slave to promote to master. */
#define SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE 3 /* Send SLAVEOF NO ONE to the new master. */
#define SENTINEL_FAILOVER_STATE_WAIT_PROMOTION 4 /* Wait for the promotion to finish; abort the failover on timeout. */
#define SENTINEL_FAILOVER_STATE_RECONF_SLAVES 5 /* Promotion done: make the slaves replicate from the new master. */
#define SENTINEL_FAILOVER_STATE_UPDATE_CONFIG 6 /* Start monitoring the new master. */
           

    开始进行failover,向其它sentinel请求成为其局部领头,sentinelFailoverStateMachine根据不同状态做出相应的处理。

/* Per-instance periodic handler (article excerpt showing the failover
 * steps that run only for masters; …… marks elided code). */
void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
   ……
   if (ri->flags & SRI_MASTER) {
        if(sentinelStartFailoverIfNeeded(ri))
        // A failover was just started: ask the other sentinels to accept this one as their local leader
            sentinelAskMasterStateToOtherSentinels(ri,SENTINEL_ASK_FORCED);
        sentinelFailoverStateMachine(ri);
    }
}// The function that follows checks that the master is objectively down and has no failover in progress
/* Starts a failover for `master` when it is objectively down and no
 * failover is already in progress for it.
 * Returns 1 when a failover was started, 0 otherwise. */
int sentinelStartFailoverIfNeeded(sentinelRedisInstance *master) {
    int objectively_down = (master->flags & SRI_O_DOWN) != 0;
    int failover_running = (master->flags & SRI_FAILOVER_IN_PROGRESS) != 0;

    if (objectively_down && !failover_running) {
        /* Moves failover_state to SENTINEL_FAILOVER_STATE_WAIT_START. */
        sentinelStartFailover(master);
        return 1;
    }
    return 0;
}
           

   状态机中的函数汇总

/* Functions that make up the failover state machine. Each comment describes
 * the prototype directly above it. */
void sentinelStartFailover(sentinelRedisInstance *master);
// Starts the failover: failover_state = SENTINEL_FAILOVER_STATE_WAIT_START
void sentinelFailoverWaitStart(sentinelRedisInstance *ri); 
// If this sentinel is the leader: failover_state = SENTINEL_FAILOVER_STATE_SELECT_SLAVE
void sentinelFailoverSelectSlave(sentinelRedisInstance *ri);
// Selects the slave to promote: failover_state = SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE
void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri);
// Sends SLAVEOF NO ONE to the new master: failover_state = SENTINEL_FAILOVER_STATE_WAIT_PROMOTION
void sentinelFailoverWaitPromotion(sentinelRedisInstance *ri);
// Waits for the new master's role change to complete; aborts the failover on timeout
void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info);
/* When the INFO reply of the new master reports role == SRI_MASTER while the
locally recorded flag is still SRI_SLAVE, the promotion is complete:
failover_state = SENTINEL_FAILOVER_STATE_RECONF_SLAVES */
void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master);
// Makes the slaves replicate from the new master: failover_state = SENTINEL_FAILOVER_STATE_UPDATE_CONFIG
void sentinelFailoverSwitchToPromotedSlave(sentinelRedisInstance *master);
// Starts monitoring the new master: failover_state = SENTINEL_FAILOVER_STATE_NONE