检查主观下线
sentinel每次发送PING命令,用于检测被监测的master和slave是否宕机。
int sentinelSendPing(sentinelRedisInstance *ri) {
int retval = redisAsyncCommand(ri->cc,
sentinelPingReplyCallback, NULL, "PING");
if (retval == REDIS_OK) {
ri->pending_commands++;
/* We update the ping time only if we received the pong for
* the previous ping, otherwise we are technically waiting
* since the first ping that did not received a reply. */
// 只有收到了PONG响应的时候,这个字段才会变成0
if (ri->last_ping_time == 0) ri->last_ping_time = mstime();
return 1;
} else {
return 0;
}
}
void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
...
if (strncmp(r->str,"PONG",4) == 0 ||
strncmp(r->str,"LOADING",7) == 0 ||
strncmp(r->str,"MASTERDOWN",10) == 0)
{
ri->last_avail_time = mstime();
ri->last_ping_time = 0; /* Flag the pong as received. */
}
...
}
只有收到PONG、LOADING、MASTERDOWN响应的时候,sentinel才认为master正常,记录最后的响应时间。
同时每次调用sentinelHandleRedisInstance函数的时候,都会去检查是否有监控的master活着slave已经主管超时。
void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
mstime_t elapsed = 0;
if (ri->last_ping_time)
elapsed = mstime() - ri->last_ping_time;
/* Check if we are in need for a reconnection of one of the
* links, because we are detecting low activity.
*
* 1) Check if the command link seems connected, was connected not less
* than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have a
* pending ping for more than half the timeout. */
// 检查命令连接是否已经超时
if (ri->cc &&
(mstime() - ri->cc_conn_time) > SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
ri->last_ping_time != 0 && /* Ther is a pending ping... */
/* The pending ping is delayed, and we did not received
* error replies as well. */
(mstime() - ri->last_ping_time) > (ri->down_after_period/2) &&
(mstime() - ri->last_pong_time) > (ri->down_after_period/2))
{
sentinelKillLink(ri,ri->cc);
}
/* 2) Check if the pubsub link seems connected, was connected not less
* than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have no
* activity in the Pub/Sub channel for more than
* SENTINEL_PUBLISH_PERIOD * 3.
*/
// 检查订阅连接是否已经超时
if (ri->pc &&
(mstime() - ri->pc_conn_time) > SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
(mstime() - ri->pc_last_activity) > (SENTINEL_PUBLISH_PERIOD*3))
{
sentinelKillLink(ri,ri->pc);
}
/* Update the SDOWN flag. We believe the instance is SDOWN if:
*
* 1) It is not replying.
* 2) We believe it is a master, it reports to be a slave for enough time
* to meet the down_after_period, plus enough time to get two times
* INFO report from the instance. */
if (elapsed > ri->down_after_period ||
(ri->flags & SRI_MASTER &&
ri->role_reported == SRI_SLAVE &&
mstime() - ri->role_reported_time >
(ri->down_after_period+SENTINEL_INFO_PERIOD*2)))
{
/* Is subjectively down */
if ((ri->flags & SRI_S_DOWN) == 0) {
sentinelEvent(REDIS_WARNING,"+sdown",ri,"%@");
ri->s_down_since_time = mstime();
ri->flags |= SRI_S_DOWN;
}
} else {
/* Is subjectively up */
if (ri->flags & SRI_S_DOWN) {
sentinelEvent(REDIS_WARNING,"-sdown",ri,"%@");
ri->flags &= ~(SRI_S_DOWN|SRI_SCRIPT_KILL_SENT);
}
}
}
注意:同一个master的所有slave,主观下线超时时间相同(配置文件中配置的down-after-milliseconds),不同master可以独立配置。
检测客观下线
void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
...
/* Only masters */
if (ri->flags & SRI_MASTER) {
sentinelCheckObjectivelyDown(ri);
if (sentinelStartFailoverIfNeeded(ri))
sentinelAskMasterStateToOtherSentinels(ri,SENTINEL_ASK_FORCED);
sentinelFailoverStateMachine(ri);
sentinelAskMasterStateToOtherSentinels(ri,SENTINEL_NO_FLAGS);
}
}
在sentinelHandleRedisInstance函数最后,针对master还会做这些操作:
* 检查客观下线
* 询问其他sentinel的检查结果
* 进行故障转移
首先先来看询问其他sentinel的检查结果:
#define SENTINEL_ASK_FORCED (1<<0)
void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int flags) {
dictIterator *di;
dictEntry *de;
// 迭代sentinels字典里面保存的所有已知sentinels
di = dictGetIterator(master->sentinels);
while((de = dictNext(di)) != NULL) {
sentinelRedisInstance *ri = dictGetVal(de);
mstime_t elapsed = mstime() - ri->last_master_down_reply_time;
char port[32];
int retval;
/* If the master state from other sentinel is too old, we clear it. */
if (elapsed > SENTINEL_ASK_PERIOD*5) {
ri->flags &= ~SRI_MASTER_DOWN;
sdsfree(ri->leader);
ri->leader = NULL;
}
/* Only ask if master is down to other sentinels if:
*
* 1) We believe it is down, or there is a failover in progress.
* 2) Sentinel is connected.
* 3) We did not received the info within SENTINEL_ASK_PERIOD ms. */
// 只有当自己已经检测到master已经进入主观下线状态才发送询问
if ((master->flags & SRI_S_DOWN) == 0) continue;
// 确保这个sentinel仍然连接着
if (ri->flags & SRI_DISCONNECTED) continue;
// 非强制,且在指定周期(1000ms)内还没有收到回复,暂时不发送
if (!(flags & SENTINEL_ASK_FORCED) &&
mstime() - ri->last_master_down_reply_time < SENTINEL_ASK_PERIOD)
continue;
/* Ask */
ll2string(port,sizeof(port),master->addr->port);
retval = redisAsyncCommand(ri->cc,
sentinelReceiveIsMasterDownReply, NULL,
"SENTINEL is-master-down-by-addr %s %s %llu %s",
master->addr->ip, port,
sentinel.current_epoch,
(master->failover_state > SENTINEL_FAILOVER_STATE_NONE) ?
server.runid : "*");
if (retval == REDIS_OK) ri->pending_commands++;
}
dictReleaseIterator(di);
}
发送消息包含:
* 被判断为主观下线的master IP(master->addr->ip)
* 被判断为主管下线的master端口(port)
* sentinel当前纪元
* 运行id,*表示用于检测该master是否客观下线,当前sentinel运行id用于选举头领
接收响应:
void sentinelReceiveIsMasterDownReply(redisAsyncContext *c, void *reply, void *privdata) {
sentinelRedisInstance *ri = c->data;
redisReply *r;
REDIS_NOTUSED(privdata);
if (ri) ri->pending_commands--;
if (!reply || !ri) return;
r = reply;
/* Ignore every error or unexpected reply.
* Note that if the command returns an error for any reason we'll
* end clearing the SRI_MASTER_DOWN flag for timeout anyway. */
// 响应格式:第一个元素表示是否同意主观下线,1表示同意,0表示不同意
// 第二个元素,在查询状态的时候返回*
// 第三个元素仅在第二个元素不为*的时候有效,其余情况都未0
if (r->type == REDIS_REPLY_ARRAY && r->elements == 3 &&
r->element[0]->type == REDIS_REPLY_INTEGER &&
r->element[1]->type == REDIS_REPLY_STRING &&
r->element[2]->type == REDIS_REPLY_INTEGER)
{
ri->last_master_down_reply_time = mstime();
if (r->element[0]->integer == 1) {
ri->flags |= SRI_MASTER_DOWN;
} else {
ri->flags &= ~SRI_MASTER_DOWN;
}
if (strcmp(r->element[1]->str,"*")) {
/* If the runid in the reply is not "*" the Sentinel actually
* replied with a vote. */
sdsfree(ri->leader);
if ((long long)ri->leader_epoch != r->element[2]->integer)
redisLog(REDIS_WARNING,
"%s voted for %s %llu", ri->name,
r->element[1]->str,
(unsigned long long) r->element[2]->integer);
ri->leader = sdsnew(r->element[1]->str);
ri->leader_epoch = r->element[2]->integer;
}
}
}
检查是否客观下线:
void sentinelCheckObjectivelyDown(sentinelRedisInstance *master) {
dictIterator *di;
dictEntry *de;
unsigned int quorum = 0, odown = 0;
if (master->flags & SRI_S_DOWN) {
/* Is down for enough sentinels? */
quorum = 1; /* the current sentinel. */
/* Count all the other sentinels. */
// 遍历sentinels字典,统计标记master主观下线的sentinel数量
di = dictGetIterator(master->sentinels);
while((de = dictNext(di)) != NULL) {
sentinelRedisInstance *ri = dictGetVal(de);
if (ri->flags & SRI_MASTER_DOWN) quorum++;
}
dictReleaseIterator(di);
if (quorum >= master->quorum) odown = 1;
}
/* Set the flag accordingly to the outcome. */
if (odown) {
if ((master->flags & SRI_O_DOWN) == 0) {
sentinelEvent(REDIS_WARNING,"+odown",master,"%@ #quorum %d/%d",
quorum, master->quorum);
master->flags |= SRI_O_DOWN;
master->o_down_since_time = mstime();
}
} else {
if (master->flags & SRI_O_DOWN) {
sentinelEvent(REDIS_WARNING,"-odown",master,"%@");
master->flags &= ~SRI_O_DOWN;
}
}
}
有了前面通过is-master-down-by-addr询问和标记,就能够知道标记为主观下线的sentinel数量,如果这个数量超过了配置文件里面设置的quorum,
则标记该master进入客观下线。