Sentinel(Redis 3.0.0-rc1)
Sentinel是Redis HA方案,一个或多个Sentinel实例组成的Sentinel系统,可以监视任意多个主服务器(master),
以及这些主服务器属下的所有从服务器(slave),并在被监视的主服务器进入下线状态时,自动将已下线主服务器属下的某个从服务器升级为新的主服务器,
然后由新的主服务器代替已下线的主服务器继续处理命令请求。
基础数据结构
/* State a Sentinel keeps for one monitored instance. According to the
 * 'flags' field, an instance can be a master, a slave or another Sentinel.
 * The struct is split into common fields, master-specific fields,
 * slave-specific fields and failover state. */
typedef struct sentinelRedisInstance {
    // Type and state of this instance (master, slave, sentinel, down or not)
    int flags;
    // Instance name (the original note describes it as a host name / ip:port)
    char *name;
    // Run ID of the instance
    char *runid;
    // Configuration epoch
    uint64_t config_epoch;
    // Address of the instance
    sentinelAddr *addr;
    redisAsyncContext *cc; /* Hiredis context for commands. */
    redisAsyncContext *pc; /* Hiredis context for Pub / Sub. */
    int pending_commands; /* Number of commands sent waiting for a reply. */
    mstime_t cc_conn_time; /* cc connection time. */
    mstime_t pc_conn_time; /* pc connection time. */
    mstime_t pc_last_activity; /* Last time we received any message. */
    mstime_t last_avail_time; /* Last time the instance replied to ping with
                                 a reply we consider valid. */
    mstime_t last_ping_time; /* Last time a pending ping was sent in the
                                context of the current command connection
                                with the instance. 0 if still not sent or
                                if pong already received. */
    mstime_t last_pong_time; /* Last time the instance replied to ping,
                                whatever the reply was. That's used to check
                                if the link is idle and must be reconnected. */
    mstime_t last_pub_time; /* Last time we sent hello via Pub/Sub. */
    mstime_t last_hello_time; /* Only used if SRI_SENTINEL is set. Last time
                                 we received a hello from this Sentinel
                                 via Pub/Sub. */
    mstime_t last_master_down_reply_time; /* Time of last reply to
                                             SENTINEL is-master-down command. */
    mstime_t s_down_since_time; /* Subjectively down since time. */
    mstime_t o_down_since_time; /* Objectively down since time. */
    // Milliseconds without a response after which the instance is
    // considered subjectively down
    mstime_t down_after_period;
    mstime_t info_refresh; /* Time at which we received INFO output from it. */
    /* Role and the first time we observed it.
     * This is useful in order to delay replacing what the instance reports
     * with our own configuration. We need to always wait some time in order
     * to give a chance to the leader to report the new configuration before
     * we do silly things. */
    int role_reported;
    mstime_t role_reported_time;
    mstime_t slave_conf_change_time; /* Last time slave master addr changed. */
    /* Master specific. */
    dict *sentinels; /* Other sentinels monitoring the same master. */
    dict *slaves; /* Slaves for this master instance. */
    // Number of agreeing votes needed to mark the master objectively down
    unsigned int quorum;
    // During a failover, number of slaves allowed to sync with the new
    // master at the same time
    int parallel_syncs; /* How many slaves to reconfigure at same time. */
    char *auth_pass; /* Password to use for AUTH against master & slaves. */
    /* Slave specific. */
    mstime_t master_link_down_time; /* Slave replication link down time. */
    int slave_priority; /* Slave priority according to its INFO output. */
    mstime_t slave_reconf_sent_time; /* Time at which we sent SLAVE OF <new> */
    struct sentinelRedisInstance *master; /* Master instance if it's slave. */
    char *slave_master_host; /* Master host as reported by INFO */
    int slave_master_port; /* Master port as reported by INFO */
    int slave_master_link_status; /* Master link status as reported by INFO */
    unsigned long long slave_repl_offset; /* Slave replication offset. */
    /* Failover */
    char *leader; /* If this is a master instance, this is the runid of
                     the Sentinel that should perform the failover. If
                     this is a Sentinel, this is the runid of the Sentinel
                     that this Sentinel voted as leader. */
    uint64_t leader_epoch; /* Epoch of the 'leader' field. */
    uint64_t failover_epoch; /* Epoch of the currently started failover. */
    int failover_state; /* See SENTINEL_FAILOVER_STATE_* defines. */
    mstime_t failover_state_change_time;
    mstime_t failover_start_time; /* Last failover attempt start time. */
    // Maximum time allowed to refresh the failover state
    mstime_t failover_timeout; /* Max time to refresh failover state. */
    mstime_t failover_delay_logged; /* For what failover_start_time value we
                                       logged the failover delay. */
    struct sentinelRedisInstance *promoted_slave; /* Promoted slave instance. */
    /* Scripts executed to notify admin or reconfigure clients: when they
     * are set to NULL no script is executed. */
    char *notification_script;
    char *client_reconfig_script;
} sentinelRedisInstance;
/* Global state of the running Sentinel, held in the single 'sentinel'
 * object. Every field is given its initial value by initSentinel(). */
struct sentinelState {
    // Current epoch
    uint64_t current_epoch;
    // Monitored masters: key is the master name, value is a
    // sentinelRedisInstance object
    dict *masters;
    // Whether Sentinel is currently in TILT mode
    int tilt;
    // Number of scripts currently being executed
    int running_scripts;
    // Time at which TILT mode was entered
    mstime_t tilt_start_time;
    // Last time the time handler was run
    mstime_t previous_time;
    // Queue of user scripts waiting to be executed
    list *scripts_queue;
    // Address to announce; initSentinel() sets it to NULL.
    // NOTE(review): presumably the IP advertised to other Sentinels in
    // place of the auto-detected one -- confirm against the configuration.
    char *announce_ip;
    // Port to announce; initSentinel() sets it to 0.
    // NOTE(review): presumably 0 means "use the listening port" -- confirm.
    int announce_port;
} sentinel;
Sentinel初始化
启动命令:
redis-sentinel /path/to/sentinel.conf
或者
redis-server /path/to/sentinel.conf --sentinel
Sentinel启动的时候,必须指定配置文件,最小配置类似:
sentinel monitor mymaster 127.0.0.1 6379 2
sentinel down-after-milliseconds mymaster 60000
sentinel failover-timeout mymaster 180000
sentinel parallel-syncs mymaster 1
这个配置文件表示:当前sentinel监视一个名为mymaster的Redis master,ip为127.0.0.1,端口为6379;
至少需要2个sentinel判定该master下线(quorum为2),才会进行主备切换。mymaster超过60000ms未响应,则被标记为主观下线。
在main函数中,如果server以sentinel模式启动,会先执行sentinel专用的初始化:
/* Excerpt of the server entry point; unrelated parts are elided with "...". */
int main(int argc, char **argv) {
    ...
    // When running in sentinel mode, apply the Sentinel-specific setup:
    // first the configuration overrides, then the Sentinel state itself.
    if (server.sentinel_mode) {
        initSentinelConfig();
        initSentinel();
    }
    ...
}
首先是覆盖redis server的端口设置,sentinel会默认监听在26379端口:
/* Port that a Sentinel listens on by default. */
#define REDIS_SENTINEL_PORT 26379

/* Replace the regular server configuration with the Sentinel defaults.
 * The only setting overridden here is the listening port. */
void initSentinelConfig(void)
{
    server.port = REDIS_SENTINEL_PORT;
}
然后覆盖server的命令表格,初始化sentinel对象:
void initSentinel(void) {
unsigned int j;
/* Remove usual Redis commands from the command table, then just add
* the SENTINEL command. */
// 清空Redis支持的命令表格,改成sentinel支持的命令表格
dictEmpty(server.commands,NULL);
for (j = 0; j < sizeof(sentinelcmds)/sizeof(sentinelcmds[0]); j++) {
int retval;
struct redisCommand *cmd = sentinelcmds+j;
retval = dictAdd(server.commands, sdsnew(cmd->name), cmd);
redisAssert(retval == DICT_OK);
}
/* Initialize various data structures. */
sentinel.current_epoch = 0;
sentinel.masters = dictCreate(&instancesDictType,NULL);
sentinel.tilt = 0;
sentinel.tilt_start_time = 0;
sentinel.previous_time = mstime();
sentinel.running_scripts = 0;
sentinel.scripts_queue = listCreate();
sentinel.announce_ip = NULL;
sentinel.announce_port = 0;
}
sentinel只能支持监控相关的命令,无法执行通常的Redis命令,sentinel可以支持的命令表格为:
/* Command table used in Sentinel mode. initSentinel() installs exactly
 * these entries into server.commands after emptying it, so they are the
 * only commands a Sentinel accepts.
 *
 * Each entry starts with the command name followed by its handler function.
 * NOTE(review): the third field is presumably the arity and the fourth the
 * flags string; the layout of struct redisCommand is not visible here, so
 * confirm against its definition. */
struct redisCommand sentinelcmds[] = {
    {"ping",pingCommand,1,"",0,NULL,0,0,0,0,0},
    {"sentinel",sentinelCommand,-2,"",0,NULL,0,0,0,0,0},
    {"subscribe",subscribeCommand,-2,"",0,NULL,0,0,0,0,0},
    {"unsubscribe",unsubscribeCommand,-1,"",0,NULL,0,0,0,0,0},
    {"psubscribe",psubscribeCommand,-2,"",0,NULL,0,0,0,0,0},
    {"punsubscribe",punsubscribeCommand,-1,"",0,NULL,0,0,0,0,0},
    /* publish, info and role use Sentinel-specific handlers. */
    {"publish",sentinelPublishCommand,3,"",0,NULL,0,0,0,0,0},
    {"info",sentinelInfoCommand,-1,"",0,NULL,0,0,0,0,0},
    {"role",sentinelRoleCommand,1,"l",0,NULL,0,0,0,0,0},
    {"shutdown",shutdownCommand,-1,"",0,NULL,0,0,0,0,0}
};
同时,由于sentinel不接受Redis普通命令,因此初始化的时候,也不会去加载rdb文件等原始数据。