搞了几天redis cluster codis 的部署安装,测试,架构优化,配合研发应用整合,这里记一些心得!
背景需求:
之前多个业务都在使用 redis 库,各业务独立占用主从两台服务器,硬件资源利用不合理;主从架构冗余度不高,主 redis 故障时,从 redis 恢复需要时间,降低了业务的可用性。所以调研、测试并部署了基于 codis 的 redis 集群。官方地址:https://github.com/CodisLabs/codis
部署文档 参考这里http://www.cnblogs.com/shantu/p/4589798.html
维护管理
了解过 codis 的同志都知道,codis 集群各组件服务的启动是有一定顺序的,而且添加组、初始化 slot、各节点启动 proxy 基本都依赖于 dashboard,所以要遵循以下基本步骤。
- start dashboard
- start master and slave codis redis
- add group
- init slot
- start proxy
下面根据自己的使用习惯写了一个 codis 集群的管理脚本
- 主要实现codis集群服务组件启动关闭、redis配置文件生成的功能,python学的实在太烂,之前写的那个更烂,这个做了些优化
- 使用说明:codis 本身默认配置为config.ini 放在当前home, 使用此脚本,也需要预配codis.ini配置文件,供脚本解析调用,
如下:
[codis_group]
group_1 = 192.168.5.14:6381 master,192.168.5.15:6381 slave
group_2 = 192.168.5.15:6382 master,192.168.5.14:6382 slave
group_3 = 192.168.5.16:6383 master,192.168.5.44:6383 slave
group_4 = 192.168.5.44:6384 master,192.168.5.16:6384 slave
- cat codis_manage.py
#!/usr/bin/env python
# coding:utf8
#author:shantuwqk@163.com
import os,sys,commands,time
import ConfigParser
from subprocess import Popen,PIPE
from mako.template import Template
# Codis configuration file name passed to codis-config / codis-proxy via -c.
# Those commands run after `cd codis_root`; the proxy helpers also `cat` it
# from the current working directory to read proxy_id.
codis_config = "config.ini"
# Working directory for every codis command; binaries are referenced as
# ../bin/ relative to it, configs as ./conf/ and logs as ./log/.
codis_root = "/data/setup/codis/data"
# Directory handed to the redis config templates as ``datadir``.
codis_dbdir = "%s/db"%codis_root
def pars_config():
    """Return the entries of the ``[codis_group]`` section of ``codis.ini``.

    The file is looked up in the current working directory.

    Returns:
        A list of ``(option_name, value)`` tuples, e.g.
        ``("group_1", "192.168.5.14:6381 master,192.168.5.15:6381 slave")``.

    Raises:
        ConfigParser.NoSectionError: if ``codis.ini`` is missing or has no
            ``[codis_group]`` section (``read`` silently skips missing files).
    """
    parser = ConfigParser.ConfigParser()
    parser.read("codis.ini")
    return parser.items("codis_group")
def codis_dashboard(opt):
if opt == "start":
#os.chdir(codis_root)
exec_cmd = "cd %s; nohup ../bin/codis-config -c %s -L ./log/dashboard.log dashboard --addr=:18087 --http-log=./log/http.log &>/dev/null &" %(codis_root,codis_config)
s = os.system(exec_cmd)
if s == 0:
print "\033[32;1m codis dashboard start .... OK\033[0m"
else:
print "\033[31;1m codis dashboard start .... Error\033[0m"
elif opt == "stop":
dashboard_id = "ps aux |grep \"codis-config\"| grep \"dashboard\"|awk '{print $2}'"
s,v = commands.getstatusoutput("kill -9 `%s`" %dashboard_id)
if s == 0:
print "\033[32;1m KILL codis dashboard id:[%s] OK\033[0m"%dashboard_id
else:
print "\033[31;1m KILL codis dashboard id:[%s] Error\033[0m"%dashboard_id
else:
pass
def INIT_config_master(port,memsize,datadir,gtag):
os.chdir(codis_root)
template_file = Template(filename="./conf/redis.master.conf.template",module_directory='tmp/test').render(port="%s"%port,memsize=memsize,datadir=datadir)
conf = open('./conf/%s.conf'%gtag,'w')
conf.write(template_file)
conf.close()
dirisnot = os.path.exists(datadir)
redisconf = os.path.isfile("%s/conf/%s.conf"%(codis_root,gtag))
if not dirisnot and redisconf:
print dirisnot,"create %s ...."%datadir
os.makedirs(datadir)
print commands.getstatusoutput("ls -l %s/conf/redis_%s.conf"%(codis_root,gtag))[1]
return True
else:
print "%s is exists !!"%datadir
return False
def INIT_config_slave(port,memsize,datadir,slaveof,gtag):
os.chdir(codis_root)
template_file = Template(filename="./conf/redis.slave.conf.template",module_directory='tmp/test').render(port="%s"%port,memsize=memsize,datadir=datadir,slaveof="%s %s"%(slaveof.split(":")[0],slaveof.split(":")[1]))
conf = open('./conf/%s.conf'%gtag,'w')
conf.write(template_file)
conf.close()
dirisnot = os.path.exists(datadir)
redisconf = os.path.isfile("./conf/%s.conf"%(gtag))
if not dirisnot and redisconf:
print dirisnot,"create %s ...."%datadir
os.makedirs(datadir)
print commands.getstatusoutput("ls -l %s/conf/redis_%s.conf"%(codis_root,gtag))[1]
return True
else:
print "%s is exists !!"%datadir
return False
def codis_redis(opt,cf):
print "###################33",cf,
cname = cf[:-5]
print cname,
port = cf[-9:-5]
print "#########",port,
redis_id = "ps aux | grep codis-server| grep -v grep |grep %s|awk '{print $2}'"%port
print redis_id
if opt == "start":
ids,idv = commands.getstatusoutput(redis_id)
print ids,type(idv)
if ids == 0 and idv != "":
print "\033[32;1m%s redis process Already exists pid is:[%s]\033[0m"%(port,idv)
sys.exit(1)
else:
exec_cmd = "cd %s; nohup ../bin/codis-server ./conf/%s.conf &> ./log/%s.log &" %(codis_root,cname,cname)
print exec_cmd
print os.system(exec_cmd)
s = os.system(exec_cmd)
if s == 0:
print "\033[32;1m start redis %s server is OK \033[0m"%(port)
else:
print "\033[31;1m start redis %s server is Error\033[0m"%(port)
elif opt == "stop":
s,v = commands.getstatusoutput("kill -9 `%s`" %redis_id)
if s == 0:
print "\033[32;1m KILL redis port:[%s] pid:[%s] is OK\033[0m"%(port,redis_id)
print v,
else:
print "\033[31;1m KILL redis port:[%s] pid:[%s] is Error\033[0m"%(port,redis_id)
print v,
else:
pass
def codis_group():
print "change codis.ini exists?"
if os.path.isfile("codis.ini") and pars_config() != "":
print "\033[32;1mparsing the codis.ini configuration file\033[0m"
for i in sorted(pars_config()):
#print i[1],'\n'
for j in i[1].split(","):
#print j,'\n',
exec_cmd = "cd %s; ../bin/codis-config -c %s -L ./log/%s_addgroup.log server add %s %s %s" %(codis_root,codis_config,j.split()[0].split(":")[1],i[0].split("_")[1],j.split()[0],j.split()[1])
print exec_cmd,'\n',
s,v = commands.getstatusoutput(exec_cmd)
print s,v,
if s == 0:
print "\033[32;1m add group:[%s] with a gtag:[%s] addr:(%s) OK\033[0m" %(i[0].split("_")[1],j.split()[1],j.split()[0])
else:
print "\033[31;1m add group:[%s] with a gtag:[%s] addr:(%s) Error\033[0m" %(i[0].split("_")[1],j.split()[1],j.split()[0])
else:
print "config not exist!!"
def remove_fenc():
remove_fenc = "cd %s;../bin/codis-config -c %s action remove-fence"%(codis_root,codis_config)
s,v = commands.getstatusoutput(remove_fenc)
print "remove fenc proxy info",v,
def slot_init():
init_cmd = "cd %s; ../bin/codis-config -c %s slot init -f" %(codis_root,codis_config)
print "INIT SLOT ........","\n",commands.getstatusoutput(init_cmd)[1]
def codis_initslot(gid,slot_range):
exec_cmd = "cd %s; ../bin/codis-config -c %s slot range-set %s %s %s online"%(codis_root,codis_config,slot_range[0],slot_range[1],gid)
print exec_cmd
s,v = commands.getstatusoutput(exec_cmd)
print v,
if s == 0:
print "\033[32;1m slot init:[%s],gid:[%s] OK\033[0m"%(slot_range,gid)
else:
print "\033[31;1m slot init:[%s],gid:[%s] Error\033[0m"%(slot_range,gid)
def offline_proxy():
proxy_tag = commands.getstatusoutput("cat %s|grep proxy_id"%codis_config)[1].split('=')[1]
exec_down_cmd = "cd %s; ../bin/codis-config -c %s proxy offline %s" %(codis_root,codis_config,proxy_tag)
print "Shutdown %s offline....." %proxy_tag
print exec_down_cmd
commands.getstatusoutput(exec_down_cmd)
def codis_proxy(opt):
proxy_tag = commands.getstatusoutput("cat %s|grep proxy_id"%codis_config)[1].split('=')[1]
if opt == "start":
exec_new_cmd = "cd %s; nohup ../bin/codis-proxy --log-level info -c %s -L ./log/%s.log --cpu=8 --addr=0.0.0.0:19000 --http-addr=0.0.0.0:11000 &" %(codis_root,codis_config,proxy_tag)
print exec_new_cmd
s = os.system(exec_new_cmd)
if s == 0:
print "\033[32;1m codis proxy tag:[%s]start OK\033[0m"%(proxy_tag)
else:
print "\033[31;1m codis proxy tag:[%s]start Error\033[0m"%(proxy_tag)
time.sleep(5)
exec_online_cmd = "cd %s;../bin/codis-config -c %s proxy online %s" %(codis_root,codis_config,proxy_tag)
print "Set %s online .....!!" %(proxy_tag)
print exec_online_cmd
print commands.getstatusoutput(exec_online_cmd)[1]
elif opt == "stop":
print "Shutdown %s offline....." %proxy_tag
exec_down_cmd = "cd %s; ../bin/codis-config -c %s proxy offline %s" %(codis_root,codis_config,proxy_tag)
print commands.getstatusoutput(exec_down_cmd)[1]
exec_proxy_id = "ps aux |grep codis-proxy| grep -v grep|awk '{print $2}'"
print commands.getstatusoutput("kill -9 `%s`"%exec_proxy_id)
def get_client_ip():
eth_inter= commands.getstatusoutput("ifconfig -a| awk '/^em/ {;a=$1;FS=\":\"; nextline=NR+1; next}{ if (NR==nextline) { split($2,b,\" \")}{ if ($2 ~ /[0-9]\./) {print a,b[1]}; FS=\" \"}}'|uniq -c|awk '{print $2,$3}'")
if eth_inter[0] == 0:
return eth_inter[1].split()[1]
else:
print "get client ip error"
def slot_range(n,group):
    """Split slots ``0 .. n-1`` into ``group`` contiguous ranges.

    Every group gets ``n // group`` slots; the last group additionally takes
    whatever remains, so the ranges always cover all ``n`` slots.

    Args:
        n: total number of slots.
        group: number of groups to spread the slots over.

    Returns:
        A dict mapping the 1-based group number to a one-element list holding
        the inclusive ``(first_slot, last_slot)`` tuple, e.g.
        ``{1: [(0, 511)], 2: [(512, 1023)]}`` for ``slot_range(1024, 2)``.

    Raises:
        ValueError: if ``group`` is not positive or exceeds ``n``.
    """
    if group <= 0:
        raise ValueError("group must be a positive integer, got %r" % (group,))
    per = n // group
    if per == 0:
        raise ValueError("cannot spread %r slots over %r groups" % (n, group))
    slot_dict = {}
    for num in range(1, group + 1):
        first = (num - 1) * per
        # The last group absorbs the remainder of the division.
        last = n - 1 if num == group else first + per - 1
        slot_dict[num] = [(first, last)]
    return slot_dict
def handle_slot():
    """Return the slot ranges for 1024 slots split across the configured groups."""
    group_count = len(pars_config())
    return slot_range(1024,group_count)
def init_config():
    """Generate a redis config file for every instance listed in ``codis.ini``.

    Each file is named ``<group>_<role>_<host>_<port>.conf``. Slave configs
    replicate from the master of their own group.

    Raises:
        ValueError: if a group lists a slave but no master.
    """
    for group_name,members in sorted(pars_config()):
        entries = [member.split() for member in members.split(",")]
        # Resolve the master up front so a slave listed first never picks up
        # an unbound name or the master of the previous group.
        masters = [entry[0] for entry in entries if entry[1] == "master"]
        for entry in entries:
            addr,role = entry[0],entry[1]
            if role not in ("master","slave"):
                continue
            host,port = addr.split(":")[0],addr.split(":")[1]
            confname = "%s_%s_%s_%s"%(group_name,role,host,port)
            if role == "master":
                INIT_config_master(port,6,codis_dbdir,confname)
            else:
                if not masters:
                    raise ValueError("%s lists slave %s but no master"%(group_name,addr))
                INIT_config_slave(port,6,codis_dbdir,masters[0],confname)
def start_master():
mcfile_cmd = "cd %s/conf;ls -lrt| egrep \"%s\"| grep \"master\"|awk '{print $9}'"%(codis_root,get_client_ip())
print mcfile_cmd
mcfile = commands.getstatusoutput(mcfile_cmd)[1]
codis_redis('start',mcfile)
def stop_master():
mcfile_cmd = "cd %s/conf;ls -lrt| egrep \"%s\"| grep \"master\"|awk '{print $9}'"%(codis_root,get_client_ip())
print mcfile_cmd
mcfile = commands.getstatusoutput(mcfile_cmd)[1]
codis_redis('stop',mcfile)
def start_slave():
mcfile_cmd = "cd %s/conf;ls -lrt| egrep \"%s\"| grep \"slave\"|awk '{print $9}'"%(codis_root,get_client_ip())
print mcfile_cmd
mcfile = commands.getstatusoutput(mcfile_cmd)[1]
codis_redis('start',mcfile)
def stop_slave():
mcfile_cmd = "cd %s/conf;ls -lrt| egrep \"%s\"| grep \"slave\"|awk '{print $9}'"%(codis_root,get_client_ip())
print mcfile_cmd
mcfile = commands.getstatusoutput(mcfile_cmd)[1]
codis_redis('stop',mcfile)
def assign_slot():
    """Remove proxy fences, initialise the slots, then assign one range per group."""
    remove_fenc()
    slot_init()
    assignments = handle_slot()
    for gid in assignments:
        # Each value is a one-element list holding the (first, last) tuple.
        codis_initslot(gid,assignments[gid][0])
def help_prompt():
print """
This program prints files to the standard output.
Options include:
--version : Prints the version number
--help : Helpful tips
sample : python codis_manage.py init
: python codis_manage.py start_master/stop_master start_slave/stop_slave start_web/stop_web add_group initslot start_proxy/stop_proxy
"""
if __name__ == "__main__":
if len(sys.argv) < 2:
print "no argument",'\n',help_prompt()
sys.exit(1)
if sys.argv[1] == "--help":
help_prompt()
elif sys.argv[1] == "--version":
print "Version 0.1"
elif sys.argv[1] == "init":
init_config()
elif sys.argv[1] == "start_master":
start_master()
elif sys.argv[1] == "stop_master":
stop_master()
elif sys.argv[1] == "start_slave":
start_slave()
elif sys.argv[1] == "stop_slave":
stop_slave()
elif sys.argv[1] == "start_web":
codis_dashboard('start')
elif sys.argv[1] == "stop_web":
codis_dashboard('stop')
elif sys.argv[1] == "add_group":
codis_group()
elif sys.argv[1] == "initslot":
assign_slot()
elif sys.argv[1] == "start_proxy":
codis_proxy('start')
elif sys.argv[1] == "stop_proxy":
codis_proxy('stop')
elif sys.argv[1] == "stopall":
codis_proxy('stop')
commands.getstatusoutput("killall codis-server")
codis_dashboard("stop")
else:
help_prompt()
print get_client_ip()
这里还有一个基于redis sentinel方式的集群redis服务启停脚本
- sentinel.conf配置示例
port 26330
sentinel monitor lashou 192.168.5.14 6479 2
sentinel down-after-milliseconds lashou 3000
sentinel failover-timeout lashou 4000
sentinel notification-script lashou /data/setup/redis-2.8.19/bin/get_redis_master.py
- cat redis_manage.py
#!/usr/bin/env python
# coding:utf8
#author:shantuwqk@163.com
import os,sys,commands,time
from subprocess import Popen,PIPE
from mako.template import Template
from get_redis_master import get_redis_master_info
# Root of the redis installation; the functions below chdir/cd here and use
# ./src, ./conf and ./logs beneath it.
REDIS_ROOT = "/data/setup/redis-2.8.19"
# Directory passed to the config templates as ``datadir``.
REDIS_DBDIR = "%s/data"%REDIS_ROOT
def INIT_config_master(port,memsize,datadir):
os.chdir(REDIS_ROOT)
template_file = Template(filename="./conf/redis.master.conf.template",module_directory='tmp/test').render(port="%s"%port,memsize=memsize,datadir=datadir)
conf = open('./conf/redis_%s.conf'%port,'w')
conf.write(template_file)
conf.close()
dirisnot = os.path.exists(datadir)
redisconf = os.path.isfile("%s/conf/redis_%s.conf"%(REDIS_ROOT,port))
if not dirisnot and redisconf:
print dirisnot,"create %s ...."%datadir
os.makedirs(datadir)
print commands.getstatusoutput("ls -l %s/conf/redis_%s.conf"%(REDIS_ROOT,port))[1]
return True
else:
print "%s is exists !!"%datadir
return False
def INIT_config_slave(port,memsize,datadir,slaveof):
os.chdir(REDIS_ROOT)
template_file = Template(filename="./conf/redis.slave.conf.template",module_directory='tmp/test').render(port="%s"%port,memsize=memsize,datadir=datadir,slaveof="%s %s"%(slaveof.split(":")[0],slaveof.split(":")[1]))
conf = open('./conf/redis_%s.conf'%port,'w')
conf.write(template_file)
conf.close()
dirisnot = os.path.exists(datadir)
redisconf = os.path.isfile("%s/conf/redis_%s.conf"%(REDIS_ROOT,port))
if not dirisnot and redisconf:
print dirisnot,"create %s ...."%datadir
os.makedirs(datadir)
print commands.getstatusoutput("ls -l %s/conf/redis_%s.conf"%(REDIS_ROOT,port))[1]
return True
else:
print "%s is exists !!"%datadir
return False
def start_redis(redis_root,port):
start_redis_cmd = "cd %s ; nohup ./src/redis-server conf/redis_%s.conf &> ./logs/%s_redis.log & "%(REDIS_ROOT,port,port)
s = os.system(start_redis_cmd)
if s == 0:
print " redis start instence:[%s] ok"%(port)
else:
print " redis start instence:[%s] faild"%(port)
def stop_redis(port):
stop_redis_pid = "ps aux | grep redis| grep %s|awk '{print $2}'"%(port)
stop_redis_cmd = "kill -9 `%s`"%stop_redis_pid
s,v = commands.getstatusoutput(stop_redis_cmd)
if s == 0:
print " redis stop instence:[%s] ok"%(port),"\n",v
else:
print " redis stop instence:[%s] faild"%(port)
def help_prompt():
print """
This program prints files to the standard output.
Options include:
--version : Prints the version number
--help : Helpful tips
--task : To operate on missions:
[start_redis|stop_redis] [port]
sample : python redis_manage.py start/stop master/slave port
"""
if len(sys.argv) < 2:
print "no argument"
help_prompt()
sys.exit()
if sys.argv[1].startswith('--'):
option = sys.argv[1][2:]
if option == 'version':
print 'Version 0.1'
elif option == 'help':
help_prompt()
elif sys.argv[1] == "start" and sys.argv[2] == "master" and sys.argv[3] is not None:
INIT_config_master(sys.argv[3],6,REDIS_DBDIR)
time.sleep(2)
start_redis(REDIS_ROOT,sys.argv[3])
elif sys.argv[1] == "start" and sys.argv[2] == "slave" and sys.argv[3] is not None:
INIT_config_slave(sys.argv[3],6,REDIS_DBDIR,get_redis_master_info())
time.sleep(2)
start_redis(REDIS_ROOT,sys.argv[3])
elif sys.argv[1] == "stop" and sys.argv[2] is not None:
print commands.getstatusoutput("ps aux | grep redis-server|grep -v grep")[1]
stop_redis(sys.argv[2])
else:
help_prompt()
触发更新 haproxy 配置文件的脚本(get_redis_master.py,即上面 sentinel notification-script 所调用的脚本)
#!/usr/bin/env python
# coding:utf8
#author:shantuwqk@163.com
import os,sys,commands,time
from subprocess import Popen,PIPE
from mako.template import Template
#init_redis_master =
# Root of the redis installation; redis-cli is run from ./src here and the
# haproxy template/config live under ./conf.
REDIS_ROOT = "/data/setup/redis-2.8.19"
def get_redis_master_info():
    """Ask sentinel for the current master and return it as ``"ip:port"``.

    Returns:
        The master address, built from the 2nd and 4th lines of the filtered
        ``sentinel masters`` output.

    Raises:
        RuntimeError: if the command fails or yields fewer than four lines.
    """
    cmd = "cd %s;./src/redis-cli -h 192.168.5.15 -p 26329 sentinel masters| grep -A 4 \"ip\"|head -4"%REDIS_ROOT
    s,v = commands.getstatusoutput(cmd)
    lines = v.split("\n")
    # Expected shape of the output: "ip", <address>, "port", <port>.
    if s != 0 or len(lines) < 4:
        raise RuntimeError("cannot read master address from sentinel: %s"%v)
    return "%s:%s"%(lines[1],lines[3])
def handle_redis_m():
    """Return ``{"ip:port": "redis_master_<last ip octet>_<port>"}`` for the current master."""
    # Query sentinel once so the key and the label describe the same master
    # even if a failover happens while this runs.
    master = get_redis_master_info()
    host,port = master.split(":")[0],master.split(":")[1]
    return {master: "redis_master_%s_%s"%(host.split(".")[3],port)}
def handle_redis_s():
    """Return ``{"ip:port": "redis_slave_<last ip octet>_<port>"}`` for every slave sentinel reports.

    Lines of the command output that are not of the form
    ``a.b.c.d:port`` (for instance an empty result) are skipped, so the
    returned dict may be empty.
    """
    cmd = "cd %s;./src/redis-cli -h 192.168.5.15 -p 26329 sentinel slaves lashou| grep -A 1 \"name\"| grep -v \"\-\-\\|name\""%REDIS_ROOT
    s,v = commands.getstatusoutput(cmd)
    redis_slave_dict = {}
    for line in v.split("\n"):
        addr = line.strip()
        host,sep,port = addr.partition(":")
        octets = host.split(".")
        if not sep or len(octets) != 4:
            continue
        redis_slave_dict[addr] = "redis_slave_%s_%s"%(octets[3],port)
    return redis_slave_dict
def update_ha_conf(redis_slave_dict,redis_master_dict):
os.chdir(REDIS_ROOT)
template_file = Template(filename="./conf/haproxy.cfg.template",module_directory='tmp/test').render(redis_slave_dict=redis_slave_dict,redis_master_dict=redis_master_dict)
conf = open('./conf/haproxy.cfg','w')
conf.write(template_file)
conf.close()
haproxy_conf = os.path.isfile("%s/conf/haproxy.cfg"%(REDIS_ROOT))
if haproxy_conf:
print "update haproxy config info master:[%s] and slave:[%s] ...."%(redis_master_dict,redis_slave_dict)
print commands.getstatusoutput("ls -l %s/conf/haproxy.cfg"%(REDIS_ROOT))[1]
return True
else:
print "%s is not exists !!"%haproxy_conf
return False
# Directory on the haproxy host that receives the rsync'ed haproxy.cfg.
remote_ha_root = "/data/setup/haproxy/conf"
# Local haproxy.cfg that gets pushed to the haproxy host.
# NOTE(review): update_ha_conf writes under /data/setup/redis-2.8.19/conf;
# confirm /data/setup/redis refers to the same tree.
localhafile = "/data/setup/redis/conf/haproxy.cfg"
def reload_ha_service(addr):
rsync_cmd = "rsync -avz -e \"ssh -p 22\" %s root@%s:%s/"%(localhafile,addr,remote_ha_root)
remote_cmd = "ssh -p 22 root@%s \" /usr/local/sbin/haproxy -f /data/setup/haproxy-1.5.10/conf/haproxy.cfg -p /var/run/haproxy.pid -sf $(cat /var/run/haproxy.pid)\""
rs,rv = commands.getstatusoutput(rsync_cmd)
remotes,remotev = commands.getstatusoutput(remote_cmd)
if rs == 0 and remotes == 0:
print "haproxy.cfg update ok"
else:
print "haproxy.cfg update faild!!"
time.sleep(5)
if __name__ == "__main__":
print get_redis_master_info()
print update_ha_conf(handle_redis_s(),handle_redis_m())
如果需要迁移现有 redis 数据到 codis,该如何操作?
- 先搭建好 codis 集群并让 codis-proxy 正确运行起来
- 对线上每一个 redis 实例运行一个 redis-port 来向 codis 导入数据,例如:
# Start one background redis-port sync per source redis port, 5 seconds apart.
for port in {6379,6380,6479,6480}; do
# Each sync writes stdout and stderr to <port>.log.
nohup redis-port sync --ncpu=4 --from=redis-server:${port} \
--target=codis-proxy:19000 > ${port}.log 2>&1 &
sleep 5
done
# Follow all of the sync logs.
tail -f *.log
每个 redis-port 负责将对应的 redis 数据导入到 codis
多个 redis-port 之间不互相干扰,除非多个 redis 上的 key 本身出现冲突
单个 redis-port 可以将负责的数据并行迁移以提高速度,通过 --ncpu 来指定并行数
导入速度受带宽以及 codis-proxy 处理速度限制(本质是大量的 slotsrestore 操作)
完成数据迁移,在适当的时候将服务指向 Codis,并将原 redis 下线
- 旧 redis 下线时,会导致 redis-port 连接断开,于是自动退出