Mysql Keepalived 实现双主互备高可用详细配置

时间:2022-06-29 00:27:51

Mysql Keepalived双主互备架构搭架

Mysql   Keepalived 实现双主互备高可用详细配置
配置前进行校时操作

#安装ntpdate工具
yum install ntpdate -y
#使用ntpdate校时(后面的是ntp服务器)
ntpdate pool.ntp.org

一、配置mysql双主备

0.测试mysql安装

 yum install mariadb mariadb-server -y
systemctl restart mariadb
systemctl enable mariadb
mysql_secure_installation

1.DB1修改配置文件(需重启)


vim /etc/my.cnf
#在[mysqld]添加
server-id=166
#开启mysql日志功能
log-bin=mysql-bin
#定义日志命名格式 
relay-log=mysql-relay-bin
#以下table复制过滤
#replicate-wild-ignore-table=test.%
#replicate-wild-ignore-table=mysql.%
#replicate-wild-ignore-table=performance_schema.%

2.DB2修改配置文件(需重启)


vim /etc/my.cnf
#在[mysqld]添加
server-id=168
#开启mysql日志功能
log-bin=mysql-bin
#定义日志命名格式 
relay-log=mysql-relay-bin

3.DB1,DB2分别创建复制帐号

mysql -u root -p
#创建用户slave_up允许从192.168.1网段登录
create user ‘slave_cp‘@‘192.168.1.%‘ identified by ‘pass‘;
grant replication slave on *.* to ‘slave_cp‘@‘192.168.1.%‘;
exit

4.DB1获取二进制日志信息

mysql -u root -p
#对数据库进行只读锁定(防止查看二进制日志同时有人对数据库修改操作)
flush tables with read lock;
#查询主机二进制文件信息
show master status;
 ------------------ ---------- -------------- ------------------ 
| File | Position | Binlog_Do_DB | Binlog_Ignore_DB |
 ------------------ ---------- -------------- ------------------ 
| mysql-bin.000001 | 494 | | |
 ------------------ ---------- -------------- ------------------ 
1 row in set (0.00 sec)
#解除只读锁定
unlock tables;
exit

5.同步数据库


#DB1
#备份主数据库,并上传给从数据库
mysqldump -uroot -p123456 --all-databases --lock-all-tables >/tmp/dbdump.sql
scp /tmp/dbdump.sql 192.168.1.168:/tmp/
#DB2
#从服务器导入主服务器数据库
mysql -uroot -p </tmp/dbdump.sql

6.在DB2上将DB1设置为主服务器


 #开启mysql防火墙端口
firewall-cmd --add-port=3306/tcp --permanent
firewall-cmd --reload
 change master to
master_host=‘192.168.1.168‘ ,
master_user=‘slave_cp‘, 
master_password=‘pass‘,
master_log_file=‘mysql-bin.000001‘, 
master_log_pos=494;
#启动slave
start slave;
#查看是否正常工作
show slave statusG
exit

7.在DB2获取二进制日志信息

 mysql -u root -p
#对数据库进行只读锁定(防止查看二进制日志同时有人对数据库修改操作)
flush tables with read lock;
#查询主机二进制文件信息
show master status;
 ------------------ ---------- -------------- ------------------ 
| File | Position | Binlog_Do_DB | Binlog_Ignore_DB |
 ------------------ ---------- -------------- ------------------ 
| mysql-bin.000001 | 513721| | |
 ------------------ ---------- -------------- ------------------ 
1 row in set (0.00 sec)
#解除只读锁定
unlock tables;
exit

8.在DB1将DB2设置为主服务器

 change master to
master_host=‘192.168.1.168‘ ,
master_user=‘slave_cp‘, 
master_password=‘pass‘,
master_log_file=‘mysql-bin.000001‘, 
master_log_pos=513721;
#启动slave
start slave;
#查看是否正常工作
show slave statusG
exit

二、Keepalived配置(/etc/keepalived/keepalived.conf)(yum install keepalived -y)(注意:关闭selinux策略 setenforce 0)

1.keepalived配置文件配置

! Configuration File for keepalived
global_defs {
 #设置报警通知邮件地址,可以设置多个
 notification_email { 
 [email protected]
 } 
 #设置邮件的发送地址 
 notification_email_from [email protected] 
 #设置smtp server的地址,该地址必须是存在的
 smtp_server 127.0.0.1 
 #设置连接smtp server的超时时间 
 smtp_connect_timeout 30 
 #运行Keepalived服务器的标识,发邮件时显示在邮件标题中的信息 
 router_id mysql_msun 
}
#检测haproxy脚本
vrrp_script chk_mysql {
 script "/etc/keepalived/mysqlcheck/check_slave.sh"
 interval 2
 weight 2
}
#定义VRRP实例,实例名自定义
vrrp_instance mysql_msun {
 #指定Keepalived的角色,MASTER主机 BACKUP备份
 state BACKUP #此处两个都设置为BACKUP
 #指定HA监测的接口 
 interface eno16777736 
 #虚拟路由标识,这个标识是一个数字(1-255),在一个VRRP实例中主备服务器ID必须一样 
 virtual_router_id 68 
 #优先级,数字越大优先级越高,在一个实例中主服务器优先级要高于备服务器 
 priority 100 #从服务器99
 #设置主备之间同步检查的时间间隔单位秒 
 advert_int 1 
 #设置不抢占模式(DB1设置即可)
 nopreempt 
 #设置验证类型和密码 
 authentication { 
 #验证类型有两种{PASS|HA} 
 auth_type PASS 
 #设置验证密码,在一个实例中主备密码保持一样 
 auth_pass 1689 
 }
 track_script {
 chk_mysql # 执行监控的服务
 } 
 #定义虚拟IP地址,可以有多个,每行一个
 virtual_ipaddress { 
 192.168.1.160
 } 
}

2.创建监控脚本(/etc/keepalived/mysqlcheck/check_slave.sh)

#!/bin/bash
#This scripts is check for Mysql Slave status

Mysqlbin=/usr/bin/mysql
user=root
pw=‘123456‘
port=3306
host=127.0.0.1
#最大延时
sbm=120
#Check for $Mysqlbin
if [ ! -f $Mysqlbin ];then
 echo ‘Mysqlbin not found,check the variable Mysqlbin‘
 exit 99
fi
#Get Mysql Slave Status
IOThread=`$Mysqlbin -h $host -P $port -u$user -p$pw -e ‘show slave statusG‘ 2>/dev/null|grep ‘Slave_IO_Running:‘|awk ‘{print $NF}‘`
SQLThread=`$Mysqlbin -h $host -P $port -u$user -p$pw -e ‘show slave statusG‘ 2>/dev/null|grep ‘Slave_SQL_Running:‘|awk ‘{print $NF}‘`
SBM=`$Mysqlbin -h $host -P $port -u$user -p$pw -e ‘show slave statusG‘ 2>/dev/null|grep ‘Seconds_Behind_Master:‘|awk ‘{print $NF}‘`
#Check if the mysql run
if [[ -z "$IOThread" ]];then
 exit 1
fi
#Check if the thread run 
if [[ "$IOThread" == "No" || "$SQLThread" == "No" ]];then
 exit 1
 elif [[ $SBM -ge $sbm ]];then
 exit 1
 else
 exit 0
fi

3.防火墙策略


#运行VPPR组播(keepalived)
firewall-cmd --permanent --add-rich=‘rule family="ipv4" destination address="224.0.0.18" protocol value="vrrp" accept‘
firewall-cmd --reload

4.测试

1.正常启动DB1、DB2信息

[[email protected] ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN 
 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
 inet 127.0.0.1/8 scope host lo
 valid_lft forever preferred_lft forever
 inet6 ::1/128 scope host 
 valid_lft forever preferred_lft forever
2: eno16777736: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
 link/ether 00:0c:29:43:30:13 brd ff:ff:ff:ff:ff:ff
 inet 192.168.1.166/24 brd 192.168.1.255 scope global eno16777736
 valid_lft forever preferred_lft forever
 inet 192.168.1.160/32 scope global eno16777736
 valid_lft forever preferred_lft forever
 inet6 fe80::20c:29ff:fe43:3013/64 scope link 
 valid_lft forever preferred_lft forever
[[email protected] ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN 
 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
 inet 127.0.0.1/8 scope host lo
 valid_lft forever preferred_lft forever
 inet6 ::1/128 scope host 
 valid_lft forever preferred_lft forever
2: eno16777736: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
 link/ether 00:0c:29:da:7d:9d brd ff:ff:ff:ff:ff:ff
 inet 192.168.1.168/24 brd 192.168.1.255 scope global eno16777736
 valid_lft forever preferred_lft forever
 inet6 fe80::20c:29ff:feda:7d9d/64 scope link 
 valid_lft forever preferred_lft forever

2.关闭DB1数据库,观察IP

[[email protected] ~]# systemctl stop mariadb
[[email protected] ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN 
 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
 inet 127.0.0.1/8 scope host lo
 valid_lft forever preferred_lft forever
 inet6 ::1/128 scope host 
 valid_lft forever preferred_lft forever
2: eno16777736: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
 link/ether 00:0c:29:43:30:13 brd ff:ff:ff:ff:ff:ff
 inet 192.168.1.166/24 brd 192.168.1.255 scope global eno16777736
 valid_lft forever preferred_lft forever
 inet6 2001:da8:4002:3301:20c:29ff:fe43:3013/64 scope global noprefixroute dynamic 
 valid_lft 2591817sec preferred_lft 604617sec
 inet6 fe80::20c:29ff:fe43:3013/64 scope link 
 valid_lft forever preferred_lft forever
[[email protected] ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN 
 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
 inet 127.0.0.1/8 scope host lo
 valid_lft forever preferred_lft forever
 inet6 ::1/128 scope host 
 valid_lft forever preferred_lft forever
2: eno16777736: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
 link/ether 00:0c:29:da:7d:9d brd ff:ff:ff:ff:ff:ff
 inet 192.168.1.168/24 brd 192.168.1.255 scope global eno16777736
 valid_lft forever preferred_lft forever
 inet 192.168.1.160/32 scope global eno16777736
 valid_lft forever preferred_lft forever
 inet6 2001:da8:4002:3301:20c:29ff:feda:7d9d/64 scope global noprefixroute dynamic 
 valid_lft 2591669sec preferred_lft 604469sec
 inet6 fe80::20c:29ff:feda:7d9d/64 scope link 
 valid_lft forever preferred_lft forever

3.重启主机mariadb,观察IP

[[email protected] ~]# ip a #DB1不切换成主机(因为设置为不抢占模式)
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN 
 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
 inet 127.0.0.1/8 scope host lo
 valid_lft forever preferred_lft forever
 inet6 ::1/128 scope host 
 valid_lft forever preferred_lft forever
2: eno16777736: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
 link/ether 00:0c:29:43:30:13 brd ff:ff:ff:ff:ff:ff
 inet 192.168.1.166/24 brd 192.168.1.255 scope global eno16777736
 valid_lft forever preferred_lft forever
 inet6 2001:da8:4002:3301:20c:29ff:fe43:3013/64 scope global noprefixroute dynamic 
 valid_lft 2591605sec preferred_lft 604405sec
 inet6 fe80::20c:29ff:fe43:3013/64 scope link 
 valid_lft forever preferred_lft forever
[[email protected] ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN 
 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
 inet 127.0.0.1/8 scope host lo
 valid_lft forever preferred_lft forever
 inet6 ::1/128 scope host 
 valid_lft forever preferred_lft forever
2: eno16777736: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
 link/ether 00:0c:29:da:7d:9d brd ff:ff:ff:ff:ff:ff
 inet 192.168.1.168/24 brd 192.168.1.255 scope global eno16777736
 valid_lft forever preferred_lft forever
 inet 192.168.1.160/32 scope global eno16777736
 valid_lft forever preferred_lft forever
 inet6 2001:da8:4002:3301:20c:29ff:feda:7d9d/64 scope global noprefixroute dynamic 
 valid_lft 2591533sec preferred_lft 604333sec
 inet6 fe80::20c:29ff:feda:7d9d/64 scope link 
 valid_lft forever preferred_lft forever

4.再次关闭DB2的maraiadb,问题出现了,预想当DB2宕机,IP未能正常漂移!!!

[[email protected] ~]# systemctl stop mariadb 
[[email protected] ~]# ip a 
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN 
 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
 inet 127.0.0.1/8 scope host lo
 valid_lft forever preferred_lft forever
 inet6 ::1/128 scope host 
 valid_lft forever preferred_lft forever
2: eno16777736: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
 link/ether 00:0c:29:da:7d:9d brd ff:ff:ff:ff:ff:ff
 inet 192.168.1.168/24 brd 192.168.1.255 scope global eno16777736
 valid_lft forever preferred_lft forever
 inet 192.168.1.160/32 scope global eno16777736
 valid_lft forever preferred_lft forever
 inet6 2001:da8:4002:3301:20c:29ff:feda:7d9d/64 scope global noprefixroute dynamic 
 valid_lft 2591326sec preferred_lft 604126sec
 inet6 fe80::20c:29ff:feda:7d9d/64 scope link 
 valid_lft forever preferred_lft forever
[[email protected] ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN 
 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
 inet 127.0.0.1/8 scope host lo
 valid_lft forever preferred_lft forever
 inet6 ::1/128 scope host 
 valid_lft forever preferred_lft forever
2: eno16777736: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
 link/ether 00:0c:29:43:30:13 brd ff:ff:ff:ff:ff:ff
 inet 192.168.1.166/24 brd 192.168.1.255 scope global eno16777736
 valid_lft forever preferred_lft forever
 inet6 2001:da8:4002:3301:20c:29ff:fe43:3013/64 scope global noprefixroute dynamic 
 valid_lft 2591241sec preferred_lft 604041sec
 inet6 fe80::20c:29ff:fe43:3013/64 scope link 
 valid_lft forever preferred_lft forever

关于这个问题的思考:

先说说本案例中采用的是Keepalived mysql的组合,Keepalived用了双备模式,采用脚本检测应用是否正常来决定keepalived的权重,从而让正常的数据库能得到VIP而正常工作,考虑到了如果DB1宕掉(仅mysql服务)(DB1的权重会低于DB2),DB2接管服务,而DB1再次正常时(权重又会高于DB2),为了避免数据库来回切换,而在DB1的keepalived配置中设置了不抢占模式来避免此问题。但是,导致了一个新的问题,就算是DB2的数据库不正常工作,(权重更低于DB2),但因为DB1不抢占模式,DB2仍然会占有VIP,(除非是Keepalived服务宕掉)。

关于这个问题的思考解决方案

1.keepalived取消使用不抢占模式,从而使主机切换正常,但这样也就使DB1的数据库服务宕掉再正常启动时,会抢占DB2的VIP,使数据库进行不必要的切换。

2.修改检测脚本,keepalived使用检测脚本正常时返回0,不正常时,直接让keepalived服务本身宕掉。这样情况下,DB1服务mysql服务再次正常(并启动keepalived),不会抢占DB2的VIP;当DB2mysql服务不正常工作时,(会自动停止keepalived),从而使DB1正常得到VIP。

 #!/bin/bash
#This scripts is check for Mysql Slave status

Mysqlbin=/usr/bin/mysql
user=root
pw=‘123456‘
port=3306
host=127.0.0.1
#最大延时
sbm=120
#Check for $Mysqlbin
if [ ! -f $Mysqlbin ];then
 echo ‘Mysqlbin not found,check the variable Mysqlbin‘
 pkill keepalived
fi
#Get Mysql Slave Status
IOThread=`$Mysqlbin -h $host -P $port -u$user -p$pw -e ‘show slave statusG‘ 2>/dev/null|grep ‘Slave_IO_Running:‘|awk ‘{print $NF}‘`
SQLThread=`$Mysqlbin -h $host -P $port -u$user -p$pw -e ‘show slave statusG‘ 2>/dev/null|grep ‘Slave_SQL_Running:‘|awk ‘{print $NF}‘`
SBM=`$Mysqlbin -h $host -P $port -u$user -p$pw -e ‘show slave statusG‘ 2>/dev/null|grep ‘Seconds_Behind_Master:‘|awk ‘{print $NF}‘`
#Check if the mysql run
if [[ -z "$IOThread" ]];then
 pkill keepalived
fi
#Check if the thread run 
if [[ "$IOThread" == "No" || "$SQLThread" == "No" ]];then
 pkill keepalived
 elif [[ $SBM -ge $sbm ]];then
 pkill keepalived
 else
 exit 0
fi