主库的wal日志已经被归档或异常丢失如何搭建从库

关键字：wal日志归档搭建从库 restore_command

master	194.1
slave	194.4
wal归档目录	/backup/pgsql/pg_arch/
xlog目录	/var/lib/pgsql/9.5/data/pg_xlog

我们知道pg的从库的搭建是通过基础备份和完整的wal日志完成的，那么如果主库的wal日志已经被归档或异常丢失，如果通过备份进行搭建从库呢？

我们可以通过restore_command命令把wal归档拷贝回xlog目录完成。具体的操作如下：

一、模拟xlog目录的wal日志丢失或已经被归档，0000000800000000000000D8该wal日志丢失

mater

wal归档目录

[root@db-hx-194-1 1941]# pwd

/backup/pgsql/pg_arch/1941

[root@db-hx-194-1 1941]# ll 0000000800000000000000D8

-rw------- 1 postgres postgres 16777216 Mar 14 12:18 0000000800000000000000D8

xlog目录

cd /var/lib/pgsql/9.5/data/pg_xlog

-rw------- 1 postgres postgres 16777216 Mar 14 12:09 0000000800000000000000D7

-rw------- 1 postgres postgres 316 Mar 14 12:09 0000000800000000000000D7.00000028.backup

-rw------- 1 postgres postgres 16777216 Mar 14 12:18 0000000800000000000000D8

drwx------ 2 postgres postgres 4096 Mar 14 12:18 archive_status

-rw------- 1 postgres postgres 16777216 Mar 14 12:18 0000000800000000000000D9

$ mv 0000000800000000000000D8 /tmp/

在从库的归档目录下准备好

slave

/bin/mount -t nfs 10.40.194.1:/backup /backup3

[root@oracle-hx-194-4 ~]# df

Filesystem 1K-blocks Used Available Use% Mounted on

/dev/mapper/vg_root-lv_root 51606140 3640460 45344240 8% /

tmpfs 49474596 12 49474584 1% /dev/shm

/dev/sdb1 5858145720 11671396 5553476080 1% /backup

/dev/sda1 99150 29857 64173 32% /boot

/dev/mapper/vg_root-lv_opt 178956928 139412400 39544528 78% /opt

/dev/mapper/vg_root-lv_tmp 51606140 209408 48775292 1% /tmp

/dev/fioa 1220670356 32992 1220637364 1% /database

10.40.194.1:/backup 5858146304 18405376 5546742784 1% /backup3

cp /backup3/pgsql/pg_arch/1941/* /backup/pgsql/pg_arch/1941/

[root@oracle-hx-194-4 pg_xlog]# cd /backup/pgsql/pg_arch/1941/

[root@oracle-hx-194-4 1941]# ll

total 131080

-rw------- 1 postgres postgres 16777216 Mar 14 13:40 0000000800000000000000D1

-rw------- 1 postgres postgres 16777216 Mar 14 13:40 0000000800000000000000D2

-rw------- 1 postgres postgres 16777216 Mar 14 13:40 0000000800000000000000D3

-rw------- 1 postgres postgres 16777216 Mar 14 13:40 0000000800000000000000D4

-rw------- 1 postgres postgres 16777216 Mar 14 13:40 0000000800000000000000D5

-rw------- 1 postgres postgres 316 Mar 14 13:40 0000000800000000000000D5.00000028.backup

-rw------- 1 postgres postgres 16777216 Mar 14 13:40 0000000800000000000000D6

-rw------- 1 postgres postgres 16777216 Mar 14 13:40 0000000800000000000000D7

-rw------- 1 postgres postgres 316 Mar 14 13:40 0000000800000000000000D7.00000028.backup

-rw------- 1 postgres postgres 16777216 Mar 14 13:40 0000000800000000000000D8

二、搭建基础备份

master

/opt/pgdata/digoal/1921/data04/pg93backup/20180313/data

scp -rp * root@10.40.194.4:/opt/tuniu/pgsql9.5/data

slave

chown -R postgres.postgres /opt/tuniu/pgsql9.5/data

/opt/tuniu/pgsql9.5/data/pg_xlog

rm -rf *

/opt/tuniu/pgsql9.5/data/pg_tblspc

mv 41002/* /opt/pgsql-9.5/tbs

rm -rf 41002

ln -s /opt/pgsql-9.5/tbs ./41002

mv 41037/* /opt/pgsql-9.5/bi_erp_itf_tbs

rm -rf 41037

ln -s /opt/pgsql-9.5/bi_erp_itf_tbs ./41037

三、恢复配置准备

[postgres@oracle-hx-194-4 data]$ cat recovery.conf

standby_mode = on

trigger_file = '/opt/tuniu/pgsql9.5/pg.trigger'

primary_conninfo = 'host=10.40.194.1 port=5432 user=rep password=rep'

restore_command = 'cp /backup/pgsql/pg_arch/1941/%f %p'

启动数据库，查看进程

[root@oracle-hx-194-4 pg_log]# ps aux|grep post

postgres 110488 0.0 0.3 10924720 306044 pts/4 S 15:28 0:00 /opt/tuniu/pgsql9.5/bin/postgres -D /opt/tuniu/pgsql9.5/data

postgres 110490 0.0 0.0 10924788 2924 ? Ss 15:28 0:00 postgres: startup process recovering 0000000800000000000000D9

postgres 110500 0.0 0.0 10929172 1944 ? Ss 15:28 0:00 postgres: wal receiver process streaming 0/D9019B80

查看日志

# more postgresql-2018-03-14_152851.log

LOG: database system was interrupted; last known up at 2018-03-13 17:19:01 CST

LOG: creating missing WAL directory "pg_xlog/archive_status"

LOG: entering standby mode

cp: cannot stat `/backup/pgsql/pg_arch/1941/00000008.history': No such file or directory

LOG: restored log file "0000000800000000000000D5" from archive

LOG: redo starts at 0/D5000028

LOG: consistent recovery state reached at 0/D50000F8

LOG: database system is ready to accept read only connections

LOG: restored log file "0000000800000000000000D6" from archive

LOG: restored log file "0000000800000000000000D7" from archive

LOG: restored log file "0000000800000000000000D8" from archive

cp: cannot stat `/backup/pgsql/pg_arch/1941/0000000800000000000000D9': No such file or directory

LOG: fetching timeline history file for timeline 8 from primary server

LOG: started streaming WAL from primary at 0/D9000000 on timeline 8

归档中找不到的WAL段可以在pg_xlog/中看到，这使得可以使用最近未归档的段。但是，在归档中可用的段将会被优先于pg_xlog/中的文件被使用。

通常，恢复将会处理完所有可用的WAL段，从而将数据库恢复到当前时间点（或者尽可能接近给定的可用WAL段）。因此，一个正常的恢复将会以一个"文件未找到"消息结束，错误消息的准确文本取决于你选择的restore_command。你也可能在恢复的开始看到一个针对名称类似于00000001.history文件的错误消息。这也是正常的并且不表示在简单恢复情况中的问题，对此的讨论见第 24.3.5 节。---节选自PG中文文档备份恢复章节

[root@oracle-hx-194-4 pg_xlog]# ll

total 81924

-rw------- 1 postgres postgres 16777216 Mar 14 15:28 0000000800000000000000D5

-rw------- 1 postgres postgres 16777216 Mar 14 15:28 0000000800000000000000D6

-rw------- 1 postgres postgres 16777216 Mar 14 15:28 0000000800000000000000D7

-rw------- 1 postgres postgres 16777216 Mar 14 15:28 0000000800000000000000D8

-rw------- 1 postgres postgres 16777216 Mar 14 15:39 0000000800000000000000D9

-rw------- 1 postgres postgres 304 Mar 14 15:28 00000008.history

drwx------ 2 postgres postgres 150 Mar 14 15:28 archive_status

更多参考PG中文手册备份恢复http://www.postgres.cn/docs/9.6/continuous-archiving.html#BACKUP-BASE-BACKUP