基于【CentOS-7+ Ambari 2.7.0 + HDP 3.0】搭建HAWQ数据仓库03 —— 安装HAWQ 2.3.0.0

时间:2022-11-30 20:30:01

一、 HAWQ2.3.0环境准备【全部主机节点】:

1, vim /etc/sysctl.conf,编辑如下内容:

# 注:原文各参数的取值在转载时丢失,以下为Apache HAWQ官方安装文档的推荐值,请根据主机实际情况核对后使用
kernel.shmmax=1000000000
kernel.shmmni=4096
kernel.shmall=4000000000
kernel.sem=250 512000 100 2048
kernel.sysrq=1
kernel.core_uses_pid=1
kernel.msgmnb=65536
kernel.msgmax=65536
kernel.msgmni=2048
net.ipv4.tcp_syncookies=0
net.ipv4.ip_forward=0
net.ipv4.conf.default.accept_source_route=0
net.ipv4.tcp_tw_recycle=1
net.ipv4.tcp_max_syn_backlog=200000
net.ipv4.conf.all.arp_filter=1
net.ipv4.ip_local_port_range=1281 65535
net.core.netdev_max_backlog=200000
vm.overcommit_memory=2
fs.nr_open=3000000
kernel.threads-max=798720
kernel.pid_max=798720
#increase network
net.core.rmem_max=2097152
net.core.wmem_max=2097152

保存退出后,使用命令“sysctl -p” 使之生效:

sysctl -p

2, vim /etc/security/limits.conf 编辑如下内容:

# 注:原文取值在转载时丢失,以下为Apache HAWQ官方安装文档的推荐值,请核对后使用
* soft nofile 2900000
* hard nofile 2900000
* soft nproc 131072
* hard nproc 131072

重新登录以使配置生效,或者使用命令临时设置一下:

ulimit -n 10240

3, 添加gpadmin用户(Greenplum admin的缩写),使用/opt/gpadmin作为主目录

useradd --home=/opt/gpadmin/ --no-create-home --comment "HAWQ admin" gpadmin
echo gpadmin | passwd --stdin gpadmin
mkdir /opt/gpadmin
chown gpadmin:gpadmin /opt/gpadmin

4, 添加gpadmin到/etc/sudoers:编辑该文件(建议使用visudo),添加如下行

gpadmin ALL=(ALL) NOPASSWD:ALL

5, 配置gpadmin用户的ssh【免密互访】
6, 准备软件包依赖项:
libgsasl is needed by apache-hawq-2.3.0.0-el7.x86_64
protobuf >= 2.5.0 is needed by apache-hawq-2.3.0.0-el7.x86_64
net-snmp-libs is needed by apache-hawq-2.3.0.0-el7.x86_64
thrift >= 0.9.1 is needed by apache-hawq-2.3.0.0-el7.x86_64
boost >= 1.53.0 is needed by apache-hawq-2.3.0.0-el7.x86_64
其中gcc、gcc-c++、protobuf、net-snmp-libs、boost可直接通过yum安装。而安装libgsasl需要添加一个包含libgsasl库的源:新建/etc/yum.repos.d/fedora.repo文件,添加如下内容:

[epel-repo]
name=epel
baseurl=http://dl.fedoraproject.org/pub/epel/6/x86_64/
enabled=1
gpgcheck=0

执行安装:

yum install gcc gcc-c++ -y
yum install protobuf -y
yum install net-snmp-libs -y
yum install boost -y
yum install libgsasl-devel -y

C)下载安装thrift 0.9.1

wget http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/t/thrift-0.9.1-15.el7.x86_64.rpm
rpm -ivh thrift-0.9.1-15.el7.x86_64.rpm

二、 下载安装包apache-hawq-rpm-2.3.0.0-incubating.tar.gz,并解压缩

cd downloads
wget http://mirrors.tuna.tsinghua.edu.cn/apache/incubator/hawq/2.3.0.0-incubating/apache-hawq-rpm-2.3.0.0-incubating.tar.gz
tar -zxvf apache-hawq-rpm-2.3.0.0-incubating.tar.gz

三、 安装服务端
1, 安装
rpm -ivh --prefix=/opt/gpadmin apache-hawq-2.3.0.0-el7.x86_64.rpm

#在其他主机中依次执行上面安装

**移除安装
yum remove apache-hawq -y

2 HAWQ的参数配置

0) 切换用户gpadmin

[root@ep-bd01 apache-hawq]# su - gpadmin

编辑用户gpadmin的启动文件~/.bash_profile,在其中添加如下一行:

source /opt/gpadmin/apache-hawq/greenplum_path.sh

执行命令

source /opt/gpadmin/apache-hawq/greenplum_path.sh

配置HAWQ的集群及其它参数

1) vi   /opt/gpadmin/apache-hawq/etc/slaves

# 添加集群主机列表

ep-bd01
ep-bd02
ep-bd03
ep-bd04
ep-bd05
ep-bd06

2) vi hawq-site.xml

# 修改如下参数
参数名 值
(注:原文中的端口号在转载时丢失,下面的8020、8050、8030为HDP的默认端口,请分别对照core-site.xml中的fs.defaultFS以及yarn-site.xml中的yarn.resourcemanager.address、yarn.resourcemanager.scheduler.address核对)

hadoop.security.authentication    simple
hawq_master_address_host ep-bd01
hawq_dfs_url ep-bd01:8020/hawq
hawq_rm_memory_limit_perseg 64GB
hawq_rm_yarn_address ep-bd01:8050
hawq_global_rm_type yarn
hawq_rm_yarn_scheduler_address ep-bd01:8030
hawq_acl_type standalone
hawq_master_directory /opt/gpadmin/data/masterdd
hawq_segment_directory /opt/gpadmin/data/segmentdd

3) vi yarn-client.xml

# 修改如下参数
参数名 值

hadoop.security.authentication    simple

2, 在HDFS上建立/hawq目录,并将其属主设为gpadmin

sudo -u hdfs hdfs dfs -mkdir /hawq
sudo -u hdfs hdfs dfs -chown gpadmin:gpadmin /hawq

3, 建立data目录(全部节点)

sudo -u gpadmin mkdir -p /opt/gpadmin/data/masterdd /opt/gpadmin/data/segmentdd

4, 复制配置文件到其他节点

su - gpadmin
cd /opt/gpadmin/apache-hawq/etc
gpscp -f slaves slaves =:/opt/gpadmin/apache-hawq/etc
gpscp -f slaves hawq-site.xml =:/opt/gpadmin/apache-hawq/etc
gpscp -f slaves hdfs-client.xml =:/opt/gpadmin/apache-hawq/etc
gpscp -f slaves yarn-client.xml =:/opt/gpadmin/apache-hawq/etc

5,初始化cluster【注意,这里必须进入/opt/gpadmin/apache-hawq/bin】

su - gpadmin
cd /opt/gpadmin/apache-hawq/bin
source ../greenplum_path.sh
./hawq init cluster

# 初始化cluster失败,调整之后,再次执行初始化之前需要

#删除dfs目录中内容:
sudo -u hdfs hdfs dfs -rm -r -f /hawq/*
#删除数据目录中内容

ssh ep-bd01 'rm -rf /opt/gpadmin/data/masterdd/*'
ssh ep-bd01 'rm -rf /opt/gpadmin/data/segmentdd/*'
ssh ep-bd02 'rm -rf /opt/gpadmin/data/masterdd/*'
ssh ep-bd02 'rm -rf /opt/gpadmin/data/segmentdd/*'
ssh ep-bd03 'rm -rf /opt/gpadmin/data/masterdd/*'
ssh ep-bd03 'rm -rf /opt/gpadmin/data/segmentdd/*'
ssh ep-bd04 'rm -rf /opt/gpadmin/data/masterdd/*'
ssh ep-bd04 'rm -rf /opt/gpadmin/data/segmentdd/*'
ssh ep-bd05 'rm -rf /opt/gpadmin/data/masterdd/*'
ssh ep-bd05 'rm -rf /opt/gpadmin/data/segmentdd/*'
ssh ep-bd06 'rm -rf /opt/gpadmin/data/masterdd/*'
ssh ep-bd06 'rm -rf /opt/gpadmin/data/segmentdd/*'

# 启动、停止、重启hawq(按需执行其中一条)
hawq start cluster
hawq stop cluster
hawq restart cluster