install Hadoop

时间:2023-03-09 16:58:31
install Hadoop

Installing Java

Hadoop runs on both Unix and Windows operating systems, and requires Java to be
installed. For a production installation, you should select a combination of operating
system, Java, and Hadoop that has been certified by the vendor of the Hadoop distribution
you are using. There is also a page on the Hadoop wiki that lists combinations
that community members have run with success.

#Master 192.168.1.201 jdk、hadoop NameNode、DFSZKFailoverController(zkfc)、ResourceManager、JournalNode
#Slave1 192.168.1.202 jdk、hadoop NameNode、DFSZKFailoverController(zkfc)、ResourceManager、JournalNode
#Slave2 192.168.1.203 jdk、hadoop、zookeeper DataNode、NodeManager、QuorumPeerMain

#Slave3 192.168.1.203 jdk、hadoop、zookeeper DataNode、NodeManager、QuorumPeerMain

#Slave4 192.168.1.204 jdk、hadoop、zookeeper DataNode、NodeManager、QuorumPeerMain

##############虚拟机集中配置本机环境######################

# Persistently disable SELinux by rewriting the SELINUX= line of its config.
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
# Keep the firewall from starting at boot, then stop the running instance.
chkconfig iptables off
service iptables stop
# Remove the persistent NIC naming rules (presumably so that cloned VMs
# regenerate them for their own network card -- confirm for your distro).
rm /etc/udev/rules.d/70-persistent-net.rules
# Manual step: delete the MAC address from /etc/sysconfig/network-scripts/ifcfg-eth0

# Unpack the JDK under /usr/java and expose it as /usr/java/latest.
# tar -C requires the target directory to exist, so create it first.
mkdir -p /usr/java
tar -zxvf jdk-8u74-linux-x64.tar.gz -C /usr/java
# Replace any previous "latest" entry. The link name must be an absolute
# path: a bare "latest" would be created in whatever the current directory
# happens to be, leaving JAVA_HOME=/usr/java/latest dangling.
rm -rf /usr/java/latest
ln -s /usr/java/jdk1.8.0_74 /usr/java/latest

# Append the Java/Hadoop/HBase/ZooKeeper environment to /etc/profile.
# The heredoc delimiter is quoted so that $PATH, $JAVA_HOME, ... are written
# literally and expanded when /etc/profile is sourced. Expanding them here
# (while the *_HOME variables are still unset) would record entries such as
# "/bin" and "/sbin" instead of the intended tool directories.
# No trailing ':' on PATH: an empty entry means "current directory".
cat >> /etc/profile <<'EOF'
export JAVA_HOME=/usr/java/latest
export HADOOP_HOME=/usr/local/hadoop
export HBASE_HOME=/usr/local/hbase
export ZOOKEEPER_HOME=/usr/local/zookeeper
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/sbin:$HADOOP_HOME/bin:$ZOOKEEPER_HOME/bin:$HBASE_HOME/bin
EOF
# Load the new variables into the current shell.
source /etc/profile

# Unpack Hadoop from the home directory into /usr/local and give it the
# version-independent name that HADOOP_HOME points to.
cd && tar -zxvf hadoop-2.6.4.tar.gz -C /usr/local/
mv /usr/local/hadoop-2.6.4 /usr/local/hadoop

# Write the worker list in one go, overwriting the file. Appending would keep
# whatever the tarball shipped in it (normally "localhost", which would start
# a worker on this node too) and would duplicate entries on a re-run.
printf '%s\n' slave1 slave2 slave3 > /usr/local/hadoop/etc/hadoop/slaves

##############虚拟机集中配置本机环境######################

##############虚拟机分散配置克隆环境########################
# Set the persistent hostname of this machine.
# These four commands are per-host alternatives: run ONLY the one matching the
# machine you are on. Each rewrites the same HOSTNAME= line, so executing all
# of them in sequence leaves every host named "slave3".
sed -i '/^HOSTNAME=/cHOSTNAME=master' /etc/sysconfig/network
sed -i '/^HOSTNAME=/cHOSTNAME=slave1' /etc/sysconfig/network
sed -i '/^HOSTNAME=/cHOSTNAME=slave2' /etc/sysconfig/network
sed -i '/^HOSTNAME=/cHOSTNAME=slave3' /etc/sysconfig/network

# Register the cluster host names in /etc/hosts so that the nodes can reach
# each other by name. The delimiter is quoted; the text is appended verbatim.
cat >> /etc/hosts <<'EOF'
192.168.26.130 master
192.168.26.163 slave1
192.168.26.164 slave2
192.168.26.165 slave3
EOF

# Create a passphrase-less RSA key pair and install it on every node.
# The key must live in ~/.ssh/id_rsa: that is where ssh and ssh-copy-id look
# by default, and it is the path configured for sshfence in hdfs-site.xml
# (/root/.ssh/id_rsa). Writing it to ~/id_rsa leaves it unused.
cd && mkdir -p .ssh && chmod 700 .ssh
ssh-keygen -N '' -t rsa -q -f .ssh/id_rsa

# Authorise the new public key on each host (the local host included).
for host in master slave1 slave2 slave3; do
  ssh-copy-id -i .ssh/id_rsa.pub "$host"
done
##############虚拟机分散配置克隆环境########################

#######################配置Zookeeper####################################################
# Unpack ZooKeeper and give it the version-independent name used by
# ZOOKEEPER_HOME.
tar -zxvf zookeeper-3.4.8.tar.gz -C /usr/local/
mv /usr/local/zookeeper-3.4.8/ /usr/local/zookeeper

# Start from the sample configuration.
cd /usr/local/zookeeper/conf/
cp zoo_sample.cfg zoo.cfg

# Keep ZooKeeper's data under its own install directory.
mkdir -p /usr/local/zookeeper/data
sed -i 's|^dataDir.*|dataDir=/usr/local/zookeeper/data|' /usr/local/zookeeper/conf/zoo.cfg

# Declare the three ensemble members (server.<id>=<host>:<port>:<port>).
cat >> /usr/local/zookeeper/conf/zoo.cfg <<'EOF'
server.0=slave1:2288:3388
server.1=slave2:2288:3388
server.2=slave3:2288:3388
EOF

# Each node stores its own id in myid; it has to match that node's server.<id>
# entry above. 0 is for slave1; use the alternatives below on slave2 / slave3.
printf '%s\n' 0 > /usr/local/zookeeper/data/myid

#echo 1 > /usr/local/zookeeper/data/myid
#echo 2 > /usr/local/zookeeper/data/myid
#######################配置Zookeeper####################################################

#######################配置HADOOP#######################################################
# hadoop-env.sh: pin JAVA_HOME to the JDK symlink instead of relying on the
# caller's environment.
cd /usr/local/hadoop/etc/hadoop
sed -i 's|^export JAVA_HOME.*|export JAVA_HOME=/usr/java/latest|' hadoop-env.sh

#core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Default file system: the HDFS nameservice named "masters" -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://masters</value>
</property>
<!-- Hadoop temporary directory -->
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop/tmp</value>
</property>
<!-- ZooKeeper quorum addresses -->
<property>
<name>ha.zookeeper.quorum</name>
<value>slave1:2181,slave2:2181,slave3:2181</value>
</property>
</configuration>

#hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- HDFS nameservice is "masters"; must match the value used in core-site.xml -->
<property>
<name>dfs.nameservices</name>
<value>masters</value>
</property>
<!-- The "masters" nameservice has two NameNodes: master and slave1 -->
<property>
<name>dfs.ha.namenodes.masters</name>
<value>master,slave1</value>
</property>
<!-- RPC address of NameNode "master" -->
<property>
<name>dfs.namenode.rpc-address.masters.master</name>
<value>master:9000</value>
</property>
<!-- HTTP address of NameNode "master" -->
<property>
<name>dfs.namenode.http-address.masters.master</name>
<value>master:50070</value>
</property>
<!-- RPC address of NameNode "slave1" -->
<property>
<name>dfs.namenode.rpc-address.masters.slave1</name>
<value>slave1:9000</value>
</property>
<!-- HTTP address of NameNode "slave1" -->
<property>
<name>dfs.namenode.http-address.masters.slave1</name>
<value>slave1:50070</value>
</property>
<!-- Where the NameNode metadata (shared edits) is stored on the JournalNodes -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://slave1:8485;slave2:8485;slave3:8485/masters</value>
</property>
<!-- Local disk directory in which each JournalNode keeps its data -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/usr/local/hadoop/journal</value>
</property>
<!-- Enable automatic NameNode failover -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- Implementation class used for automatic failover -->
<property>
<name>dfs.client.failover.proxy.provider.masters</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing methods; multiple methods are separated by newlines, one method per line -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
</value>
</property>
<!-- The sshfence method requires passwordless SSH login; this is the private key it uses -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- Connect timeout for the sshfence method -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
</configuration>

#mapred-site.xml
<configuration>
<!-- Run MapReduce on the YARN framework -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>

<!-- NOTE(review): the notes give no file name for this block; the yarn.* keys suggest yarn-site.xml. Confirm. -->
<configuration>
<!-- Enable ResourceManager high availability -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- Cluster id of the ResourceManager pair -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>RM_HA_ID</value>
</property>
<!-- Logical names of the ResourceManagers -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- Host of each ResourceManager -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>master</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>slave1</value>
</property>
<!-- Enable ResourceManager state recovery -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>

<!-- State store class: the ZooKeeper-backed ZKRMStateStore -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!-- ZooKeeper ensemble addresses -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>slave1:2181,slave2:2181,slave3:2181</value>
</property>
<!-- Auxiliary service run by the NodeManagers -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>

# Push the finished configuration directory to the other nodes.
# The destination is the PARENT directory: when the target of "scp -r" already
# exists (it does, Hadoop was unpacked before the VMs were cloned) the source
# directory is copied INTO it, which would produce .../etc/hadoop/hadoop and
# leave the live configuration untouched.
for host in slave1 slave2 slave3; do
  scp -r /usr/local/hadoop/etc/hadoop "$host":/usr/local/hadoop/etc/
done

#######################配置HADOOP#######################################################

##############配置hbase#########################hbase-**

# Unpack the HBase tarball into /usr/local and rename it to the path that
# HBASE_HOME points to. Without -C, tar treats "/usr/local/hbase" as the name
# of an archive member to extract rather than as the output directory.
# The globs stand in for the HBase version; they must match exactly one
# tarball and one extracted directory.
tar -zxvf hbase-*.tar.gz -C /usr/local/
mv /usr/local/hbase-*/ /usr/local/hbase

##############配置hbase#########################hbase-**

##############COMMAND##############################

# Start a JournalNode. Run this on every host listed in
# dfs.namenode.shared.edits.dir (hdfs-site.xml).
cd /usr/local/hadoop/sbin
hadoop-daemon.sh start journalnode

# Start ZooKeeper and print its state. Run on every server.<id> host declared
# in zoo.cfg.
cd /usr/local/zookeeper/bin/
zkServer.sh start
zkServer.sh status

# Format HDFS (on one NameNode only).
hdfs namenode -format

# Copy the freshly formatted data under hadoop.tmp.dir to slave1, the second
# NameNode configured in dfs.ha.namenodes.masters.
scp -r /usr/local/hadoop/tmp slave1:/usr/local/hadoop/
# Initialise the failover-controller state in ZooKeeper, then start HDFS/YARN.
hdfs zkfc -formatZK
start-dfs.sh
start-yarn.sh
# NOTE(review): presumably meant to be run on the second ResourceManager host
# (rm2 = slave1) -- confirm.
yarn-daemon.sh start resourcemanager

# Smoke test: upload a file, list the root directory and run the bundled
# wordcount example. The jar is referenced by absolute path (the current
# directory is /usr/local/zookeeper/bin at this point) and by the version
# that was actually installed (2.6.4, not 2.4.1).
hadoop fs -put /etc/profile /profile
hadoop fs -ls /
hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.4.jar wordcount /profile /out

start-hbase.sh
##############COMMAND#########################