jupyter访问hive进行数据分析
# 1 拉取现有集群镜像
docker pull limengjiao029/hive:v0.1
# 2 基于镜像创建容器实例
docker run --privileged -tid -p 8000:8000 -p 8088:8088 -p 8042:8042 -p 50070:50070 -p 8678:8678 -p 10000:10000 limengjiao029/hive:v0.1
# 3 进入容器,替换容器 ID, 需要Docker基础
docker exec -ti containerID /bin/bash
# 4 启动 hadoop
/opt/modules/hadoop-2.7.7/sbin/start-all.sh
# 5 启动metastore服务
# 5.1 无日志启动
# nohup hive --service metastore 2>&1 &
nohup hive --service metastore >> ~/metastore.log 2>&1 &
# 6 启动hiveserver2服务,jdbc连接均需要
# 6.1 无日志启动
# nohup hive --service hiveserver2 2>&1 &
# 6.2 查看hiveserver2 进程
# ps -aux| grep hiveserver2
# 6.3 杀死进程
# kill -9 pid
# 6.4 验证是否有效
# beeline
# !connect jdbc:hive2://localhost:10000
# show tables;
nohup hive --service hiveserver2 >> ~/hiveserver2.log 2>&1 &
# 7 安装anaconda2
bash Anaconda2-2019.03-Linux-x86_64.sh
# 7.1 配置环境变量, 在/etc/profile文件中加入下列语句, 并source生效
export PATH=/root/anaconda2/bin:$PATH
source /etc/profile
# 7.2 判断python是否anaconda2版本
python -V
# 8 安装sasl
# 8.1 解压 sasl-0.2.1.tar.gz
# tar -zxvf sasl-0.2.1.tar.gz
# cd sasl-0.2.1
python setup.py install
# 9 安装thrift
# 9.1 解压 thrift-0.11.0.tar.gz
# tar -zxvf thrift-0.11.0.tar.gz
# cd thrift-0.11.0
python setup.py install
# 10 安装pyhs2
# 10.1 解压 pyhs2-0.6.0.tar.gz
# tar -zxvf pyhs2-0.6.0.tar.gz
# cd pyhs2-0.6.0
python setup.py install
# 11 配置jupyter_notebook_config.py
# 11.1 密码生成命令
# from notebook.auth import passwd
# passwd()
c.NotebookApp.allow_remote_access = True
c.NotebookApp.allow_root = True
c.NotebookApp.ip = '*'
c.NotebookApp.notebook_dir = u'/home/jupyter_root_dir'
c.NotebookApp.open_browser = False
c.NotebookApp.password = u'sha1:4600e66850b0:aedd50f9cabd7102cbcb1c8ba38d4e2500e46f67'
c.NotebookApp.port = 8678
# 12 后台驻留启动jupyter notebook
nohup jupyter notebook --allow-root >> nohup.out 2>&1 &