Jupyter 访问 Hive 进行数据分析

时间:2025-04-17 16:02:51
# Runbook: query Hive from a Jupyter notebook running inside the Hive container.
#
# Steps 1-3 run on the Docker host. Steps 4-12 are meant to be run inside the
# interactive shell opened by step 3, so this listing is a sequence of commands
# to follow step by step rather than one unattended script.

# 1. Pull the prebuilt cluster image.
docker pull limengjiao029/hive:v0.1

# 2. Create a container from the image.
#    Port 8678 is the Jupyter port configured in step 11; port 10000 is the
#    one used by the JDBC URL in step 6.4.
docker run --privileged -tid \
  -p 8000:8000 \
  -p 8088:8088 \
  -p 8042:8042 \
  -p 50070:50070 \
  -p 8678:8678 \
  -p 10000:10000 \
  limengjiao029/hive:v0.1

# 3. Open a shell in the container. Replace containerID with the real
#    container ID (e.g. as listed by `docker ps`); requires basic Docker
#    knowledge.
docker exec -ti containerID /bin/bash

# 4. Start Hadoop.
/opt/modules/hadoop-2.7.7/sbin/start-all.sh

# 5. Start the Hive metastore service, appending its output to a log file.
# 5.1 Alternative without a dedicated log file:
#   nohup hive --service metastore 2>&1 &
nohup hive --service metastore >> ~/metastore.log 2>&1 &

# 6. Start the HiveServer2 service; every JDBC connection needs it.
# 6.1 Alternative without a dedicated log file:
#   nohup hive --service hiveserver2 2>&1 &
# 6.2 Inspect the hiveserver2 process:
#   ps -aux | grep hiveserver2
# 6.3 Kill the process:
#   kill -9 pid
# 6.4 Verify that the service works:
#   beeline
#   !connect jdbc:hive2://localhost:10000
#   show tables;
nohup hive --service hiveserver2 >> ~/hiveserver2.log 2>&1 &

# 7. Install Anaconda2.
bash Anaconda2-2019.03-Linux-x86_64.sh

# 7.1 Put Anaconda's bin directory on PATH: add the export line to
#     /etc/profile (only once) and source the file so it takes effect.
# shellcheck disable=SC2016 -- $PATH must be written literally into the file
if ! grep -qF 'export PATH=/root/anaconda2/bin:$PATH' /etc/profile; then
  printf '%s\n' 'export PATH=/root/anaconda2/bin:$PATH' >> /etc/profile
fi
source /etc/profile

# 7.2 Check that `python` is now the Anaconda2 interpreter.
#     Upper-case -V prints the version; lower-case -v would instead enable
#     verbose import tracing and start the interactive interpreter.
python -V

#######################################
# Install a Python package from a source archive in the current directory.
# Extracts <name>.tar.gz when the <name> directory does not exist yet, then
# runs `python setup.py install` inside it. The install runs in a subshell so
# the caller's working directory is left unchanged.
# Arguments: $1 - archive base name, e.g. sasl-0.2.1
# Returns:   non-zero if extraction, cd, or the install fails
#######################################
install_source_archive() {
  local pkg_dir=$1
  if [[ ! -d "${pkg_dir}" ]]; then
    tar -zxvf "${pkg_dir}.tar.gz" || return
  fi
  (cd "${pkg_dir}" && python setup.py install)
}

# 8. Install sasl (extract sasl-0.2.1.tar.gz, then install).
install_source_archive sasl-0.2.1

# 9. Install thrift (extract thrift-0.11.0.tar.gz, then install).
install_source_archive thrift-0.11.0

# 10. Install pyhs2 (extract pyhs2-0.6.0.tar.gz, then install).
install_source_archive pyhs2-0.6.0

# 11. Configure jupyter_notebook_config.py.
# 11.1 To generate a password hash, run in Python:
#   from notebook.auth import passwd
#   passwd()
# NOTE(review): the hash below is the sample value from the original notes;
# replace it with one generated for your own password.
# NOTE(review): assumes the config lives at the default per-user location
# ~/.jupyter/jupyter_notebook_config.py -- confirm for your installation.
# The notebook directory is created up front so the configured
# notebook_dir exists when the server starts.
mkdir -p "${HOME}/.jupyter" /home/jupyter_root_dir
cat >> "${HOME}/.jupyter/jupyter_notebook_config.py" <<'EOF'
c.NotebookApp.allow_remote_access = True
c.NotebookApp.allow_root = True
c.NotebookApp.ip = '*'
c.NotebookApp.notebook_dir = u'/home/jupyter_root_dir'
c.NotebookApp.open_browser = False
c.NotebookApp.password = u'sha1:4600e66850b0:aedd50f9cabd7102cbcb1c8ba38d4e2500e46f67'
c.NotebookApp.port = 8678
EOF

# 12. Start Jupyter Notebook as a background process that survives logout.
nohup jupyter notebook --allow-root >> nohup.out 2>&1 &