Linux记录-shell获取hdfs表查询mysql

时间:2024-05-04 11:35:25
#!/bin/sh

hdfs dfs -ls /user/hive/warehouse | awk '{print $8}' | awk -F "/" '{print $5}' >hivedb.txt
sed -i '1d' hivedb.txt
dbline=$(cat hivedb.txt | wc -l)
let dbline=dbline+1
if [ $dbline -gt 0 ]
  then
  for (( i = 1 ; i < $dbline ; i++ ))
  do
  hdb=$(sed -n "$i p" hivedb.txt | awk '{print $1}')
  hdfs dfs -du -h /user/hive/warehouse/$hdb | awk -F "/" '{print $6"\t"$1}'  >> hivetab.txt
  echo $i>>dbcount.txt
  done
else
   echo "Not Hive DB"
fi
tabline=$(cat hivetab.txt | wc -l)
let tabline=tabline+1
if [ $tabline -gt 0 ]
  then
  for (( j = 1 ; j < $tabline ; j++ ))
  do
   htab=$(sed -n "$j p" hivetab.txt | awk '{print $1}')
   htabsize=$(sed -n "$j p" hivetab.txt | awk '{print $2}')
   mysql -hbsch-M.db.sfdc.com.cn -ubsch -pBD_1234ch -Dbsch -e \
   "select distinct t.owner,b.begin_time,datediff(now(),b.begin_time) as diffdate
    from t_script s
    inner join t_task t on s.id=t.script_id
    left outer join alpha_user a on t.owner=a.emp_code
    left outer join t_job b on t.id=b.task_id
    where s.content like '%${htab}%'
    order by b.begin_time desc limit 1;" >task.txt
	diffdate=$(cat task.txt | awk 'NR==1{print int($3)}')
	echo $j>>tabcount.txt
	if [ $diffdate ]
	then
	 if [ $diffdate -gt 90 ]
	 then
        echo $htab "" $htabsize "" $(cat task.txt | awk 'NR==1{print $1"\t"$2"\t"$3}') >>tabgt90.txt
	 fi
	 if [ $diffdate -gt 0 ] && [ $diffdate -le 90 ]
	 then
		echo $htab "" $htabsize "" $(cat task.txt | awk 'NR==1{print $1"\t"$2"\t"$3}') >>tablt90.txt
	 fi
	fi
  done
else
   echo "Not Hive table"
fi