痛并快乐的造*之旅:awk访问数据库之旅

时间:2024-04-29 07:36:30

俺是一枚悲催的数据统计程序员,从先辈的手里接收了这样的代码:

#! /bin/sh

alias statdb="mysql -h 192.168.1.1 -u stat -paaa statdb"
sql="select * from abc"
echo $sql | statdb -s > abc
cat abc | awk '{...}' #此处省略400行

俺对于这样的代码标示极度不认同,追求模块化才是俺的理想,于是我把代码写成了这样

#! /bin/sh

alias statdb="mysql -h 192.168.1.1 -u stat -paaa"
sql="select * from abc"
echo $sql | statdb -s > abc
echo "" | igawk -f stat.awk -v DATA_FILE="abc"

我的stat.awk的代码长成这样

@include lib/assert.awk
@include lib/makesql.awk BEGIN{...}
{...}
END{...} #可以复用的代码在lib下

但是这样的写法也非常的啰嗦,从获得数据,再把数据导入到awk脚本里环节很多,我在想能不能直接用awk读入数据呢?先写一个从数据库读入数据的shell脚本,然后用awk调用它(system命令)

#! /bin/sh

cd `dirname $`

alias statdb="/usr/local/mysql/bin/mysql -h 192.168.1.1 -u stat -paaa -A statdb"

getDomain()
{
item=$
ver=$ if [ -z $item ]; then
echo item is empty string!
exit
fi sql=""
if [ -z $ver ]; then
sql="select domain from domain_name_list where item ='"$item"';";
else
sql="select domain from domain_name_list where item ='"$item"' and ver='"$ver"';";
fi #echo $sql
echo $sql | statdb -s | cat
} getDomain $*

awk脚本就可以这样调用:

function loadDomains(item, ver, result, __ARGVEND__)
{
command = "/work/stat/read/domain/getdomain.sh "item" "ver
while(command | getline domain)
{
result[domain]
}
close(command)
} BEGIN{
loadDomains("shuqi","wap",SHUQIWAP)
for(i in SHUQIWAP)
{
print i
} print "-------------" loadDomains("shuqi","touch",SHUQITOUCH)
for(i in SHUQITOUCH)
{
print i
}
}

试验了一下,完全是可以的,这样,我就扔掉了中间文件,再也不用让shell先读,读完导入到awk脚本里,但是,既然都可以从shell脚本里读取数据记录了,为啥不能直接连接MYSQL,查询记录集直接生成数组呢?再接再厉,加油!下面是最终版本

function query(sql, db, result, __ARGVEND__)
{
command = "echo \"" sql "\" | " db " -s | cat" while(command | getline record)
{
result[record]
}
close(command)
} function loadDomains(item, ver, result, __ARGVEND__)
{
db = "/usr/local/mysql/bin/mysql -h 192.168.1.1 -u stat -paaa statdb";
sql = "select * from domain_name_list where item ='%s' and ver = '%s' ;"
sql = sprintf(sql,item,ver) query(sql, db, result)
} BEGIN{ # sql = "select domain from domain_name_list where item ='shuqi' and ver = 'wap' ;"
# db = "/usr/local/mysql/bin/mysql -h 59.151.37.16 -u ppstat -pstatpp -A statdb";
# query(sql,db,result) loadDomains("shuqi","wap",result)
loadDomains("sousuo","wap",result1) for(i in result1)
{
print i
}
}

下面是调用执行结果:

wappp@ppcn--:/work/stat/read/booklist> echo "" | igawk -f test.awk
bookw.yisou.com? zwjf sousuo wap -- ::
bookw3.yisou.com zwjf sousuo wap -- ::
a1.pp.cn zwjf sousuo wap -- ::
a2.pp.cn zwjf sousuo wap -- ::
a5.pp.cn zwjf sousuo wap -- ::
bookw1.yisou.com zwjf sousuo wap -- ::
bookw4.yisou.com zwjf sousuo wap -- ::
a3.pp.cn zwjf sousuo wap -- ::
so.pp.cn zwjf sousuo wap -- ::
a5.pp.cn zwjf sousuo wap -- ::
bookw2.yisou.com zwjf sousuo wap -- ::
a0.pp.cn zwjf sousuo wap -- ::
bookw5.yisou.com zwjf sousuo wap -- ::
wappp@ppcn--:/work/stat/read/booklist>