Using Python to find the top 10 IPs in an nginx access log, sort them, and generate a web page

Date: 2023-03-09 07:15:06

Method 1:
Use awk on Linux

# cat access1.log | awk '{print $1"  "$7"  "$9}' | sort -n | uniq -c | sort -n -r | head -10
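Reading the pipeline left to right: awk prints the ip ($1), request path ($7), and status code ($9) of each line; sort groups identical lines so that uniq -c can count them; the second sort -n -r orders by count, descending; head -10 keeps the ten busiest. If you only care about the IP itself rather than (ip, url, code) combinations, a minimal variant (assuming the default combined log format, where the client IP is the first field):

# awk '{print $1}' access1.log | sort | uniq -c | sort -rn | head -10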

Method 2:
Process the log with Python

# encoding=utf-8

# Find the top 10 entries in the log. A log line looks like:
# 100.116.167.9 - - [22/Oct/2017:03:55:53 +0800] "HEAD /check HTTP/1.0" 200 0 "-" "-" "-" ut = 0.001
# After nodes = line.split(), nodes[0] is the ip, nodes[6] the url, nodes[8] the status code.
def log_analysis(log_file, dpath, topn=10):
    # Count occurrences of each (ip, url, code) combination
    log_dict = {}
    shandle = open(log_file, 'r')
    while True:
        line = shandle.readline()
        if line == '':
            break
        nodes = line.split()
        # Skip malformed or short lines
        if len(nodes) < 9:
            continue
        # Use the (ip, url, code) tuple as the dictionary key:
        # set it to 1 on first sight, otherwise increment
        ip, url, code = nodes[0], nodes[6], nodes[8]
        if (ip, url, code) not in log_dict:
            log_dict[(ip, url, code)] = 1
        else:
            log_dict[(ip, url, code)] += 1
    # Close the file handle
    shandle.close()
    # Sort the dictionary entries, e.g. ('111.37.21.148', '/index', '200'): 2
    rst_list = list(log_dict.items())
    # Bubble sort on the count: each outer pass sinks the current maximum
    # to the tail, so topn passes leave the topn busiest entries at the
    # end of the list in ascending order
    for j in range(topn):
        for i in range(0, len(rst_list) - 1):
            if rst_list[i][1] > rst_list[i + 1][1]:
                rst_list[i], rst_list[i + 1] = rst_list[i + 1], rst_list[i]
    need_list = rst_list[-1:-topn - 1:-1]
    # Render the top 10 entries and write them into a web page
    title = 'nginx access log'
    tbody = ''
    for i in need_list:
        tbody += '<tr>\n<td>%s</td><td>%s</td><td>%s</td><td>%s</td>\n</tr>\n' % (
            i[1], i[0][0], i[0][1], i[0][2])
    html_tpl = '''
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{title}</title>
</head>
<body>
<table border="1" cellspacing="0" cellpadding="0" color='pink'>
<thead>
<tr>
<th>visits</th>
<th>ip</th>
<th>url</th>
<th>http_code</th>
</tr>
</thead>
{tbody}
</table>
</body>
</html>
'''
    html_handle = open(dpath, 'w')
    html_handle.write(html_tpl.format(title=title, tbody=tbody))
    html_handle.close()


# Entry point
if __name__ == '__main__':
    # nginx log file
    log_file = 'access1.log'
    dpath = 'top10.html'
    # topn: how many top entries to keep; defaults to 10 if not passed
    topn = 10
    # log_analysis(log_file, dpath)
    log_analysis(log_file, dpath, topn)
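The manual counting loop and bubble sort above can be expressed more compactly with collections.Counter from the standard library, whose most_common() already returns entries sorted by count, descending. A minimal sketch under the same assumed log layout (log_analysis_counter is a hypothetical name, not part of the original script):

from collections import Counter

def log_analysis_counter(log_file, topn=10):
    counter = Counter()
    with open(log_file) as f:
        for line in f:
            nodes = line.split()
            if len(nodes) >= 9:
                # Same key as above: the (ip, url, code) tuple
                counter[(nodes[0], nodes[6], nodes[8])] += 1
    # most_common(topn) returns [((ip, url, code), count), ...] descending
    return counter.most_common(topn)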

Method 3:

# Count the top ten (ip, status) pairs in an nginx log

def static_file(file_name):
    res_dict = {}
    with open(file_name) as f:
        for line in f:
            if line == '\n':
                continue
            # line.split() yields, e.g.:
            # ['100.116.x.x', '-', '-', '[08/Feb/2018:14:37:13', '+0800]', '"HEAD',
            #  '/check', 'HTTP/1.0"', '200', '0', '"-"', '"-"', '"-"', 'ut', '=', '0.002']
            tmp = line.split()
            tup = (tmp[0], tmp[8])
            # Increment the count for this (ip, status) pair
            res_dict[tup] = res_dict.get(tup, 0) + 1
    return res_dict


def generate_html(rst_list):
    str_html = '<table border="1" cellpadding="0" cellspacing="0">'
    str_html += '<tr><th>ip address</th><th>status code</th><th>count</th></tr>'
    html_tmpl = '<tr><td>%s</td><td>%s</td><td>%s</td></tr>'
    for (ip, status), count in rst_list:
        str_html += html_tmpl % (ip, status, count)
    str_html += '</table>'
    return str_html


def write_to_html(html_str):
    with open('res.html', 'w') as f:
        f.write(html_str)


def main():
    res_dict = static_file('voice20180208.log')
    # Sort ascending by count, then walk the tail backwards so the
    # ten busiest (ip, status) pairs come first
    res_list = sorted(res_dict.items(), key=lambda x: x[1])
    html_content = generate_html(res_list[-1:-11:-1])
    write_to_html(html_content)


if __name__ == '__main__':
    main()
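A note on the slicing in main(): sorting in descending order up front makes the intent clearer than the negative-stride slice. A minimal alternative main(), assuming the same helper functions as above:

def main():
    res_dict = static_file('voice20180208.log')
    # reverse=True puts the largest counts first, so a plain [:10]
    # slice selects the top ten directly
    res_list = sorted(res_dict.items(), key=lambda x: x[1], reverse=True)
    write_to_html(generate_html(res_list[:10]))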