统计nginx单个IP访问日志并获取IP来源

时间:2023-03-10 05:22:30
统计nginx单个IP访问日志并获取IP来源
#!/usr/bin/env python
#coding=utf-8
import requests
from urllib2 import urlopen
# import lxml.html
from bs4 import BeautifulSoup
import subprocess
import sys,os
# Re-import the sys module, then make UTF-8 the interpreter-wide default
# string encoding for the rest of the script.
# NOTE(review): this looks like the Python 2 idiom for getting
# sys.setdefaultencoding back after start-up; neither call is available as
# written on Python 3 — confirm the target interpreter before porting.
reload(sys)
sys.setdefaultencoding('utf-8')
# Access log to analyse; awk counts hits per value of the first field ($1).
logfile = "./access.log"
# Single address that must never show up in the report.
white_list = "8.8.8.8"
# Space-separated recipients passed to mail(1).
mail_list = "xxx@sina.com xxx@xxx.com"
# Shell pipeline that prints "<hits> <ip>" for the 20 busiest entries.
# The whitelisted address is removed with a fixed-string, whole-word match
# (-F -w): with a plain `grep -v` the dots are regex wildcards and the pattern
# matches substrings, so e.g. 18.8.8.8 would be dropped as well.  The filter
# runs before `head` so the report still holds 20 rows when the whitelisted
# address is among the top hits.
cmd = """
awk '{ip[$1]++}END{for (k in ip){print ip[k],k}}' %s | sort -rn | grep -vwF %s | head -20
""" %(logfile, white_list)
# Mails the report file /tmp/check (written by run_cmd) to mail_list.
send_mail = 'mail -s "Walrus Rest Log Statistics" %s < /tmp/check' %mail_list
# NOTE(review): label/item are not referenced by any code visible in this
# file — confirm they are unused before removing them.
label = "div"
item = "class"
reg = "well"


def get_source(ip, timeout=10):
    """Return the source text that ip.chinaz.com reports for *ip*.

    The result page is fetched and parsed, and every
    ``<span class="Whwtdhalf w50-0">`` element is inspected.  Spans whose
    text contains "IP" are skipped; the text of the last remaining span is
    returned.

    Args:
        ip: Address to look up; it is appended to the lookup URL.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The span text, or the placeholder "unknown" when the request fails
        or the page contains no qualifying span.
    """
    ip_source = "unknown"
    url = "http://ip.chinaz.com/%s" % ip
    try:
        html = urlopen(url, timeout=timeout)
        try:
            bsObj = BeautifulSoup(html, "html.parser")
        finally:
            # The response object holds a socket; release it even if
            # parsing fails.
            html.close()
    except IOError:
        # Network failures (urllib2.URLError, socket timeouts) surface as
        # IOError on Python 2.  One unreachable lookup should not abort the
        # whole report, so fall back to the placeholder.
        return ip_source
    spans = bsObj.findAll("span", {"class": "Whwtdhalf w50-0"})
    for span in spans:
        text = span.get_text()
        # Spans mentioning "IP" are skipped; any other span carries the
        # source text.  The last match wins.
        if "IP" not in text:
            ip_source = text
    return ip_source
def run_cmd():
    """Build the per-IP report in /tmp/check and mail it.

    Runs the shell pipeline in ``cmd``, looks up the source of every IP it
    prints via ``get_source``, writes one "<hits> <ip>\\t<source>" row per IP
    to /tmp/check and finally executes ``send_mail``.

    Returns:
        None.
    """
    proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() closes stdin, drains stdout and stderr together and waits
    # for the child, so an unread stderr pipe cannot stall the pipeline and
    # no zombie process is left behind.
    output, _ = proc.communicate()
    # The context manager closes the report even if a lookup raises.
    with open("/tmp/check", 'w') as report:
        # splitlines() drops the trailing newline of each row; keeping it
        # would push the source text onto a line of its own.
        for line in output.splitlines():
            fields = line.split()
            if len(fields) < 2:
                # Not a "<hits> <ip>" row (e.g. blank line): nothing to look up.
                continue
            source = get_source(fields[1])
            report.write("%s\t%s\n" % (line, source))
    # Send only after the report has been flushed and closed.
    os.system(send_mail)


if __name__ == '__main__':
    run_cmd()