Python 解析 nginx 日志 URL

时间:2024-01-03 14:06:20

"""Parse an nginx access log and write the captured fields of every line to a file.

Each log line is matched against ``p``; the list returned by ``findall``
(one tuple of eleven captured fields per match, or an empty list when the
line does not match) is printed to the output file, one list per input line.
"""
import os
import re

# Named-group fragments.  Each one is wrapped in parentheses when it is
# spliced into the full pattern below, producing ``(?P<name>...)`` groups.
ip = r"?P<ip>[\d.]*"
date = r"?P<date>\d+"
month = r"?P<month>\w+"
year = r"?P<year>\d+"
log_time = r"?P<time>\S+"
method = r"?P<method>\S+"
request = r"?P<request>\S+"
status = r"?P<status>\d+"
bodyBytesSent = r"?P<bodyBytesSent>\d+"
# The line breaks inside the next two fragments are harmless only because
# the pattern is compiled with re.VERBOSE, which ignores unescaped whitespace.
refer = r"""?P<refer>
[^\"]*
"""
userAgent = r"""?P<userAgent>
.*
"""

# Expected line shape, as spelled out by the pattern:
#   ip - - [date/month/year:time zone] "method request ..." status bytes "refer" "userAgent"
# Literal spaces are escaped (``\ ``) so that re.VERBOSE does not drop them.
p = re.compile(r"(%s)\ -\ -\ \[(%s)/(%s)/(%s)\:(%s)\ [\S]+\]\ \"(%s)?[\s]?(%s)?.*?\"\ (%s)\ (%s)\ \"(%s)\"\ \"(%s).*?\"" % (ip, date, month, year, log_time, method, request, status, bodyBytesSent, refer, userAgent), re.VERBOSE)


def parse_line(line):
    """Return the matches of ``p`` in *line*.

    The result is a list of 11-tuples
    ``(ip, date, month, year, time, method, request, status,
    bodyBytesSent, refer, userAgent)``; it is empty when *line* does not
    match the pattern.
    """
    return p.findall(line)


def main(log_path='access.log', out_path='text.txt', workdir='e:/'):
    """Parse *log_path* and write one result list per line to *out_path*.

    The process first changes into *workdir*, so relative paths are
    resolved against it.  Raises ``OSError`` if the directory or the log
    file cannot be opened.
    """
    os.chdir(workdir)
    # Both files are managed by ``with`` so they are closed (and the output
    # flushed) even if parsing fails part-way.  The output must be opened
    # for writing; the default read mode cannot be printed to.
    # Undecodable bytes in the log are replaced rather than aborting the run.
    with open(log_path, encoding='utf-8', errors='replace') as log, \
            open(out_path, 'w', encoding='utf-8') as out:
        for line in log:
            print(parse_line(line), file=out)


if __name__ == "__main__":
    main()