通过python操作GeoLite2-City.mmdb库将nginx日志访问IP转换为城市写入数据库

时间:2024-04-02 14:40:31

通过python操作GeoLite2-City.mmdb库将nginx日志写入数据库

# 创建存放nginx日志的表accesslog2

CREATE TABLE `accesslog2` (
  `id` bigint(20) NOT NULL AUTO_INCREMENT,
  `logtime` datetime DEFAULT NULL,
  `ip` varchar(128) DEFAULT NULL,
  `url` text,
  `status` int(11) DEFAULT NULL,
  `lat` float DEFAULT NULL,
  `lng` float DEFAULT NULL,
  `city` varchar(50) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=896 DEFAULT CHARSET=utf8;

# 安装geoip2模块
# sudo pip2 install geoip2

#encoding=utf-8

import time
import geoip2.database

from dbutils import MysqlConnection

# 找出ip所在城市的日志处理
def log_2db_4map(log_file):
    # 清空数据库信息
    MysqlConnection.execute_sql('truncate table accesslog2')
    path=log_file
    shandle = open(path, 'r')
    log_dict = {}

    # 读取ip数据库
    reader = geoip2.database.Reader('GeoLite2-City.mmdb')

    while True:
        line = shandle.readline()
        if line == '':
            break
        _nodes = line.split()
        
    # 访问url,来访ip,http状态码,访问时间
        _url,_ip,_status,_lgtime = _nodes[6], _nodes[0], _nodes[8],_nodes[3][1:]

        # 将日志访问的时间"22/Oct/2017:03:28:01"转成 2017-11-23 10:08:18 类似的格式
        _ltime = time.strftime('%Y-%m-%d %H:%M:%S',time.strptime(_lgtime,'%d/%b/%Y:%H:%M:%S'))
        # 获取城市信息
        try:
            response = reader.city(_ip)
            # 如果国家不是中国跳出本次循环
            if 'China' != response.country.name:
                continue
            # 获取城市
            _city = response.city.names.get('zh-CN','')
            if _city == '':
                print 'ip: %s city is empty' % _ip
                continue
            # 获取经度和纬度
            _lat = response.location.latitude
            _lng = response.location.longitude
            # print response
        except Exception as e:
            print 'goe has not %s info' % _ip
        
        _args = (_ltime,_ip,_url,_status, _lat,_lng,_city)
    # 插入数据库语句
        sql = 'insert into accesslog2(logtime, ip, url,status,lat,lng,city) values(%s, %s, %s,%s,%s,%s,%s)'
        MysqlConnection.execute_sql(sql, _args)
        
    # 关闭文件句柄
    shandle.close()

# 文件入口
if __name__ == '__main__':
    # nginx日志文件
    log_file = 'www_access.log'
    rt_list = log_2db_4map(log_file = log_file)

连接数据库和操作数据库的底层模块参考:
python操作mysql数据库增删改查的dbutils实例
http://www.cnblogs.com/reblue520/p/7884365.html

通过python操作GeoLite2-City.mmdb库将nginx日志访问IP转换为城市写入数据库