[py]初始化dict结构和json.dump使用

时间:2022-01-22 08:17:44

1.json.dump使用

http://python3-cookbook.readthedocs.io/zh_CN/latest/c06/p02_read-write_json_data.html

- 针对文件
json.dump
json.load

ps:各台机器本地计算, os.walk根据目录算出目录下所有文件绝对路径,遍历后汇总成一个数组做处理

2.字典数据结构:使用前先初始化

- d有一级 d = {'name':[]}
d = {}
# Initialise the list on first use, then append unconditionally.
# Testing membership (rather than the truthiness of d.get('name')) matters:
# an existing but empty list is falsy and would otherwise be re-created
# forever without the value ever being stored.
if 'name' not in d:
    d['name'] = []
d['name'].append('maotai')
- d有两级: d = {'name':{k:[]}}
# Group rows of (outer_key, inner_key, value) into {outer_key: {inner_key: [value, ...]}}.
d = {}
for row in data:
    # setdefault creates the missing level on first sight of a key and
    # returns the existing container afterwards.
    d.setdefault(row[0], {}).setdefault(row[1], []).append(row[2])

较完善的一个例子

import os
import json

# 获取数据
def get_data(path='/tmp/res/res.txt'):
    """Build an HTML table of files whose hash differs between hosts.

    Every usable line of the input file carries at least three
    whitespace-separated fields, in the order ``ip hash file_path``.
    Records are grouped as ``{file_name: {hash: [ip, ...]}}``, where
    ``file_name`` is the base name of ``file_path``. Only files seen with
    more than one distinct hash are rendered. Such a file gets one table
    row per hash, and its name cell spans all of those rows.

    Args:
        path: Input file to read. Defaults to the location that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        The HTML table as a string. The ip cell contains the ``%s``
        rendering of the Python list of ips recorded for that hash.

    Raises:
        OSError: If ``path`` cannot be opened.
    """
    # Collect and group the data in one pass: file name -> hash -> [ip, ...].
    grouped = {}
    with open(path) as f:
        for line in f:
            fields = line.split()
            if len(fields) < 3:
                # Blank or truncated line: indexing it would raise
                # IndexError, and there is nothing meaningful to group.
                continue
            ip, hash_val = fields[0], fields[1]
            file_name = os.path.basename(fields[2])
            grouped.setdefault(file_name, {}).setdefault(hash_val, []).append(ip)

    # HTML fragments. They are written with explicit escapes so that the
    # whitespace emitted into the page does not depend on source indentation.
    table_start = (
        "\n"
        "    <table border='1' cellspacing=\"0\">\n"
        "        <tr>\n"
        "            <td>file_name</td>\n"
        "            <td>hash</td>\n"
        "            <td>ip</td>\n"
        "        </tr>\n"
        "    "
    )
    file_tmpl = '\n            <td rowspan="%s">%s</td>\n    '
    hash_tmpl = '\n            <td>%s</td>\n    '
    ip_tmpl = '\n            <td>%s</td>\n    '
    table_end = '</table>'

    rows = []
    for file_name, hashes in grouped.items():
        if len(hashes) < 2:
            # A single hash means every host agrees on this file; skip it.
            continue
        file_cell = file_tmpl % (len(hashes), file_name)
        for index, (hash_val, ips) in enumerate(hashes.items()):
            cells = hash_tmpl % (hash_val,) + ip_tmpl % (ips,)
            if index == 0:
                # Only the first row of a file carries the rowspan name cell.
                cells = file_cell + cells
            rows.append('<tr>' + cells + '</tr>')

    return table_start + ''.join(rows) + table_end