"""
统计词语频率保存到xls

时间:2023-07-06 08:32:02
"""
import json
import jieba.analyse as anl
import xlwt # 获取待统计的文本内容
# Build the text to analyse from zhilian.json, which is read as one JSON
# object per line, each optionally followed by a ',' separator.
#
# The original script read exactly 1867 lines. That cap is kept so files
# that already worked produce the same input text, but the loop now stops
# cleanly at end-of-file instead of failing on the empty string that
# readline() returns there.
MAX_RECORDS = 1867

job_texts = []
# The context manager closes the file even if a line fails to parse.
with open('zhilian.json', 'r', encoding='utf-8') as f:
    for line_number, line in enumerate(f):
        if line_number >= MAX_RECORDS:
            break
        # Remove only the separator at the end of the line. Replacing every
        # '},' in the line would also break nested objects and alter text
        # stored inside the values.
        payload = line.strip().rstrip(',')
        if not payload:
            continue  # blank line: nothing to parse
        record = json.loads(payload)
        job_texts.append(record['job_content'])

# Join once rather than growing a string inside the loop.
ans_data = ''.join(job_texts)

# Create the workbook in memory; it is written to disk by save() below.
# NOTE(review): encoding='ascii' is kept from the original even though the
# sheet name and keywords are Chinese str values -- confirm xlwt only applies
# this setting to byte strings.
workbook = xlwt.Workbook(encoding='ascii')
worksheet = workbook.add_sheet('python招聘分词')

# Ask jieba for up to 150 keywords from the combined text; withWeight=True
# makes each result a (keyword, weight) pair.
seg = anl.extract_tags(ans_data, topK=150, withWeight=True)
for index, (tag, weight) in enumerate(seg):
    print("%-20s:%3s %-8s" % (weight, index, tag))
    # One spreadsheet row per keyword: 1-based rank, keyword, weight.
    worksheet.write(index, 0, label=index + 1)
    worksheet.write(index, 1, label=tag)
    worksheet.write(index, 2, label=weight)

# Write the workbook to disk.
workbook.save('python招聘分词统计.xls')