# Count word frequencies in the text of an Excel sheet with Python and generate a word-cloud image.

import xlrd

import jieba

import pymysql

import matplotlib.pylab as plt

from wordcloud import WordCloud

from collections import Counter

import numpy as np

def getExcelData(excel, txt):
    """Count word frequencies in one column of an Excel sheet.

    The text cells of the third column (index 2) of the sheet at index 2 are
    concatenated onto ``txt``, segmented with ``jieba.cut``, and counted.

    Note:
        xlrd 2.0 and later read only legacy ``.xls`` workbooks; opening an
        ``.xlsx`` file requires an older xlrd release.

    Args:
        excel: Path of the workbook to read.
        txt: Text placed before the sheet contents; pass ``''`` to count
            only the sheet contents.

    Returns:
        dict mapping each segmented word longer than one character to its
        number of occurrences, ordered from most to least frequent.
    """
    readbook = xlrd.open_workbook(excel)

    # Index is 0-based, so 2 selects the THIRD sheet.
    # NOTE(review): the original comment said "second sheet" - confirm which
    # sheet is actually intended.
    sheet = readbook.sheet_by_index(2)

    # Third column (0-based index 2) of every row.
    cells = (sheet.cell(row, 2).value for row in range(sheet.nrows))

    # xlrd returns float/int for number, date and boolean cells; concatenating
    # those onto a str raised TypeError, so only text cells are kept.
    txt += "".join(value for value in cells if isinstance(value, str))

    # Keep tokens of at least two characters; the explicit '\r\n' test drops
    # the two-character line-break token that would otherwise pass the check.
    counts = Counter(
        word for word in jieba.cut(txt)
        if len(word) > 1 and word != '\r\n'
    )

    # most_common() yields (word, count) pairs in descending count order; the
    # resulting dict is the frequency source for the word cloud.
    return dict(counts.most_common())

def makeWordCloud(txt, *, font_path=r"C:\Windows\Fonts\STXINGKA.TTF",
                  output_file='abc.png'):
    """Render a circular word cloud from word frequencies, save and show it.

    Args:
        txt: Mapping of word -> frequency, passed to
            ``WordCloud.generate_from_frequencies``.
        font_path: Font file used to draw the words. Defaults to a font in
            the Windows fonts directory; pass another path on other systems.
        output_file: Path the rendered image is written to.

    Returns:
        None. The image is written to ``output_file`` and displayed with
        matplotlib.
    """
    # Boolean grid of shape (300, 500): True for every point lying outside
    # the circle of radius 150 centred at (150, 150).
    rows, cols = np.ogrid[:300, :500]
    outside_circle = (rows - 150) ** 2 + (cols - 150) ** 2 > 150 ** 2

    # WordCloud treats pure-white (255) mask entries as masked out, so words
    # are drawn only inside the circle.
    mask = 255 * outside_circle.astype(int)

    wc = WordCloud(
        background_color="white",
        max_words=500,
        mask=mask,
        repeat=True,
        # Per the WordCloud documentation width/height are ignored when a
        # mask is supplied (the mask shape is used); kept as in the original.
        width=1000,
        height=1000,
        # Larger values give a higher-resolution image with crisper glyphs.
        scale=4,
        font_path=font_path,
    )
    wc.generate_from_frequencies(txt)
    wc.to_file(output_file)

    plt.axis("off")
    plt.imshow(wc, interpolation="bilinear")
    plt.show()

if __name__ == '__main__':
    # Count the words of 'getdata.xlsx' starting from an empty text prefix,
    # then render the resulting frequencies as a word cloud.
    frequencies = getExcelData('getdata.xlsx', '')
    makeWordCloud(frequencies)