# word_ngram 模型使用小 tip

import functools

import numpy as np
import pandas

# 对 vocabulary 中不存在的单词：按字符切分，取各字符向量的平均值，再计算余弦相似度

# 本方法使用 wiki word+char 模型，计算 PKU 500 词语相似度（针对 wiki 中出现的词汇）

def cha_w2v(s, vectors=None):
    """Return the average of the vectors looked up for each item of *s*.

    Intended for out-of-vocabulary words: the word is split into characters
    and the character vectors are averaged.

    Args:
        s: Iterable of keys (for a string, its characters).
        vectors: Optional mapping from key to vector (anything supporting
            ``vectors[key]``). When omitted, the module-level name ``model``
            is used.
            NOTE(review): no module-level ``model`` is defined in the visible
            source; confirm it exists before relying on the default.

    Returns:
        ``sum(vectors) / count`` over all items of *s*.

    Raises:
        ValueError: If *s* is empty, so there is nothing to average.
        KeyError: Propagated from the lookup when a key is missing, if the
            mapping raises it.
    """
    if vectors is None:
        vectors = model  # fall back to the module-level model

    res = []
    for ch in s:
        vec = vectors[ch]  # single lookup, reused for the print and the sum
        print(ch, vec)
        res.append(vec)

    if not res:
        # Explicit error instead of an opaque ZeroDivisionError below.
        raise ValueError("cha_w2v() requires a non-empty sequence")
    return sum(res) / len(res)

def Cosine(vec1, vec2):
    """Return the cosine similarity of two equal-length numeric vectors.

    Args:
        vec1: Sequence or array of numbers.
        vec2: Sequence or array of numbers, same length as *vec1*.

    Returns:
        ``dot(vec1, vec2) / (||vec1|| * ||vec2||)`` as a float. When either
        vector has zero norm the similarity is undefined; 0.0 is returned
        instead of dividing by zero.
    """
    npvec1 = np.asarray(vec1, dtype=float)
    npvec2 = np.asarray(vec2, dtype=float)
    norm_product = np.sqrt((npvec1 ** 2).sum()) * np.sqrt((npvec2 ** 2).sum())
    if norm_product == 0:
        return 0.0
    return float(npvec1.dot(npvec2) / norm_product)

# NOTE(review): this path ends at a directory; the vector file name appears to
# be missing from the original source. Confirm the full path before running.
_DEFAULT_MODEL_PATH = "D:\\paper\\github\\baike\\"


@functools.lru_cache(maxsize=None)
def _load_vectors(model_path):
    """Load word2vec text-format vectors, once per distinct *model_path*."""
    import gensim  # kept function-local, as in the original block

    # NOTE(review): the receiver of load_word2vec_format was lost in the
    # original text; KeyedVectors is the current gensim entry point -- confirm.
    return gensim.models.KeyedVectors.load_word2vec_format(
        model_path, binary=False
    )


def word2vec(word1, word2, model_path=_DEFAULT_MODEL_PATH):
    """Return the similarity between *word1* and *word2*.

    The model is loaded on first use and cached, so repeated calls do not
    re-read the vector file.

    Args:
        word1: First word.
        word2: Second word.
        model_path: Path to vectors in word2vec text format.

    Returns:
        The similarity reported by the model, or ``None`` when the lookup
        fails; the error is printed in that case.

    Errors raised while loading the model are not caught and propagate.
    """
    model = _load_vectors(model_path)
    try:
        # NOTE(review): the method name was lost in the original text;
        # reconstructed as `similarity` from the comment there -- confirm.
        return model.similarity(word1, word2)
    except Exception as e:
        # Best effort, as in the original: report the failure and carry on so
        # one bad pair does not abort a batch.
        print(e)
        return None

def open_file(path, path1, encoding=None):
    """Print the similarity for each pair of corresponding lines in two files.

    Line *i* of *path* is paired with line *i* of *path1*; newline characters
    are removed and the pair is passed to ``word2vec``, whose result is
    printed.

    Args:
        path: Text file with the first word of each pair, one per line.
        path1: Text file with the second word of each pair, one per line.
        encoding: Text encoding for both files; ``None`` uses the platform
            default.

    If the files differ in length, pairing stops at the end of the shorter
    one.
    """
    with open(path, "r", encoding=encoding) as f, \
            open(path1, "r", encoding=encoding) as f1:
        # zip() pairs the lines lazily and cannot index past the shorter file.
        for first, second in zip(f, f1):
            print(word2vec(first.replace("\n", ""), second.replace("\n", "")))

if __name__ == "__main__":
    # Two parallel word lists: line i of each file forms one word pair.
    path = r"D:\three-graduate\github\PKU\test_0.txt"
    path1 = r"D:\three-graduate\github\PKU\test_1.txt"
    open_file(path, path1)

# 秒客网