import functools

import numpy as np
import pandas
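# Words that are missing from the vocabulary are handled by splitting them
# into characters, averaging the character vectors, and computing cosine
# similarity on the averaged vectors.
# This script uses the wiki word+char model to score the PKU-500 word
# similarity pairs whose words occur in the wiki vocabulary.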
def cha_w2v(s, vectors=None):
    """Return the mean of the vectors looked up for each character of ``s``.

    Each character of ``s`` is used as a key into ``vectors``; the looked-up
    values are summed and divided by their count.

    Args:
        s: String (or any iterable of keys) to look up item by item.
        vectors: Object indexable as ``vectors[ch]``. When omitted, the
            module-level name ``model`` is used, as the original code did.

    Returns:
        ``sum(vecs) / len(vecs)`` over the looked-up values.

    Raises:
        ValueError: If ``s`` is empty, because there is nothing to average.
    """
    if vectors is None:
        # Legacy behaviour: fall back to a global ``model``. It must be
        # defined by the caller's module before this function is used.
        vectors = model  # noqa: F821
    res = []
    for ch in s:
        vec = vectors[ch]
        print(ch, vec)
        # The original evaluated the lookup but never stored it, so the
        # list stayed empty and the average always divided by zero.
        res.append(vec)
    if not res:
        raise ValueError("cha_w2v() needs at least one character to average")
    return sum(res) / len(res)
def Cosine(vec1, vec2):
    """Return the cosine similarity of two equal-length 1-D vectors.

    Computed as ``dot(v1, v2) / (sqrt(sum(v1**2)) * sqrt(sum(v2**2)))``.

    Args:
        vec1: First vector; any sequence or array of numbers.
        vec2: Second vector, same length as ``vec1``.

    Returns:
        The similarity as a Python ``float``. ``0.0`` is returned when
        either vector has zero length (norm), where the ratio is undefined.
    """
    npvec1 = np.asarray(vec1, dtype=float)
    npvec2 = np.asarray(vec2, dtype=float)
    # Product of the two Euclidean norms.
    denom = np.sqrt((npvec1 ** 2).sum()) * np.sqrt((npvec2 ** 2).sum())
    if denom == 0:
        # Avoid a 0/0 division (nan plus a runtime warning) for zero vectors.
        return 0.0
    return float(npvec1.dot(npvec2) / denom)
# NOTE(review): the original path literal ended at the directory separator;
# the model *file name* is missing from the source and must be appended here
# before the default model can be loaded.
_DEFAULT_MODEL_PATH = "D:\\paper\\github\\baike\\"


@functools.lru_cache(maxsize=1)
def _load_model(path):
    """Load text-format word2vec vectors from ``path``, caching the result."""
    # Imported lazily, as the original did, so importing this module does
    # not require gensim.
    import gensim

    return gensim.models.KeyedVectors.load_word2vec_format(path, binary=False)


def word2vec(word1, word2, model=None):
    """Return the model's similarity score for a pair of words.

    Args:
        word1: First word.
        word2: Second word.
        model: Optional pre-loaded object exposing ``similarity(w1, w2)``
            (e.g. gensim ``KeyedVectors``). When omitted, the vectors at
            ``_DEFAULT_MODEL_PATH`` are loaded once and reused; the original
            code reloaded the whole model on every call.

    Returns:
        The value of ``model.similarity(word1, word2)``, or ``None`` when a
        word is not in the model (the ``KeyError`` is printed).
    """
    if model is None:
        model = _load_model(_DEFAULT_MODEL_PATH)
    try:
        return model.similarity(word1, word2)  # similarity of the two words
    except KeyError as e:
        # Out-of-vocabulary word: report it and keep going (best effort).
        print(e)
        return None
def open_file(path, path1, encoding=None):
    """Print the similarity of each line-aligned word pair from two files.

    Line *i* of ``path`` is paired with line *i* of ``path1``; the newline is
    removed from both and the result of ``word2vec(first, second)`` is
    printed. Pairing stops at the end of the shorter file.

    Args:
        path: File with the first word of every pair, one per line.
        path1: File with the second word of every pair, one per line.
        encoding: Text encoding for both files. ``None`` keeps the platform
            default, which is what the original ``open`` calls used.
    """
    # The original opened both files but never read them, so no pair was
    # ever scored. Iterate the two files in lockstep instead.
    with open(path, "r", encoding=encoding) as f, \
            open(path1, "r", encoding=encoding) as f1:
        for first, second in zip(f, f1):
            print(word2vec(first.replace("\n", ""), second.replace("\n", "")))
if __name__ == "__main__":
    # Score the PKU word pairs: the two files hold the first and the second
    # word of each pair, one word per line.
    open_file(
        r"D:\three-graduate\github\PKU\test_0.txt",
        r"D:\three-graduate\github\PKU\test_1.txt",
    )