Python 计算中文文本相似度神器:sentence-transformers、text2vec

时间:2025-05-08 07:23:38
# Demo: compute pairwise cosine similarity between two lists of sentences
# using a multilingual Sentence-Transformers model.
import sys

from sentence_transformers import SentenceTransformer as SBert
from sentence_transformers.util import cos_sim

# The model can also be loaded by name, which downloads it on first use:
#     model = SBert('paraphrase-multilingual-MiniLM-L12-v2')
# If loading by name fails, download the model manually and load it from the
# local directory instead. The original post linked the download page as
# "/reimers/sentence-transformers/v0.2/" (a site-relative link; the host is
# not visible here -- TODO confirm the full URL).
#
# A raw string is required for the Windows path: in a normal string literal
# "\x" starts a hex escape, so "\xxxx" is a SyntaxError, and "\D" is an
# invalid escape sequence. "xxxx" is presumably a placeholder for the
# Windows user name -- replace it with the real directory.
MODEL_PATH = r"C:\Users\xxxx\Downloads\paraphrase-multilingual-MiniLM-L12-v2"
model = SBert(MODEL_PATH)

# Two lists of sentences to compare against each other.
sentences1 = [
    '如何更换花呗绑定银行卡',
    'The cat sits outside',
    'A man is playing guitar',
    'The new movie is awesome',
]
sentences2 = [
    '花呗更改绑定银行卡',
    'The dog plays in the garden',
    'A woman watches TV',
    'The new movie is so great',
]

# Compute embeddings for both lists.
embeddings1 = model.encode(sentences1)
embeddings2 = model.encode(sentences2)

# Compute cosine similarities. Per the sentence-transformers documentation,
# cos_sim returns a matrix where entry [i][j] is the similarity between
# sentences1[i] and sentences2[j].
cosine_scores = cos_sim(embeddings1, embeddings2)

# A bare `cosine_scores` expression only displays in a notebook/REPL; print
# it so the result is also visible when this runs as a script.
print(cosine_scores)