| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566 |
- """
- @author: luojunhui
- """
- import time
- import torch
- import numpy as np
- from similarities import BertSimilarity
- # bge_large_zh_v1_5 = 'bge_large_zh_v1_5'
- # text2vec_base_chinese = "text2vec_base_chinese"
- # text2vec_bge_large_chinese = "text2vec_bge_large_chinese"
class NLPFunction(object):
    """
    Small collection of text-similarity helpers built on one similarity model.

    The wrapped ``model`` must provide ``similarity(a, b)``; its result must
    support ``tolist()`` (and ``squeeze()`` for the single-pair helper).
    """

    def __init__(self, model):
        """
        :param model: similarity backend, stored unchanged on ``self.model``
        """
        self.model = model

    def base_string_similarity(self, text_dict):
        """
        Basic feature: score the similarity between two strings.

        :param text_dict: mapping that holds the keys ``text_a`` and ``text_b``
        :return: the model's score after ``squeeze()`` and ``tolist()``
            (presumably a single number for one pair of strings; depends on
            what the model returns)
        """
        compare = self.model.similarity
        raw_scores = compare(text_dict['text_a'], text_dict['text_b'])
        # Drop size-1 dimensions before converting to plain Python values.
        squeezed = raw_scores.squeeze()
        return squeezed.tolist()

    def base_list_similarity(self, pair_list_dict):
        """
        Score the similarity between two lists of texts.

        :param pair_list_dict: mapping that holds the keys ``text_list_a``
            and ``text_list_b``
        :return: the model's scores converted with ``tolist()`` (presumably
            one row per entry of ``text_list_a``; confirm against the model)
        """
        compare = self.model.similarity
        raw_scores = compare(
            pair_list_dict['text_list_a'],
            pair_list_dict['text_list_b'],
        )
        return raw_scores.tolist()
if __name__ == '__main__':
    # Manual smoke test: time the model load, then time one scoring call.
    load_started = time.time()
    similarity_model = BertSimilarity(model_name_or_path="BAAI/bge-large-zh-v1.5")
    load_finished = time.time()
    print("模型加载时间:\t", load_finished - load_started)

    nlp = NLPFunction(similarity_model)

    # Sample input for base_string_similarity; not used by the call below,
    # kept so the single-pair helper can be tried by swapping the call.
    single_pair = {"text_a": "王者荣耀", "text_b": "斗罗大陆"}

    # Sample input for base_list_similarity.
    list_pair = {
        "text_list_a": ["凯旋", "圣洁", "篮球"],
        "text_list_b": ["胜利", "纯洁", "足球"],
    }

    scores = nlp.base_list_similarity(pair_list_dict=list_pair)
    scoring_finished = time.time()
    # Measured from the end of model loading, so it also covers the setup above.
    print("计算时间:\t", scoring_finished - load_finished)

    for row in scores:
        print(row)
|