textSimilarity.py 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. """
  2. @author: luojunhui
  3. """
  4. import time
  5. import torch
  6. import numpy as np
  7. from similarities import BertSimilarity
  8. # bge_large_zh_v1_5 = 'bge_large_zh_v1_5'
  9. # text2vec_base_chinese = "text2vec_base_chinese"
  10. # text2vec_bge_large_chinese = "text2vec_bge_large_chinese"
  11. class NLPFunction(object):
  12. """
  13. NLP Task
  14. """
  15. def __init__(self, model):
  16. self.model = model
  17. def base_string_similarity(self, text_dict):
  18. """
  19. 基础功能,计算两个字符串的相似度
  20. :param text_dict:
  21. :return:
  22. """
  23. score_tensor = self.model.similarity(
  24. text_dict['text_a'],
  25. text_dict['text_b']
  26. )
  27. return score_tensor.squeeze().tolist()
  28. def base_list_similarity(self, pair_list_dict):
  29. """
  30. 计算两个list的相似度
  31. :return: "score_list_b": [100, 1000, 500, 40],
  32. """
  33. score_tensor = self.model.similarity(
  34. pair_list_dict['text_list_a'],
  35. pair_list_dict['text_list_b']
  36. )
  37. return score_tensor.tolist()
  38. if __name__ == '__main__':
  39. a = time.time()
  40. m = BertSimilarity(model_name_or_path="BAAI/bge-large-zh-v1.5")
  41. b = time.time()
  42. print("模型加载时间:\t", b - a)
  43. NF = NLPFunction(m)
  44. td = {
  45. "text_a": "王者荣耀",
  46. "text_b": "斗罗大陆"
  47. }
  48. tld = {
  49. "text_list_a": ["凯旋", "圣洁", "篮球"],
  50. "text_list_b": ["胜利", "纯洁", "足球"]
  51. }
  52. # res = NF.base_string_similarity(text_dict=td)
  53. res = NF.base_list_similarity(pair_list_dict=tld)
  54. c = time.time()
  55. print("计算时间:\t", c - b)
  56. for i in res:
  57. print(i)