123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117 |
- """
- @author: luojunhui
- """
- import torch
- import numpy as np
def score_to_attention(score, symbol=1):
    """Turn a sequence of raw scores into softmax attention weights.

    The scores are loaded into a float tensor with a leading axis of size 1,
    L2-normalized along dim 1, multiplied by ``symbol`` and finally passed
    through a softmax over dim 1.

    :param score: numeric sequence accepted by ``torch.FloatTensor``.
    :param symbol: multiplier applied to the normalized scores (default 1);
        presumably used to flip the sign of the ranking -- confirm with callers.
    :return: tuple ``(attention, normalized, raw)`` of tensors, in that order.
    """
    functional = torch.nn.functional
    # Leading axis of size 1 so that dim=1 below runs over the scores.
    raw = torch.unsqueeze(torch.FloatTensor(score), 0)
    scaled = symbol * functional.normalize(raw, p=2, dim=1)
    weights = functional.softmax(scaled, dim=1)
    return weights, scaled, raw
class NLPFunction:
    """Text-similarity helpers built on top of a similarity model.

    The wrapped ``model`` must expose ``similarity(a, b)`` and return a
    tensor-like object; ``squeeze()`` and ``tolist()`` are called on the
    result. Every public method takes a request dict and returns a plain
    dict of Python lists / numbers.
    """

    def __init__(self, model):
        """Keep a reference to the similarity model.

        :param model: object providing ``similarity(a, b)``.
        """
        self.model = model

    def base_string_similarity(self, text_dict):
        """Compute the similarity between two strings.

        :param text_dict: mapping with the keys ``'text_a'`` and ``'text_b'``.
        :return: ``{"score": value}`` where ``value`` is the model output
            after ``squeeze().tolist()``.
        """
        score_tensor = self.model.similarity(
            text_dict['text_a'],
            text_dict['text_b'],
        )
        return {"score": score_tensor.squeeze().tolist()}

    def base_list_similarity(self, pair_list_dict):
        """Compute the similarity between two lists of texts.

        :param pair_list_dict: mapping with the keys ``'text_list_a'`` and
            ``'text_list_b'``.
        :return: ``{"score_list_list": rows}`` where ``rows`` is the model
            output converted with ``tolist()``; presumably one row per entry
            of ``text_list_a`` and one column per entry of ``text_list_b``
            (depends on the model -- confirm).
        """
        score_tensor = self.model.similarity(
            pair_list_dict['text_list_a'],
            pair_list_dict['text_list_b'],
        )
        return {"score_list_list": score_tensor.tolist()}

    def max_cross_similarity(self, data):
        """Pick, for every score row, the best-scoring text of list b.

        :param data: mapping with the keys ``'text_list_a'`` and
            ``'text_list_b'``.
        :return: dict with ``'score_list_max'`` (highest score of each row),
            ``'text_list_max'`` (the ``text_list_b`` entry at that position)
            and ``'score_list_list'`` (the full score rows).
        """
        score_array = self.base_list_similarity(data)['score_list_list']
        text_list_b = data['text_list_b']
        # Column positions are used to index text_list_b, i.e. each row is
        # assumed to hold one score per text_list_b entry.
        best_indices = [np.argmax(row) for row in score_array]
        return {
            'score_list_max': [
                row[idx] for row, idx in zip(score_array, best_indices)
            ],
            'text_list_max': [text_list_b[idx] for idx in best_indices],
            'score_list_list': score_array,
        }

    def mean_cross_similarity(self, data):
        """Average every score row and report the best-matching texts.

        :param data: mapping with the keys ``'text_list_a'`` and
            ``'text_list_b'``.
        :return: dict with ``'score_list_mean'`` (mean of each row),
            ``'text_list_max'`` (as in ``max_cross_similarity``) and
            ``'score_list_list'`` (the full score rows).
        """
        resp = self.max_cross_similarity(data)
        score_array = resp['score_list_list']
        row_means = torch.mean(torch.tensor(score_array), dim=1)
        return {
            'score_list_mean': row_means.tolist(),
            'text_list_max': resp['text_list_max'],
            'score_list_list': score_array,
        }

    def avg_cross_similarity(self, data):
        """Weighted average of every score row using attention weights.

        The weights are the first value returned by ``score_to_attention``
        for ``data['score_list_b']`` and ``data['symbol']``.

        :param data: mapping with the keys ``'text_list_a'``,
            ``'text_list_b'``, ``'score_list_b'`` and ``'symbol'``.
            ``score_list_b`` presumably holds one weight score per
            ``text_list_b`` entry (the matrix product below requires the
            lengths to match).
        :return: dict with ``'score_list_avg'`` (weighted average of each
            row), ``'text_list_max'`` (as in ``max_cross_similarity``) and
            ``'score_list_list'`` (the full score rows).
        """
        score_list_b = data['score_list_b']
        symbol = data['symbol']
        resp = self.max_cross_similarity(data)
        score_array = resp['score_list_list']
        # Only the attention weights are needed here.
        score_attn = score_to_attention(score_list_b, symbol=symbol)[0]
        # Rows times the transposed weight row: one weighted sum per row.
        score_res = torch.matmul(
            torch.tensor(score_array),
            score_attn.transpose(0, 1),
        )
        return {
            'score_list_avg': score_res.squeeze(-1).tolist(),
            'text_list_max': resp['text_list_max'],
            'score_list_list': score_array,
        }
|