1 year ago · c470c12ff6
--- a/applications/textSimilarity.py
+++ b/applications/textSimilarity.py
@@ -0,0 +1,92 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
 
				+import torch
			
 
				+import numpy as np
			
 
				+from similarities import BertSimilarity
			
 
				+
			
 
# Shared similarity model, constructed once when this module is imported.
# NOTE(review): instantiation presumably loads the model weights, which would
# make importing this module expensive -- confirm against the `similarities`
# package.
model = BertSimilarity(model_name_or_path="BAAI/bge-large-zh-v1.5")


# String identifiers for models. None of them is referenced elsewhere in this
# module; presumably callers use them to name or select a model -- confirm
# against the importing code.
bge_large_zh_v1_5 = 'bge_large_zh_v1_5'
text2vec_base_chinese = "text2vec_base_chinese"
text2vec_bge_large_chinese = "text2vec_bge_large_chinese"
			
 
				+
			
 
				+
			
 
				+def get_sim_score_by_pair(model, pair):
			
 
				+    try:
			
 
				+        score_tensor = model.similarity(pair['text_a'], pair['text_b'])
			
 
				+        return score_tensor.squeeze().tolist()
			
 
				+    except Exception as e:
			
 
				+        raise
			
 
				+
			
 
				+
			
 
				+def get_sim_score_by_pair_list(model, pair_list):
			
 
				+    try:
			
 
				+        res = [get_sim_score_by_pair(model, pair) for pair in pair_list['text_pair_list']]
			
 
				+        return res
			
 
				+    except Exception as e:
			
 
				+        raise
			
 
				+
			
 
				+
			
 
				+def get_sim_score_by_list_pair(model, list_pair):
			
 
				+    try:
			
 
				+        score_tensor = model.similarity(list_pair['text_list_a'], list_pair['text_list_b'])
			
 
				+        return score_tensor.tolist()
			
 
				+    except Exception as e:
			
 
				+        raise
			
 
				+
			
 
				+
			
 
				+def get_sim_score_max(model, data):
			
 
				+    try:
			
 
				+        score_list_max = []
			
 
				+        text_list_max = []
			
 
				+        score_array = get_sim_score_by_list_pair(model, data)
			
 
				+        text_list_a, text_list_b = data['text_list_a'], data['text_list_b']
			
 
				+        for i, row in enumerate(score_array):
			
 
				+            max_index = np.argmax(row)
			
 
				+            max_value = row[max_index]
			
 
				+            score_list_max.append(max_value)
			
 
				+            text_list_max.append(text_list_b[max_index])
			
 
				+        return score_list_max, text_list_max, score_array
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"Error in get_sim_score_max: {e}")
			
 
				+        raise
			
 
				+
			
 
				+
			
 
				+def score_to_attention(score, symbol=1):
			
 
				+    try:
			
 
				+        score_pred = torch.FloatTensor(score).unsqueeze(0)
			
 
				+        score_norm = symbol * torch.nn.functional.normalize(score_pred, p=2, dim=1)
			
 
				+        score_attn = torch.nn.functional.softmax(score_norm, dim=1)
			
 
				+        return score_attn, score_norm, score_pred
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"Error in score_to_attention: {e}")
			
 
				+        raise
			
 
				+
			
 
				+
			
 
				+def get_sim_score_avg(model, data):
			
 
				+    try:
			
 
				+        text_list_a, text_list_b = data['text_list_a'], data['text_list_b']
			
 
				+        score_list_b, symbol = data['score_list_b'], data['symbol']
			
 
				+        score_list_max, text_list_max, score_array = get_sim_score_max(model, data)
			
 
				+        score_attn, score_norm, score_pred = score_to_attention(score_list_b, symbol=symbol)
			
 
				+        score_tensor = torch.tensor(score_array)
			
 
				+        score_res = torch.matmul(score_tensor, score_attn.transpose(0, 1))
			
 
				+        score_list = score_res.squeeze(-1).tolist()
			
 
				+        return score_list, text_list_max, score_array
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"Error in get_sim_score_avg: {e}")
			
 
				+        raise
			
 
				+
			
 
				+
			
 
				+def get_sim_score_mean(model, data):
			
 
				+    try:
			
 
				+        text_list_a, text_list_b = data['text_list_a'], data['text_list_b']
			
 
				+        score_list_max, text_list_max, score_array = get_sim_score_max(model, data)
			
 
				+        score_tensor = torch.tensor(score_array)
			
 
				+        score_res = torch.mean(score_tensor, dim=1)
			
 
				+        score_list = score_res.tolist()
			
 
				+        return score_list, text_list_max, score_array
			
 
				+    except Exception as e:
			
 
				+        raise
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,27 @@
 
				+aiofiles
			
 
				+aiohttp
			
 
				+aiomysql
			
 
				+aiosignal
			
 
				+alembic
			
 
				+aliyun-log-python-sdk
			
 
				+black
			
 
				+httpx
			
 
				+huggingface-hub
			
 
				+hypercorn
			
 
				+matplotlib
			
 
				+numpy
			
 
				+odps
			
 
				+openai
			
 
				+openpyxl
			
 
				+optuna
			
 
				+packaging
			
 
				+pandas
			
 
				+pymysql
			
 
				+pyodps
			
 
				+quart
			
 
				+requests
			
 
				+scipy
			
 
				+selenium
			
 
				+torch
			
 
				+tqdm
			
 
				+transformers
			
--- a/routes/nlpServer.py
+++ b/routes/nlpServer.py
@@ -0,0 +1,83 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
 
				+from typing import List
			
 
				+from pydantic import BaseModel
			
 
				+from similarities import BertSimilarity
			
 
				+import numpy as np
			
 
				+import torch
			
 
				+import logging
			
 
				+
			
 
				+
			
 
				+class NLPServer(object):
			
 
				+    """
			
 
				+    nlp_server
			
 
				+    """
			
 
				+    def __init__(self, params):
			
 
				+        """
			
 
				+        :param params:
			
 
				+        """
			
 
				+        self.model = None
			
 
				+        self.function = None
			
 
				+        self.text_02 = None
			
 
				+        self.text_01 = None
			
 
				+        self.params = params
			
 
				+
			
 
				+    def check_params(self):
			
 
				+        """
			
 
				+        参数校验
			
 
				+        :return:
			
 
				+        """
			
 
				+        try:
			
 
				+            self.text_01 = self.params['text_01']
			
 
				+            self.text_02 = self.params['text_02']
			
 
				+            self.function = self.params['function']
			
 
				+            self.model = self.params['model']
			
 
				+            return None
			
 
				+        except Exception as e:
			
 
				+            error_info = {
			
 
				+                "error": "params error",
			
 
				+                "detail": str(e)
			
 
				+            }
			
 
				+            return error_info
			
 
				+
			
 
				+    def choose_function(self):
			
 
				+        """
			
 
				+        :return:
			
 
				+        """
			
 
				+        match self.function:
			
 
				+            case "similarities":
			
 
				+                return
			
 
				+            case "similarities_cross":
			
 
				+                return
			
 
				+            case "similarities_cross_max":
			
 
				+                return
			
 
				+            case "similarities_cross_avg":
			
 
				+                return
			
 
				+            case "similarities_cross_mean":
			
 
				+                return
			
 
				+
			
 
				+    def base_similarity(self):
			
 
				+        """
			
 
				+        base similarity
			
 
				+        :return:
			
 
				+        """
			
 
				+        try:
			
 
				+
			
 
				+            res = {
			
 
				+                'score_list': []
			
 
				+            }
			
 
				+            return res
			
 
				+        except Exception as e:
			
 
				+            return {"error": str(e)}
			
 
				+
			
 
				+    def deal(self):
			
 
				+        """
			
 
				+        deal function
			
 
				+        :return:
			
 
				+        """
			
 
				+        return self.check_params if self.check_params else self.choose_function
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
--- a/test/rank_dev.py
+++ b/test/rank_dev.py