Browse Source

增加requirements.txt

罗俊辉 10 months ago
parent
commit
c470c12ff6
4 changed files with 206 additions and 14 deletions
  1. 92 0
      applications/textSimilarity.py
  2. 27 0
      requirements.txt
  3. 83 0
      routes/nlpServer.py
  4. 4 14
      test/rank_dev.py

+ 92 - 0
applications/textSimilarity.py

@@ -0,0 +1,92 @@
+"""
+@author: luojunhui
+"""
+import logging
+
+import numpy as np
+import torch
+from similarities import BertSimilarity
+
+# Module-level similarity model, instantiated once when this module is imported.
+# NOTE(review): every function below takes its own `model` parameter, which
+# shadows this global inside those functions.
+model = BertSimilarity(model_name_or_path="BAAI/bge-large-zh-v1.5")
+
+
+# String identifiers for embedding models; none of the functions visible in
+# this module reference them.
+bge_large_zh_v1_5 = 'bge_large_zh_v1_5'
+text2vec_base_chinese = "text2vec_base_chinese"
+text2vec_bge_large_chinese = "text2vec_bge_large_chinese"
+
+
+def get_sim_score_by_pair(model, pair):
+    """
+    Score the similarity of a single text pair.
+
+    :param model: object exposing ``similarity(a, b)`` whose result supports
+        ``.squeeze().tolist()`` (presumably a torch tensor -- confirm against
+        the ``similarities`` library)
+    :param pair: mapping with the keys ``text_a`` and ``text_b``
+    :return: the model's similarity result after ``squeeze().tolist()``
+    :raises KeyError: if ``pair`` lacks ``text_a`` or ``text_b``
+    """
+    # The previous try/except only re-raised, so dropping it leaves error
+    # propagation unchanged.
+    score_tensor = model.similarity(pair['text_a'], pair['text_b'])
+    return score_tensor.squeeze().tolist()
+
+
+def get_sim_score_by_pair_list(model, pair_list):
+    """
+    Score every text pair in a request, one pair at a time.
+
+    :param model: similarity model forwarded to ``get_sim_score_by_pair``
+    :param pair_list: mapping whose ``text_pair_list`` entry is an iterable of
+        pair mappings (each with ``text_a`` and ``text_b``)
+    :return: list with one ``get_sim_score_by_pair`` result per pair, in order
+    :raises KeyError: if ``pair_list`` lacks ``text_pair_list``
+    """
+    # The previous try/except only re-raised, so dropping it leaves error
+    # propagation unchanged.
+    return [
+        get_sim_score_by_pair(model, pair)
+        for pair in pair_list['text_pair_list']
+    ]
+
+
+def get_sim_score_by_list_pair(model, list_pair):
+    """
+    Score two text lists against each other in a single model call.
+
+    :param model: object exposing ``similarity(a, b)`` whose result supports
+        ``.tolist()``
+    :param list_pair: mapping with the keys ``text_list_a`` and ``text_list_b``
+    :return: the model's similarity result converted with ``tolist()``
+        (presumably one row per entry of ``text_list_a`` and one column per
+        entry of ``text_list_b`` -- confirm against the ``similarities`` library)
+    :raises KeyError: if ``list_pair`` lacks either key
+    """
+    # The previous try/except only re-raised, so dropping it leaves error
+    # propagation unchanged.
+    score_tensor = model.similarity(
+        list_pair['text_list_a'], list_pair['text_list_b']
+    )
+    return score_tensor.tolist()
+
+
+def get_sim_score_max(model, data):
+    """
+    For every row of the similarity matrix, pick the best-scoring candidate.
+
+    :param model: similarity model forwarded to ``get_sim_score_by_list_pair``
+    :param data: mapping with the keys ``text_list_a`` and ``text_list_b``
+    :return: tuple ``(score_list_max, text_list_max, score_array)`` where
+        ``score_list_max[i]`` is the largest value in row ``i`` of
+        ``score_array`` and ``text_list_max[i]`` is the ``text_list_b`` entry
+        at that column
+    :raises Exception: any error is logged and re-raised unchanged
+    """
+    try:
+        score_array = get_sim_score_by_list_pair(model, data)
+        text_list_b = data['text_list_b']
+        score_list_max = []
+        text_list_max = []
+        for row in score_array:
+            # argmax returns the first index of the maximum on ties.
+            max_index = int(np.argmax(row))
+            score_list_max.append(row[max_index])
+            text_list_max.append(text_list_b[max_index])
+        return score_list_max, text_list_max, score_array
+    except Exception as e:
+        # The module never defined `logger`, so the old handler raised
+        # NameError and hid the real failure; fetch a logger explicitly.
+        logging.getLogger(__name__).error(f"Error in get_sim_score_max: {e}")
+        raise
+
+
+def score_to_attention(score, symbol=1):
+    """
+    Convert a list of scores into softmax attention weights.
+
+    The scores are L2-normalised, multiplied by ``symbol`` and passed through
+    a softmax along the score dimension.
+
+    :param score: sequence of numbers accepted by ``torch.FloatTensor``
+    :param symbol: multiplier applied to the normalised scores before the
+        softmax (default 1)
+    :return: tuple ``(score_attn, score_norm, score_pred)``; for a flat list
+        of ``n`` scores each tensor has shape ``(1, n)``
+    :raises Exception: any error is logged and re-raised unchanged
+    """
+    try:
+        score_pred = torch.FloatTensor(score).unsqueeze(0)
+        score_norm = symbol * torch.nn.functional.normalize(score_pred, p=2, dim=1)
+        score_attn = torch.nn.functional.softmax(score_norm, dim=1)
+        return score_attn, score_norm, score_pred
+    except Exception as e:
+        # The module never defined `logger`, so the old handler raised
+        # NameError and hid the real failure; fetch a logger explicitly.
+        logging.getLogger(__name__).error(f"Error in score_to_attention: {e}")
+        raise
+
+
+def get_sim_score_avg(model, data):
+    """
+    Compute an attention-weighted average of each similarity row.
+
+    The weights come from ``score_to_attention(data['score_list_b'],
+    symbol=data['symbol'])`` and are applied to the similarity matrix with a
+    matrix product.
+
+    :param model: similarity model forwarded to ``get_sim_score_max``
+    :param data: mapping with the keys ``text_list_a``, ``text_list_b``,
+        ``score_list_b`` and ``symbol``; ``score_list_b`` presumably holds one
+        score per entry of ``text_list_b`` (required for the matrix product to
+        line up -- confirm against callers)
+    :return: tuple ``(score_list, text_list_max, score_array)`` where
+        ``score_list`` holds one weighted score per row of ``score_array``
+    :raises Exception: any error is logged and re-raised unchanged
+    """
+    try:
+        score_list_b, symbol = data['score_list_b'], data['symbol']
+        _, text_list_max, score_array = get_sim_score_max(model, data)
+        score_attn, _, _ = score_to_attention(score_list_b, symbol=symbol)
+        score_tensor = torch.tensor(score_array)
+        # (rows, cols) @ (cols, 1) -> (rows, 1); drop the trailing axis.
+        score_res = torch.matmul(score_tensor, score_attn.transpose(0, 1))
+        score_list = score_res.squeeze(-1).tolist()
+        return score_list, text_list_max, score_array
+    except Exception as e:
+        # The module never defined `logger`, so the old handler raised
+        # NameError and hid the real failure; fetch a logger explicitly.
+        logging.getLogger(__name__).error(f"Error in get_sim_score_avg: {e}")
+        raise
+
+
+def get_sim_score_mean(model, data):
+    """
+    Compute the plain mean of each similarity row.
+
+    :param model: similarity model forwarded to ``get_sim_score_max``
+    :param data: mapping with the keys ``text_list_a`` and ``text_list_b``
+    :return: tuple ``(score_list, text_list_max, score_array)`` where
+        ``score_list`` holds the mean of each row of ``score_array``
+    """
+    # The previous try/except only re-raised, so dropping it leaves error
+    # propagation unchanged.
+    _, text_list_max, score_array = get_sim_score_max(model, data)
+    score_tensor = torch.tensor(score_array)
+    score_list = torch.mean(score_tensor, dim=1).tolist()
+    return score_list, text_list_max, score_array

+ 27 - 0
requirements.txt

@@ -0,0 +1,27 @@
+aiofiles
+aiohttp
+aiomysql
+aiosignal
+alembic
+aliyun-log-python-sdk
+black
+httpx
+huggingface-hub
+hypercorn
+matplotlib
+numpy
+odps
+openai
+openpyxl
+optuna
+packaging
+pandas
+pydantic
+pymysql
+pyodps
+quart
+requests
+scipy
+selenium
+similarities
+torch
+tqdm
+transformers

+ 83 - 0
routes/nlpServer.py

@@ -0,0 +1,83 @@
+"""
+@author: luojunhui
+"""
+from typing import List
+from pydantic import BaseModel
+from similarities import BertSimilarity
+import numpy as np
+import torch
+import logging
+
+
+class NLPServer(object):
+    """
+    Request handler skeleton for the NLP similarity service.
+
+    Stores the raw request parameters, validates them, and dispatches on the
+    requested function name.
+    """
+
+    def __init__(self, params):
+        """
+        :param params: mapping expected to provide the keys ``text_01``,
+            ``text_02``, ``function`` and ``model``
+        """
+        self.model = None
+        self.function = None
+        self.text_02 = None
+        self.text_01 = None
+        self.params = params
+
+    def check_params(self):
+        """
+        Validate the request parameters and copy them onto the instance.
+
+        :return: ``None`` when all four keys are present, otherwise a dict
+            with ``error`` and ``detail`` describing the failure
+        """
+        try:
+            self.text_01 = self.params['text_01']
+            self.text_02 = self.params['text_02']
+            self.function = self.params['function']
+            self.model = self.params['model']
+            return None
+        except Exception as e:
+            # Lookup failures are reported to the caller as data rather than
+            # raised.
+            error_info = {
+                "error": "params error",
+                "detail": str(e)
+            }
+            return error_info
+
+    def choose_function(self):
+        """
+        Dispatch on ``self.function``.
+
+        Every branch is currently a placeholder, so the result is always
+        ``None``.
+
+        :return: ``None``
+        """
+        match self.function:
+            case "similarities":
+                return
+            case "similarities_cross":
+                return
+            case "similarities_cross_max":
+                return
+            case "similarities_cross_avg":
+                return
+            case "similarities_cross_mean":
+                return
+
+    def base_similarity(self):
+        """
+        Base similarity handler (placeholder).
+
+        :return: dict with an empty ``score_list``, or a dict with ``error``
+            if an exception is raised
+        """
+        try:
+
+            res = {
+                'score_list': []
+            }
+            return res
+        except Exception as e:
+            return {"error": str(e)}
+
+    def deal(self):
+        """
+        Validate the parameters, then dispatch to the requested function.
+
+        :return: the error dict from ``check_params`` when validation fails,
+            otherwise the result of ``choose_function``
+        """
+        # The methods must be called: a bound method object is always truthy,
+        # so the previous un-called form returned the method itself and never
+        # validated or dispatched anything.
+        error_info = self.check_params()
+        if error_info:
+            return error_info
+        return self.choose_function()
+
+
+
+

File diff suppressed because it is too large
+ 4 - 14
test/rank_dev.py


Some files were not shown because too many files changed in this diff