2 天之前 · 92991f1f55
--- a/evaluate_agent_v2.py
+++ b/evaluate_agent_v2.py
@@ -9,10 +9,10 @@ from openai import OpenAI
 
				 from typing import List, Dict
			
 
				 from pymysql.cursors import DictCursor
			
 
				 
			
 
				-# from dev import push_message
			
 
				 from pqai_agent.database import MySQLManager
			
 
				 from pqai_agent.logging_service import logger
			
 
				 from pqai_agent import configs, logging_service
			
 
				+from pqai_agent.utils import prompt_utils
			
 
				 from pqai_agent_server.utils.prompt_util import format_dialogue_history
			
 
				 
			
 
				 logging_service.setup_root_logger()
			
@@ -48,7 +48,8 @@ def fetch_deepseek_completion(prompt, output_type="text"):
 
				         model='qwen3-235b-a22b',
			
 
				         response_format=response_format,
			
 
				         stream=False,
			
 
				-        extra_body={"enable_thinking": False}
			
 
				+        extra_body={"enable_thinking": False},
			
 
				+        temperature=0.2
			
 
				     )
			
 
				     response = chat_completion.choices[0].message.content
			
 
				     if output_type == "json":
			
@@ -125,23 +126,22 @@ output_dict =   {
 
				         }
			
 
				 
			
 
				 def generate_prompt(dialogue_history: str, message: str,
			
 
				-                    send_time: str, user_profile: Dict, agent_profile: Dict) -> str:
			
 
				+                    send_time: str, user_profile: str, agent_profile: str) -> str:
			
 
				     """
			
 
				     生成评估prompt
			
 
				     :return: prompt
			
 
				     """
			
 
				     prompt = f"""
			
 
				 ## 评估任务说明
			
 
				-当 客服与用户长时间无互动时，客服会主动推送 message 以维系联系。  
			
 
				-请根据输入信息，对该 message 按下列维度逐项打分。
			
 
				-
			
 
				-输入字段：
			
 
				+你是一个专业的语言学专家，你需要完成一项语言评估任务。
			
 
				+该任务的背景为：当客服与用户长时间无互动时，客服会主动推送内容尝试开启互动对话。
			
 
				+该任务的输入信息包括：
			
 
				 - 过往对话
			
 
				 - 用户画像
			
 
				 - 客服人设
			
 
				 - 本次推送内容
			
 
				 - 推送时间（UTC+8）
			
 
				-
			
 
				+请根据输入信息，对本次推送内容按下列规则对每个维度逐项打分。
			
 
				 评分规则：
			
 
				 - 每个 **子指标** 只取 0 或 1 分。  
			
 
				   1 分：满足判分要点，或该项“无需评估”  
			
@@ -240,8 +240,9 @@ def generate_prompt(dialogue_history: str, message: str,
 
				 5.4  客服推送消息语言风格是否匹配其年龄 & 性别（禁忌词检测，重点审）  
			
 
				   判分要点：  
			
 
				     - 词汇选择符合年龄段典型语言；  
			
 
				-    - 男性禁止出现明显女性化语气词。比如说：呢、啦、呀、宝子、yyds；  
			
 
				-    - 45+ 及以上避免“冲鸭”“绝绝子”“yyds”等新潮词；  
			
 
				+    - 男性客服禁止出现明显女性化语气词，绝对禁止出现：呢、啦、呀、宝子、yyds等女性化用词！
			
 
				+    - 男性客服禁止出现“～”等女性标点符号！
			
 
				+    - 45+及以上避免“冲鸭”“绝绝子”“yyds”等新潮词；  
			
 
				     - 青年男性应简洁直接，可偶用“哈哈”“酷”；青年女性可用“呀”“哦”；  
			
 
				     - 不出现与性别、年龄严重背离的口头禅
			
 
				   正例：  
			
@@ -344,6 +345,12 @@ value 也是一个JSON，包含两个 key：score 和 reason，分别代表分
 
				 {message}
			
 
				 ### 推送时间
			
 
				 {send_time}
			
 
				+
			
 
				+## 特别注意
			
 
				+* 请严格按照上述输出格式输出，不要输出任何额外的内容
			
 
				+* 请务必注意禁止出现的情况，不要做出相反的评分！
			
 
				+
			
 
				+现在，请开始评估。
			
 
				 """
			
 
				     return prompt
			
 
				 
			
@@ -377,18 +384,6 @@ class PushMessageEvaluator(AgentEvaluator):
 
				         return history_conversation
			
 
				 
			
 
				     def evaluate_task(self, line):
			
 
				-        # staff_id = line['staff_id']
			
 
				-        # user_id = line['user_id']
			
 
				-        # conversation_id_list = json.loads(line['conversation'])
			
 
				-        # push_message = line['content']
			
 
				-        # send_time = line['send_time']
			
 
				-        # send_date_str = datetime.datetime.fromtimestamp(send_time).strftime('%Y-%m-%d %H:%M:%S')
			
 
				-        # dialogue_list = self.get_dialogue_history_by_id(staff_id, tuple(conversation_id_list))
			
 
				-        # format_dialogue = compose_dialogue(dialogue_list)
			
 
				-        # agent_profile = self.get_profile_info(staff_id, "staff")[0]['profile']
			
 
				-        # agent_profile = json.loads(agent_profile)
			
 
				-        # user_profile = self.get_profile_info(user_id, "user")[0]['profile']
			
 
				-        # user_profile = json.loads(user_profile)
			
 
				         user_profile = line["user_profile"]
			
 
				         agent_profile = line["agent_profile"]
			
 
				         send_date_str = line["push_time"]
			
@@ -398,10 +393,10 @@ class PushMessageEvaluator(AgentEvaluator):
 
				             dialogue_history=format_dialogue,
			
 
				             message=push_message,
			
 
				             send_time=send_date_str,
			
 
				-            agent_profile=agent_profile,
			
 
				-            user_profile=user_profile,
			
 
				+            agent_profile=prompt_utils.format_agent_profile(agent_profile),
			
 
				+            user_profile=prompt_utils.format_user_profile(user_profile),
			
 
				         )
			
 
				-        print(evaluator_prompt)
			
 
				+        # print(len(evaluator_prompt))
			
 
				         response = fetch_deepseek_completion(evaluator_prompt, output_type='json')
			
 
				         return {
			
 
				             "user_profile": user_profile,
			
@@ -419,7 +414,7 @@ class PushMessageEvaluator(AgentEvaluator):
 
				             data = json.loads(f.read())
			
 
				 
			
 
				         samples = random.sample(data, 48)
			
 
				-        samples = [i for i in data if i['push_message'] == '文芝阿姨，晚上好呀！今天有没有抽空做做颈部拉伸运动或者热敷一下颈椎呢？这些小方法对缓解头晕很有帮助哦~']
			
 
				+        # samples = [i for i in data if i['push_message'] == '文芝阿姨，晚上好呀！今天有没有抽空做做颈部拉伸运动或者热敷一下颈椎呢？这些小方法对缓解头晕很有帮助哦~']
			
 
				 
			
 
				         from concurrent.futures import ThreadPoolExecutor
			
 
				         from tqdm import tqdm
			
@@ -434,18 +429,19 @@ class PushMessageEvaluator(AgentEvaluator):
 
				             for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
			
 
				                 result = future.result()
			
 
				                 if result:
			
 
				-                    print(json.dumps(result, ensure_ascii=False, indent=4))
			
 
				+                    # print(json.dumps(result, ensure_ascii=False, indent=4))
			
 
				                     L.append(result)
			
 
				+
			
 
				         # for line in tqdm(data):
			
 
				         #     response = self.evaluate_task(line)
			
 
				         #     print("\n")
			
 
				         #     print(json.dumps(response, ensure_ascii=False, indent=4))
			
 
				         #     if response:
			
 
				         #         L.append(response)
			
 
				-        # #
			
 
				+        #
			
 
				         # 保存结果（与原代码相同）
			
 
				-        # with open("test_0618_v3.json", "w", encoding="utf-8") as f:
			
 
				-        #     json.dump(L, f, ensure_ascii=False, indent=4)
			
 
				+        with open("test_0618_qw_v2.json", "w", encoding="utf-8") as f:
			
 
				+            json.dump(L, f, ensure_ascii=False, indent=4)
			
 
				 
			
 
				 
			
 
				 if __name__ == '__main__':
			
--- a/log.txt
+++ b/log.txt
--- a/pqai_agent/agents/evaluate_agent.py
+++ b/pqai_agent/agents/evaluate_agent.py
@@ -1,7 +1,6 @@
 
				 import datetime
			
 
				 from typing import Optional, List, Dict
			
 
				 
			
 
				-from dev import dialogue
			
 
				 from pqai_agent.agents.simple_chat_agent import SimpleOpenAICompatibleChatAgent
			
 
				 from pqai_agent.chat_service import VOLCENGINE_MODEL_DEEPSEEK_V3
			
 
				 from pqai_agent.logging_service import logger