luojunhui hai 2 días
pai
achega
744e9ad636
Modificouse 1 ficheiro con 10 adicións e 23 borrados
  1. 10 23
      scripts/evaluate_agent.py

+ 10 - 23
scripts/evaluate_agent.py

@@ -451,7 +451,7 @@ def fetch_llm_completion(prompt, output_type="text"):
     return response
 
 
-def evaluate_push_agent(task):
+def evaluate_agent(task, task_type):
     context = {
         "output_dict": {
             "1.1": {"score": 1, "reason": "识别到用户焦虑并先安抚"},
@@ -460,30 +460,17 @@ def evaluate_push_agent(task):
             "7.5": {"score": 1, "reason": "2025-05-28 发端午祝福;端午=2025-05-31"},
         },
         "dialogue_history": format_dialogue_history(task["dialogue_history"]),
-        "message": task["push_message"],
-        "send_time": task["push_time"],
+        "message": task["message"],
+        "send_time": task["send_time"],
         "agent_profile": format_agent_profile(task["agent_profile"]),
         "user_profile": format_user_profile(task["user_profile"]),
     }
-    evaluate_prompt = PUSH_MESSAGE_EVALUATE_PROMPT.format(**context)
-    response = fetch_llm_completion(evaluate_prompt, output_type="json")
-    return response
-
-
-def evaluate_reply_agent(task):
-    context = {
-        "output_dict": {
-            "1.1": {"score": 1, "reason": "识别到用户焦虑并先安抚"},
-            "2.1": {"score": 0, "reason": "跳过健康话题改聊理财"},
-            "5.4": {"score": 1, "reason": "青年男性用词简洁,无女性化词汇"},
-            "7.5": {"score": 1, "reason": "2025-05-28 发端午祝福;端午=2025-05-31"},
-        },
-        "dialogue_history": format_dialogue_history(task["dialogue_history"]),
-        "message": task["reply_message"],
-        "send_time": task["reply_time"],
-        "agent_profile": format_agent_profile(task["agent_profile"]),
-        "user_profile": format_user_profile(task["user_profile"]),
-    }
-    evaluate_prompt = REPLY_MESSAGE_EVALUATE_PROMPT.format(**context)
+    match task_type:
+        case 0:
+            evaluate_prompt = REPLY_MESSAGE_EVALUATE_PROMPT.format(**context)
+        case 1:
+            evaluate_prompt = PUSH_MESSAGE_EVALUATE_PROMPT.format(**context)
+        case _:
+            raise ValueError("task_type must be 0 or 1")
     response = fetch_llm_completion(evaluate_prompt, output_type="json")
     return response