8 miesięcy temu · 5f953d7406
--- a/evaluate_agent.py
+++ b/evaluate_agent.py
@@ -1,6 +1,7 @@
 
															 import json
														
 
															 import datetime
														
 
															 import random
														
 
															+import traceback
														
 
															 from tqdm import tqdm
														
 
															 from openai import OpenAI
														
@@ -9,6 +10,34 @@ from pqai_agent.database import MySQLManager
 
															 from pqai_agent.agents.message_push_agent import MessagePushAgent
														
 
															 from pqai_agent.agents.message_reply_agent import MessageReplyAgent
														
 
															+# Metric id -> description table for the reply-agent evaluation. The ids and
														
 
															+# texts correspond to the metric list in the prompt built by
														
 
															+# evaluate_reply_agent_prompt; "1.1" is included so that every id the prompt
														
 
															+# asks the model to score can be looked up here.
														
 
															+evaluation_metrics_dict = {
														
 
															+    "1.1": "是否识别用户核心意图",
														
 
															+    "1.2": "是否识别关键信息",
														
 
															+    "1.3": "是否能够理解歧义词/模糊词",
														
 
															+    "1.4": "是否能理解表情包，图片消息",
														
 
															+    "1.5": "是否能理解语音/方言",
														
 
															+    "2.1": "回复是否与用户意图相关",
														
 
															+    "2.2": "回复是否清晰简洁",
														
 
															+    "2.3": "回复是否流畅",
														
 
															+    "2.4": "回复语法是否规范",
														
 
															+    "3.1": "是否能理解代词（他，她， 她， 这个那个）",
														
 
															+    "3.2": "是否能延续上文话题",
														
 
															+    "3.3": "是否记住上文的基础信息",
														
 
															+    "3.4": "是否及时结束聊天",
														
 
															+    "4.1": "是否讨论超出角色认知范围的信息",
														
 
															+    "4.2": "是否讨论了不符合当前时代背景的语言、物品、事件、概念",
														
 
															+    "4.3": "是否表现出与agent 人设相符的专业知识、生活经验或者常识",
														
 
															+    "5.1": "agent 的言行是否反映其预设的核心性格",
														
 
															+    "5.2": "agent 的价值观和道德观是否符合其预设标准",
														
 
															+    "6.1": "agent 使用的词汇、句式、语法复杂度、行话/俚语是否符合其身份、教育背景和时代？",
														
 
															+    "6.2": "agent 语气、语调（恭敬、傲慢、亲切、疏离、热情、冷淡）是否稳定？",
														
 
															+    "6.3": "agent 表达习惯、口头禅是否符合角色预设特点",
														
 
															+    "7.1": "agent 在对话中表现出的目标、关注重心是否与其设定的核心动机一致？",
														
 
															+    "8.1": "agent 是否按照预设的互动模式与用户沟通",
														
 
															+    "8.2": "agent 是否对自身角色有正确理解",
														
 
															+    "8.3": "agent 是否回复超越用户认知的信息"
														
 
															+}
														
 
															+
														
 
															+
														
 
															 def fetch_deepseek_completion(prompt, output_type='text'):
														
 
															     """
														
 
															     deep_seek方法
														
@@ -31,7 +60,7 @@ def fetch_deepseek_completion(prompt, output_type='text'):
 
															                 "content": prompt,
														
 
															             }
														
 
															         ],
														
 
															-        model="deepseek-chat",
														
 
															+        model="deepseek-reasoner",
														
 
															         response_format=response_format,
														
 
															     )
														
 
															     response = chat_completion.choices[0].message.content
														
@@ -205,16 +234,80 @@ def evaluate_push_agent_prompt(dialogue_history, push_message, user_profile_, ag
 
															     return prompt_
														
 
															-def evaluate_reply_agent(dialogue_history, reply_message, user_profile_, agent_profile):
														
 
															+def evaluate_reply_agent_prompt(dialogue_history, message, user_profile_, agent_profile, push_time):
														
 
															     """
														
 
															+    Build the prompt that asks the LLM to score one agent message on every
														
 
															+    evaluation metric (score 1 or 0 plus a short reason) and answer in JSON.
														
 
															+
														
 
															     :param dialogue_history:
														
 
															-    :param reply_message:
														
 
															+    :param message:
														
 
															     :param user_profile_:
														
 
															     :param agent_profile:
														
 
															+    :param push_time: time at which the agent message was sent; interpolated into the prompt
														
 
															     :return:
														
 
															     """
														
 
															-    return
														
 
															+    output_format = {
														
 
															+        "1.1": {
														
 
															+            "score": 1,
														
 
															+            "reason": "理由"
														
 
															+        },
														
 
															+        "1.2": {
														
 
															+            "score": 0,
														
 
															+            "reason": "理由"
														
 
															+        }
														
 
															+    }
														
 
															+    prompt_ = f"""
														
 
															+        **评估任务：** 基于给定的对话历史和 Agent 预设信息，评估 Agent 在对话中的表现。使用以下维度和指标进行评分。
														
 
															+        **评估指标：**
														
 
															+          1. 理解能力
														
 
															+            1.1 是否识别用户核心意图
														
 
															+            1.2 是否识别关键信息
														
 
															+            1.3 是否能够理解歧义词/模糊词
														
 
															+            1.4 是否能理解表情包，图片消息
														
 
															+            1.5 是否能理解语音/方言
														
 
															+          2. 回复能力
														
 
															+            2.1 回复是否与用户意图相关
														
 
															+            2.2 回复是否清晰简洁
														
 
															+            2.3 回复是否流畅
														
 
															+            2.4 回复语法是否规范
														
 
															+          3. 上下文管理能力
														
 
															+            3.1 是否能理解代词（他，她， 她， 这个那个）
														
 
															+            3.2 是否能延续上文话题
														
 
															+            3.3 是否记住上文的基础信息
														
 
															+            3.4 是否及时结束聊天
														
 
															+          4. 背景知识一致性
														
 
															+            4.1 是否讨论超出角色认知范围的信息
														
 
															+            4.2 是否讨论了不符合当前时代背景的语言、物品、事件、概念
														
 
															+            4.3 是否表现出与agent 人设相符的专业知识、生活经验或者常识
														
 
															+          5. 性格行为一致性
														
 
															+            5.1 agent 的言行是否反映其预设的核心性格
														
 
															+            5.2 agent 的价值观和道德观是否符合其预设标准
														
 
															+          6. 语言风格一致性
														
 
															+            6.1 agent 使用的词汇、句式、语法复杂度、行话/俚语是否符合其身份、教育背景和时代？
														
 
															+            6.2 agent 语气、语调（恭敬、傲慢、亲切、疏离、热情、冷淡）是否稳定？
														
 
															+            6.3 agent 表达习惯、口头禅是否符合角色预设特点
														
 
															+          7. 目标动机一致性
														
 
															+            7.1 agent 在对话中表现出的目标、关注重心是否与其设定的核心动机一致？
														
 
															+          8. 关系认知一致性
														
 
															+            8.1 agent 是否按照预设的互动模式与用户沟通 
														
 
															+            8.2 agent 是否对自身角色有正确理解
														
 
															+            8.3 agent 是否回复超越用户认知的信息
														
 
															+        
														
 
															+        **评估规则：**
														
 
															+        - 对于每个指标：
														
 
															+          - 如果符合要求，得 1 分。
														
 
															+          - 如果不符合要求，得 0 分。
														
 
															+          - 如果指标不适用（如对话未涉及相关场景），得 1 分（无需评估）。
														
 
															+        - 理由必须基于对话内容，简短且客观，理由需要是中文， 如果是无需评估，则理由写无需评估
														
 
															+        
														
 
															+        **输入：**
														
 
															+        - **对话历史**： {dialogue_history}
														
 
															+        - **Agent 预设信息**： {agent_profile}
														
 
															+        - **用户预设信息**： {user_profile_}
														
 
															+        - **Agent 消息**： {message}
														
 
															+        - **Agent 发送消息时间**：{push_time}
														
 
															+        
														
 
															+        **输出格式要求：JSON 格式**
														
 
															+        输出格式参考：{output_format}
														
 
															+        
														
 
															+    """
														
 
															+    return prompt_
														
@@ -231,53 +324,62 @@ mysql_client = MySQLManager(config)
 
															 if __name__ == '__main__':
														
 
															     import pqai_agent.logging_service
														
 
															     pqai_agent.logging_service.setup_root_logger()
														
 
															-    with open("scripts/dialogue_data_set.json", "r", encoding="utf-8") as f:
														
 
															+    with open("push_message_evaluation_result.json", "r", encoding="utf-8") as f:
														
 
															         data = json.load(f)
														
 
															-    dialogues = random.sample(data, 100)
														
 
															+    # dialogues = random.sample(data, 1)
														
 
															     F = []
														
 
															-    for sub_dialogues in tqdm(dialogues):
														
 
															+    for sub_dialogues in tqdm(data):
														
 
															         try:
														
 
															             # user 相关
														
 
															-            user_id = sub_dialogues['user_id']
														
 
															-            user_profile_response = get_profile_info(user_id, "user")
														
 
															-            user_profile, avatar = json.loads(user_profile_response[0]['profile']), user_profile_response[0]['avatar']
														
 
															-
														
 
															-            user_profile['avatar'] = avatar
														
 
															-            user_profile['current_datetime'] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
														
 
															+            # user_id = sub_dialogues['user_id']
														
 
															+            # user_profile_response = get_profile_info(user_id, "user")
														
 
															+            # user_profile, avatar = json.loads(user_profile_response[0]['profile']), user_profile_response[0]['avatar']
														
 
															+            #
														
 
															+            # user_profile['avatar'] = avatar
														
 
															+            # user_profile['current_datetime'] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
														
 
															+            #
														
 
															+            # # staff 相关
														
 
															+            # staff_id = sub_dialogues['staff_id']
														
 
															+            # staff_profile_response = get_profile_info(staff_id, "staff")
														
 
															+            # staff_profile = json.loads(staff_profile_response[0]['profile'])
														
 
															+            #
														
 
															+            # user_profile['formatted_staff_profile'] = staff_profile
														
 
															+            #
														
 
															+            # push_agent = MessagePushAgent()
														
 
															+            # # reply_agent = MessageReplyAgent()
														
 
															+            #
														
 
															+            # # message 相关
														
 
															+            # message = sub_dialogues['dialogue_history']
														
 
															+            # agent_message = push_agent.generate_message(
														
 
															+            #     context=user_profile,
														
 
															+            #     dialogue_history=message
														
 
															+            # )
														
 
															+            message = sub_dialogues["dialogue_history"]
														
 
															+            agent_message = sub_dialogues["push_message"]
														
 
															+            push_time = sub_dialogues["push_time"]
														
 
															+            user_profile = sub_dialogues["user_profile"]
														
 
															+            staff_profile = sub_dialogues["agent_profile"]
														
 
															+            if agent_message:
														
 
															+                prompt = evaluate_reply_agent_prompt(message, agent_message, user_profile, staff_profile, push_time)
														
 
															+                # prompt = evaluate_conversation_quality_task(message, user_profile, staff_profile)
														
 
															+                response = fetch_deepseek_completion(prompt, output_type='json')
														
 
															+                obj = {
														
 
															+                    "user_profile": user_profile,
														
 
															+                    "agent_profile": staff_profile,
														
 
															+                    "dialogue_history": message,
														
 
															+                    "push_message": agent_message,
														
 
															+                    "push_time": push_time,
														
 
															+                    "evaluation_result": response
														
 
															+                }
														
 
															+                F.append(obj)
														
 
															-            # staff 相关
														
 
															-            staff_id = sub_dialogues['staff_id']
														
 
															-            staff_profile_response = get_profile_info(staff_id, "staff")
														
 
															-            staff_profile = json.loads(staff_profile_response[0]['profile'])
														
 
															+        except Exception as e:
														
 
															+            print(e)
														
 
															+            print(traceback.format_exc())
														
 
															-            user_profile['formatted_staff_profile'] = staff_profile
														
 
															-
														
 
															-            push_agent = MessagePushAgent()
														
 
															-            # reply_agent = MessageReplyAgent()
														
 
															-
														
 
															-            # message 相关
														
 
															-            message = sub_dialogues['dialogue_history']
														
 
															-            agent_message = push_agent.generate_message(
														
 
															-                context=user_profile,
														
 
															-                dialogue_history=message
														
 
															-            )
														
 
															-            prompt = evaluate_push_agent_prompt(message, agent_message, user_profile, staff_profile)
														
 
															-            # prompt = evaluate_conversation_quality_task(message, user_profile, staff_profile)
														
 
															-            response = fetch_deepseek_completion(prompt, output_type='json')
														
 
															-            obj = {
														
 
															-                "user_profile": user_profile,
														
 
															-                "agent_profile": staff_profile,
														
 
															-                "dialogue_history": message,
														
 
															-                "push_message": agent_message,
														
 
															-                "push_time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
														
 
															-                "evaluation_result": response
														
 
															-            }
														
 
															-            F.append(obj)
														
 
															-        except:
														
 
															-            continue
														
 
															-    with open("push_message_evaluation_result.json", "w", encoding="utf-8") as f:
														
 
															-        f.write(json.dumps(F, indent=4, ensure_ascii=False))
														
 
															+    with open("push_message_evaluation_result_2.json", "w", encoding="utf-8") as f:
														
 
															+        f.write(json.dumps(F, ensure_ascii=False, indent=4))
														
--- a/evaluate_agent_v2.py
+++ b/evaluate_agent_v2.py
@@ -0,0 +1,242 @@
 
															+import json
														
 
															+
														
 
															+from openai import OpenAI
														
 
															+
														
 
															+def fetch_deepseek_completion(prompt, output_type='text'):
														
 
															+    """
														
 
															+    Send a single-turn user prompt to the DeepSeek chat API and return the reply.
														
 
															+
														
 
															+    :param prompt: text sent as the content of the only (user) message.
														
 
															+    :param output_type: 'json' requests a JSON object and returns the result of
														
 
															+        json.loads on the reply; any other value returns the raw reply text.
														
 
															+    :return: parsed JSON when output_type == 'json', otherwise the reply string.
														
 
															+    :raises RuntimeError: if the DEEPSEEK_API_KEY environment variable is not set.
														
 
															+    """
														
 
															+    import os  # local import: only this function needs it
														
 
															+
														
 
															+    # The API key is read from the environment so that no secret is committed
														
 
															+    # to source control.
														
 
															+    api_key = os.environ.get("DEEPSEEK_API_KEY")
														
 
															+    if not api_key:
														
 
															+        raise RuntimeError("DEEPSEEK_API_KEY environment variable is not set")
														
 
															+
														
 
															+    client = OpenAI(
														
 
															+        api_key=api_key,
														
 
															+        base_url="https://api.deepseek.com"
														
 
															+    )
														
 
															+
														
 
															+    # get response format
														
 
															+    if output_type == "json":
														
 
															+        response_format = {"type": "json_object"}
														
 
															+    else:
														
 
															+        response_format = {"type": "text"}
														
 
															+
														
 
															+    chat_completion = client.chat.completions.create(
														
 
															+        messages=[
														
 
															+            {
														
 
															+                "role": "user",
														
 
															+                "content": prompt,
														
 
															+            }
														
 
															+        ],
														
 
															+        model="deepseek-chat",
														
 
															+        response_format=response_format,
														
 
															+    )
														
 
															+    response = chat_completion.choices[0].message.content
														
 
															+    if output_type == "json":
														
 
															+        response_json = json.loads(response)
														
 
															+        return response_json
														
 
															+
														
 
															+    return response
														
 
															+
														
 
															+
														
 
															+class PushAgentEvaluator:
														
 
															+    def __init__(self, dialogue_history, push_message, push_time, user_profile, agent_profile):
														
 
															+        """Store the evaluation inputs that the evaluate_* methods interpolate into their prompts."""
														
 
															+        self.dialogue_history = dialogue_history
														
 
															+        self.push_message = push_message
														
 
															+        self.user_profile = user_profile
														
 
															+        self.agent_profile = agent_profile
														
 
															+        self.push_time = push_time
														
 
															+
														
 
															+    def evaluate_user_match(self):
														
 
															+        """
														
 
															+        Ask the LLM how well the push message matches the user profile.
														
 
															+
														
 
															+        Three criteria are scored, 8 points each (24 in total). The example
														
 
															+        embedded in the prompt uses the same keys and stays within the
														
 
															+        8-point maximum so that it does not contradict the rubric.
														
 
															+
														
 
															+        :return: parsed JSON returned by fetch_deepseek_completion.
														
 
															+        :raises KeyError: if user_profile lacks one of the fields read below.
														
 
															+        """
														
 
															+        out_put_format = {
														
 
															+            "语言风格": {
														
 
															+                "score": 8,
														
 
															+                "reason": "语言风格符合用户特征，年龄、性别、地域等特征都匹配"
														
 
															+            },
														
 
															+            "信息边界": {
														
 
															+                "score": 8,
														
 
															+                "reason": "信息范围符合用户认知范围"
														
 
															+            },
														
 
															+            "特征匹配": {
														
 
															+                "score": 8,
														
 
															+                "reason": "特征匹配度高，user_profile中有兴趣爱好，健康状态，用药状态，居住地址，家庭成员等特征，在 push_message需要体现出来"
														
 
															+            }
														
 
															+        }
														
 
															+        analysis_prompt = f"""
														
 
															+            通过分析用户的以下特征
														
 
															+                年龄: {self.user_profile['age']}, 
														
 
															+                性别: {self.user_profile['gender']}, 
														
 
															+                兴趣: {self.user_profile['interests']},
														
 
															+                健康状态: {self.user_profile['health_conditions']},
														
 
															+                用药状态: {self.user_profile['medications']},
														
 
															+                居住地址: {self.user_profile['region']},
														
 
															+                家庭成员: {self.user_profile['family_members']},
														
 
															+            和 agent 发送的消息: {self.push_message}, 判断该消息是否和该用户的特征匹配
														
 
															+            包括以下三点，每一点的满分为 8 分：
														
 
															+                语言风格：是否符合用户的年龄、性别、地域等特征；
														
 
															+                信息边界：是否超出用户认知范围；
														
 
															+                特征匹配：如果 user_profile中有兴趣爱好，健康状态，用药状态，居住地址，家庭成员等特征，在 push_message需要体现出来
														
 
															+            输出的格式为 JSON，示例如：{out_put_format}
														
 
															+        """
														
 
															+        completion = fetch_deepseek_completion(prompt=analysis_prompt, output_type='json')
														
 
															+        return completion
														
 
															+
														
 
															+    def evaluate_agent_consistency(self):
														
 
															+        """
														
 
															+        Ask the LLM how consistent the push message is with the agent persona.
														
 
															+
														
 
															+        Two criteria are scored, 8 points each (16 in total). The example
														
 
															+        reasons describe the agent (not the user), and the criterion label in
														
 
															+        the prompt matches the example key.
														
 
															+
														
 
															+        :return: parsed JSON returned by fetch_deepseek_completion.
														
 
															+        :raises KeyError: if agent_profile lacks one of the fields read below.
														
 
															+        """
														
 
															+        out_put_format = {
														
 
															+            "语言风格": {
														
 
															+                "score": 8,
														
 
															+                "reason": "语言风格符合 agent 特征，年龄、性别、地域等特征都匹配"
														
 
															+            },
														
 
															+            "消息边界": {
														
 
															+                "score": 8,
														
 
															+                "reason": "信息范围符合 agent 认知范围"
														
 
															+            },
														
 
															+        }
														
 
															+        analysis_prompt = f"""
														
 
															+            通过分析 agent 的以下特征：
														
 
															+                职业：{self.agent_profile['occupation']},
														
 
															+                年龄：{self.agent_profile['age']},
														
 
															+                性别：{self.agent_profile['gender']},
														
 
															+                地址：{self.agent_profile['region']},
														
 
															+                教育背景：{self.agent_profile['education']},
														
 
															+                工作经验：{self.agent_profile['work_experience']}
														
 
															+            和 agent 发送的消息: {self.push_message}, 判断该消息是否和该 agent 的特征匹配
														
 
															+            包括以下 2 点，每一点的满分为 8 分：
														
 
															+                语言风格：是否符合 agent 的年龄、性别、地域等特征；需要从不同性别，不同职业，不同年龄， 不同地域的人的说话风格去分析
														
 
															+                消息边界：是否超出 agent 认知范围；
														
 
															+            输出的格式为 JSON，示例如：{out_put_format}
														
 
															+        """
														
 
															+        completion = fetch_deepseek_completion(prompt=analysis_prompt, output_type='json')
														
 
															+        return completion
														
 
															+
														
 
															+    def evaluate_interest_arousal(self):
														
 
															+        """Ask the LLM how well the push message arouses the user's interest (30 points: two criteria, 15 each); returns the parsed JSON reply."""
														
 
															+        out_put_format = {
														
 
															+            "好奇设计": {
														
 
															+                "score": 15,
														
 
															+                "reason": "包含开放式问题，包括对用户认知边界的探索，用户兴趣爱好的联想等方面"
														
 
															+            },
														
 
															+            "利益设计": {
														
 
															+                "score": 15,
														
 
															+                "reason": "如果用户在历史对话中提到了某种需求，新的唤起消息是否有为用户解决需求的趋势"
														
 
															+            }
														
 
															+        }
														
 
															+        analysis_prompt = f"""
														
 
															+            通过分析用户的以下特征
														
 
															+                年龄: {self.user_profile['age']}, 
														
 
															+                性别: {self.user_profile['gender']}, 
														
 
															+                兴趣: {self.user_profile['interests']},
														
 
															+                健康状态: {self.user_profile['health_conditions']},
														
 
															+                用药状态: {self.user_profile['medications']},
														
 
															+                居住地址: {self.user_profile['region']},
														
 
															+                家庭成员: {self.user_profile['family_members']},
														
 
															+            和 agent 发送的消息: {self.push_message}, 以及用户和 agent 的历史对话: {self.dialogue_history}, 
														
 
															+            判断该消息是否能唤起用户的兴趣，驱动用户的聊天激情，主要从以下几2个方面考虑，每一个要点满分 15分
														
 
															+                好奇设计：是否包含开放式问题，包括对用户认知边界的探索，用户兴趣爱好的联想等方面
														
 
															+                利益设计：如果用户在历史对话中提到了某种需求，新的唤起消息是否有为用户解决需求的趋势；
														
 
															+                        如果没提出，可以从用户特征中联想出用户的需求，比如用户的兴趣爱好，健康状态，用药状态，居住地址，家庭成员等特征，
														
 
															+                        在 push_message需要体现出来
														
 
															+            输出的格式为 JSON，示例如：{out_put_format}
														
 
															+        """
														
 
															+        completion = fetch_deepseek_completion(prompt=analysis_prompt, output_type='json')
														
 
															+        return completion
														
 
															+
														
 
															+    def evaluate_execution_quality(self):
														
 
															+        """Ask the LLM to rate the push message's execution quality (15 points: information fidelity, policy compliance and human-like language, 5 each); returns the parsed JSON reply."""
														
 
															+        output_format = {
														
 
															+            "信息保真": {
														
 
															+                "score": 5,
														
 
															+                "reason": "信息真实"
														
 
															+            },
														
 
															+            "政策合规": {
														
 
															+                "score": 5,
														
 
															+                "reason": "发送消息是否符合现有政策要求，是否违反相关规则"
														
 
															+            },
														
 
															+            "语言拟人": {
														
 
															+                "score": 5,
														
 
															+                "reason": "表现出一定的拟人化和情商"
														
 
															+            }
														
 
															+        }
														
 
															+        analysis_prompt = f"""
														
 
															+            通过分析 agent 发送的消息：{self.push_message}, 
														
 
															+            信息保真（满分 5 分）:
														
 
															+                拆分消息重点各个事件点，联网搜索，判断事件是否为真实事件，比如说节日祝福需要和发送时间{self.push_time}对应， 天气需要和地区相关
														
 
															+                不能在早上发晚上好之类等等
														
 
															+            政策合规（满分 5 分）:
														
 
															+                并且判断发送的消息是否存在违反规则的现象。
														
 
															+            语言拟人:（满分 5 分）:
														
 
															+                判断发送的消息是否存在机械化，同质化的现象，比如重复发相同或者相似的消息，比如过于机械的回复等等
														
 
															+            输出的格式为 JSON，示例如：{output_format}
														
 
															+        """
														
 
															+        completion = fetch_deepseek_completion(prompt=analysis_prompt, output_type='json')
														
 
															+        return completion
														
 
															+
														
 
															+    def evaluate_emotional_intelligence(self):
														
 
															+        """Ask the LLM how well the push message empathizes with the user, given the profile and dialogue history (15 points); returns the parsed JSON reply."""
														
 
															+        output_format = {
														
 
															+            "共情深度": {
														
 
															+                "score": 10,
														
 
															+                "reason": "识别用户情感状态，提供相应的回应"
														
 
															+            },
														
 
															+        }
														
 
															+        analysis_prompt = f"""
														
 
															+             通过分析用户的以下特征
														
 
															+                年龄: {self.user_profile['age']}, 
														
 
															+                性别: {self.user_profile['gender']}, 
														
 
															+                兴趣: {self.user_profile['interests']},
														
 
															+                健康状态: {self.user_profile['health_conditions']},
														
 
															+                用药状态: {self.user_profile['medications']},
														
 
															+                居住地址: {self.user_profile['region']},
														
 
															+                家庭成员: {self.user_profile['family_members']},
														
 
															+            和 agent 和用户的对话：{self.dialogue_history} 挖掘出用户的情感需求
														
 
															+            并且 agent 发送的消息: {self.push_message}, 判断该message是否能和用户共情，提升用户的情感体验
														
 
															+            满分 15分
														
 
															+            输出格式为 JSON，示例如：{output_format}
														
 
															+        """
														
 
															+        completion = fetch_deepseek_completion(prompt=analysis_prompt, output_type='json')
														
 
															+        return completion
														
 
															+
														
 
															+
														
 
															+def evaluate_push_agent(
														
 
															+        dialogue_history,
														
 
															+        push_message,
														
 
															+        push_time,
														
 
															+        user_profile,
														
 
															+        agent_profile
														
 
															+):
														
 
															+    """
														
 
															+    Run every PushAgentEvaluator check on one push message.
														
 
															+
														
 
															+    :return: nested dict with two groups ("基础能力", "任务效能"), each mapping
														
 
															+        a check name to the result returned by the matching evaluate_* method.
														
 
															+    """
														
 
															+    # Create the evaluator instance
														
 
															+    evaluator = PushAgentEvaluator(
														
 
															+        dialogue_history,
														
 
															+        push_message,
														
 
															+        push_time,
														
 
															+        user_profile,
														
 
															+        agent_profile,
														
 
															+    )
														
 
															+
														
 
															+    # Run the individual evaluation modules (one call per check)
														
 
															+    evaluation_report = {
														
 
															+        "基础能力": {
														
 
															+            "用户匹配": evaluator.evaluate_user_match(),
														
 
															+            "人设一致": evaluator.evaluate_agent_consistency()
														
 
															+        },
														
 
															+        "任务效能": {
														
 
															+            "兴趣激发": evaluator.evaluate_interest_arousal(),
														
 
															+            "执行质量": evaluator.evaluate_execution_quality(),
														
 
															+            "情感智能": evaluator.evaluate_emotional_intelligence()
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    return evaluation_report
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    # Evaluate the single sample stored in dev.json and print the report.
														
 
															+    # The encoding is explicit because the data holds Chinese text and the
														
 
															+    # platform default encoding is not guaranteed to be UTF-8.
														
 
															+    with open("dev.json", encoding="utf-8") as f:
														
 
															+        data = json.load(f)
														
 
															+
														
 
															+    evaluation_report = evaluate_push_agent(
														
 
															+        data['dialogue_history'],
														
 
															+        data['push_message'],
														
 
															+        data['push_time'],
														
 
															+        data['user_profile'],
														
 
															+        data['agent_profile']
														
 
															+    )
														
 
															+    print(json.dumps(evaluation_report, indent=4, ensure_ascii=False))
														
 
															+
														
--- a/push_message_evaluation_result.json
+++ b/push_message_evaluation_result.json
@@ -14831,6 +14831,7 @@
 
															             "improvement_suggestions": "建议更深入地挖掘和利用用户已知的兴趣爱好来定制push_message，以增强相关性和用户参与度。同时，可以尝试更多共情的表达方式，以提升用户的情感体验。"
														
 
															         }
														
 
															     },
														
 
															+
														
 
															     {
														
 
															         "user_profile": {
														
 
															             "name": "",