|
@@ -9,10 +9,10 @@ from openai import OpenAI
|
|
|
from typing import List, Dict
|
|
|
from pymysql.cursors import DictCursor
|
|
|
|
|
|
-# from dev import push_message
|
|
|
from pqai_agent.database import MySQLManager
|
|
|
from pqai_agent.logging_service import logger
|
|
|
from pqai_agent import configs, logging_service
|
|
|
+from pqai_agent.utils import prompt_utils
|
|
|
from pqai_agent_server.utils.prompt_util import format_dialogue_history
|
|
|
|
|
|
logging_service.setup_root_logger()
|
|
@@ -48,7 +48,8 @@ def fetch_deepseek_completion(prompt, output_type="text"):
|
|
|
model='qwen3-235b-a22b',
|
|
|
response_format=response_format,
|
|
|
stream=False,
|
|
|
- extra_body={"enable_thinking": False}
|
|
|
+ extra_body={"enable_thinking": False},
|
|
|
+ temperature=0.2
|
|
|
)
|
|
|
response = chat_completion.choices[0].message.content
|
|
|
if output_type == "json":
|
|
@@ -125,23 +126,22 @@ output_dict = {
|
|
|
}
|
|
|
|
|
|
def generate_prompt(dialogue_history: str, message: str,
|
|
|
- send_time: str, user_profile: Dict, agent_profile: Dict) -> str:
|
|
|
+ send_time: str, user_profile: str, agent_profile: str) -> str:
|
|
|
"""
|
|
|
生成评估prompt
|
|
|
:return: prompt
|
|
|
"""
|
|
|
prompt = f"""
|
|
|
## 评估任务说明
|
|
|
-当 客服与用户长时间无互动时,客服会主动推送 message 以维系联系。
|
|
|
-请根据输入信息,对该 message 按下列维度逐项打分。
|
|
|
-
|
|
|
-输入字段:
|
|
|
+你是一个专业的语言学专家,你需要完成一项语言评估任务。
|
|
|
+该任务的背景为:当客服与用户长时间无互动时,客服会主动推送内容尝试开启互动对话。
|
|
|
+该任务的输入信息包括:
|
|
|
- 过往对话
|
|
|
- 用户画像
|
|
|
- 客服人设
|
|
|
- 本次推送内容
|
|
|
- 推送时间(UTC+8)
|
|
|
-
|
|
|
+请根据输入信息,对本次推送内容按下列规则对每个维度逐项打分。
|
|
|
评分规则:
|
|
|
- 每个 **子指标** 只取 0 或 1 分。
|
|
|
1 分:满足判分要点,或该项“无需评估”
|
|
@@ -240,8 +240,9 @@ def generate_prompt(dialogue_history: str, message: str,
|
|
|
5.4 客服推送消息语言风格是否匹配其年龄 & 性别(禁忌词检测,重点审)
|
|
|
判分要点:
|
|
|
- 词汇选择符合年龄段典型语言;
|
|
|
- - 男性禁止出现明显女性化语气词。比如说:呢、啦、呀、宝子、yyds;
|
|
|
- - 45+ 及以上避免“冲鸭”“绝绝子”“yyds”等新潮词;
|
|
|
+ - 男性客服禁止出现明显女性化语气词,绝对禁止出现:呢、啦、呀、宝子、yyds等女性化用词!
|
|
|
+ - 男性客服禁止出现“~”等女性标点符号!
|
|
|
+ - 45+及以上避免“冲鸭”“绝绝子”“yyds”等新潮词;
|
|
|
- 青年男性应简洁直接,可偶用“哈哈”“酷”;青年女性可用“呀”“哦”;
|
|
|
- 不出现与性别、年龄严重背离的口头禅
|
|
|
正例:
|
|
@@ -344,6 +345,12 @@ value 也是一个JSON,包含两个 key:score 和 reason,分别代表分
|
|
|
{message}
|
|
|
### 推送时间
|
|
|
{send_time}
|
|
|
+
|
|
|
+## 特别注意
|
|
|
+* 请严格按照上述输出格式输出,不要输出任何额外的内容
|
|
|
+* 请务必注意禁止出现的情况,不要做出相反的评分!
|
|
|
+
|
|
|
+现在,请开始评估。
|
|
|
"""
|
|
|
return prompt
|
|
|
|
|
@@ -377,18 +384,6 @@ class PushMessageEvaluator(AgentEvaluator):
|
|
|
return history_conversation
|
|
|
|
|
|
def evaluate_task(self, line):
|
|
|
- # staff_id = line['staff_id']
|
|
|
- # user_id = line['user_id']
|
|
|
- # conversation_id_list = json.loads(line['conversation'])
|
|
|
- # push_message = line['content']
|
|
|
- # send_time = line['send_time']
|
|
|
- # send_date_str = datetime.datetime.fromtimestamp(send_time).strftime('%Y-%m-%d %H:%M:%S')
|
|
|
- # dialogue_list = self.get_dialogue_history_by_id(staff_id, tuple(conversation_id_list))
|
|
|
- # format_dialogue = compose_dialogue(dialogue_list)
|
|
|
- # agent_profile = self.get_profile_info(staff_id, "staff")[0]['profile']
|
|
|
- # agent_profile = json.loads(agent_profile)
|
|
|
- # user_profile = self.get_profile_info(user_id, "user")[0]['profile']
|
|
|
- # user_profile = json.loads(user_profile)
|
|
|
user_profile = line["user_profile"]
|
|
|
agent_profile = line["agent_profile"]
|
|
|
send_date_str = line["push_time"]
|
|
@@ -398,10 +393,10 @@ class PushMessageEvaluator(AgentEvaluator):
|
|
|
dialogue_history=format_dialogue,
|
|
|
message=push_message,
|
|
|
send_time=send_date_str,
|
|
|
- agent_profile=agent_profile,
|
|
|
- user_profile=user_profile,
|
|
|
+ agent_profile=prompt_utils.format_agent_profile(agent_profile),
|
|
|
+ user_profile=prompt_utils.format_user_profile(user_profile),
|
|
|
)
|
|
|
- print(evaluator_prompt)
|
|
|
+ # print(len(evaluator_prompt))
|
|
|
response = fetch_deepseek_completion(evaluator_prompt, output_type='json')
|
|
|
return {
|
|
|
"user_profile": user_profile,
|
|
@@ -419,7 +414,7 @@ class PushMessageEvaluator(AgentEvaluator):
|
|
|
data = json.loads(f.read())
|
|
|
|
|
|
samples = random.sample(data, 48)
|
|
|
- samples = [i for i in data if i['push_message'] == '文芝阿姨,晚上好呀!今天有没有抽空做做颈部拉伸运动或者热敷一下颈椎呢?这些小方法对缓解头晕很有帮助哦~']
|
|
|
+ # samples = [i for i in data if i['push_message'] == '文芝阿姨,晚上好呀!今天有没有抽空做做颈部拉伸运动或者热敷一下颈椎呢?这些小方法对缓解头晕很有帮助哦~']
|
|
|
|
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
|
from tqdm import tqdm
|
|
@@ -434,18 +429,19 @@ class PushMessageEvaluator(AgentEvaluator):
|
|
|
for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
|
|
|
result = future.result()
|
|
|
if result:
|
|
|
- print(json.dumps(result, ensure_ascii=False, indent=4))
|
|
|
+ # print(json.dumps(result, ensure_ascii=False, indent=4))
|
|
|
L.append(result)
|
|
|
+
|
|
|
# for line in tqdm(data):
|
|
|
# response = self.evaluate_task(line)
|
|
|
# print("\n")
|
|
|
# print(json.dumps(response, ensure_ascii=False, indent=4))
|
|
|
# if response:
|
|
|
# L.append(response)
|
|
|
- # #
|
|
|
+ #
|
|
|
# 保存结果(与原代码相同)
|
|
|
- # with open("test_0618_v3.json", "w", encoding="utf-8") as f:
|
|
|
- # json.dump(L, f, ensure_ascii=False, indent=4)
|
|
|
+ with open("test_0618_qw_v2.json", "w", encoding="utf-8") as f:
|
|
|
+ json.dump(L, f, ensure_ascii=False, indent=4)
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|