|
@@ -29,7 +29,7 @@ def generate_prompt(account_title_list):
|
|
|
** 评估维度及权重 **
|
|
|
1. 受众精准度(50%)
|
|
|
正向匹配:存款/养老/健康/饮食/疾病警示/家庭伦理/近现代战争历史/老知青/奇闻异事
|
|
|
- 负向排除:影视解说/文学解读/个人收藏(钱币/邮票)/机械科普/数码测评/电子游戏/时尚潮流/明星八卦/极限运动/学术研究/网络热梗/宠物饲养
|
|
|
+ 负向排除:影视解说/文学解读/个人收藏(钱币/邮票)/机械科普/数码测评/电子游戏/时尚潮流/明星八卦/极限运动/学术研究/网络热梗/宠物饲养/音乐/棋牌
|
|
|
|
|
|
2. 标题技法(40%)
|
|
|
悬念设计:疑问句/省略号/反转结构(例:"打开后瞬间愣住...")
|
|
@@ -91,12 +91,20 @@ def recognize_each_account(thread_db_client, account):
|
|
|
|
|
|
# process
|
|
|
title_list = json.loads(account["title_list"])
|
|
|
- if len(title_list) < 15:
|
|
|
+ if len(title_list) < 15 and account['platform'] == 'toutiao':
|
|
|
# 账号数量不足,直接跳过
|
|
|
print("bad account, skip")
|
|
|
update_task_status(thread_db_client, task_id, 1, 11)
|
|
|
return
|
|
|
|
|
|
+ # 标题长度过长,需要过滤
|
|
|
+ title_total_length = sum(len(title) for title in title_list)
|
|
|
+ avg_title_length = title_total_length / len(title_list)
|
|
|
+ if avg_title_length > 30:
|
|
|
+ print("title too long, skip")
|
|
|
+ update_task_status(thread_db_client, task_id, 1, 14)
|
|
|
+ return
|
|
|
+
|
|
|
prompt = generate_prompt(title_list)
|
|
|
response = fetch_deepseek_response(model="DeepSeek-R1", prompt=prompt)
|
|
|
response_score_str = response.strip()
|
|
@@ -148,7 +156,7 @@ class AccountRecognizer:
|
|
|
get account task from database
|
|
|
"""
|
|
|
fetch_query = f"""
|
|
|
- select id, title_list from crawler_candidate_account_pool
|
|
|
+ select id, title_list, platform from crawler_candidate_account_pool
|
|
|
where avg_score is null and status = 0 and title_list is not null;
|
|
|
"""
|
|
|
fetch_response = self.db_client.fetch(fetch_query, cursor_type=DictCursor)
|