2 ماه پیش · cb59f250e3
--- a/tasks/account_recognize_by_llm.py
+++ b/tasks/account_recognize_by_llm.py
@@ -29,7 +29,7 @@ def generate_prompt(account_title_list):
 
				     ** 评估维度及权重 **
			
 
				         1. 受众精准度（50%）
			
 
				             正向匹配：存款/养老/健康/饮食/疾病警示/家庭伦理/近现代战争历史/老知青/奇闻异事
			
 
				-            负向排除：影视解说/文学解读/个人收藏（钱币/邮票）/机械科普/数码测评/电子游戏/时尚潮流/明星八卦/极限运动/学术研究/网络热梗/宠物饲养
			
 
				+            负向排除：影视解说/文学解读/个人收藏（钱币/邮票）/机械科普/数码测评/电子游戏/时尚潮流/明星八卦/极限运动/学术研究/网络热梗/宠物饲养/音乐/棋牌
			
 
				             
			
 
				         2. 标题技法（40%）
			
 
				             悬念设计：疑问句/省略号/反转结构（例："打开后瞬间愣住..."）
			
@@ -91,12 +91,20 @@ def recognize_each_account(thread_db_client, account):
 
				 
			
 
				     # process
			
 
				     title_list = json.loads(account["title_list"])
			
 
				-    if len(title_list) < 15:
			
 
				+    if len(title_list) < 15 and account['platform'] == 'toutiao':
			
 
				         # 账号数量不足，直接跳过
			
 
				         print("bad account, skip")
			
 
				         update_task_status(thread_db_client, task_id, 1, 11)
			
 
				         return
			
 
				 
			
 
				+    # 标题长度过长，需要过滤
			
 
				+    title_total_length = sum(len(title) for title in title_list)
			
 
				+    avg_title_length = title_total_length / len(title_list)
			
 
				+    if avg_title_length > 30:
			
 
				+        print("title too long, skip")
			
 
				+        update_task_status(thread_db_client, task_id, 1, 14)
			
 
				+        return
			
 
				+
			
 
				     prompt = generate_prompt(title_list)
			
 
				     response = fetch_deepseek_response(model="DeepSeek-R1", prompt=prompt)
			
 
				     response_score_str = response.strip()
			
@@ -148,7 +156,7 @@ class AccountRecognizer:
 
				         get account task from database
			
 
				         """
			
 
				         fetch_query = f"""
			
 
				-            select id, title_list from crawler_candidate_account_pool
			
 
				+            select id, title_list, platform from crawler_candidate_account_pool
			
 
				             where avg_score is null and status = 0 and title_list is not null;
			
 
				         """
			
 
				         fetch_response = self.db_client.fetch(fetch_query, cursor_type=DictCursor)