|
@@ -4,6 +4,7 @@ from typing import List, Dict
|
|
|
from tqdm import tqdm
|
|
|
|
|
|
from applications.api import fetch_deepseek_completion
|
|
|
+from applications.utils import ci_lower
|
|
|
|
|
|
|
|
|
class CandidateAccountProcessConst:
|
|
@@ -20,6 +21,8 @@ class CandidateAccountProcessConst:
|
|
|
ARTICLE_COUNT_THRESHOLD = 13
|
|
|
AVG_TITLE_LENGTH_THRESHOLD = 45
|
|
|
|
|
|
+ ACCOUNT_GOOD_STATUS = 1
|
|
|
+
|
|
|
@staticmethod
|
|
|
def generate_title_match_score_prompt(title_list):
|
|
|
title_list_string = "\n".join(title_list)
|
|
@@ -71,7 +74,7 @@ class CandidateAccountQualityScoreRecognizer(CandidateAccountProcessConst):
|
|
|
get account tasks from the database
|
|
|
"""
|
|
|
fetch_query = f"""
|
|
|
- select id, title_list, platform
|
|
|
+ select id, title_list, platform, account_id, account_name
|
|
|
from crawler_candidate_account_pool
|
|
|
where avg_score is null and status = {self.INIT_STATUS} and title_list is not null;
|
|
|
"""
|
|
@@ -93,6 +96,20 @@ class CandidateAccountQualityScoreRecognizer(CandidateAccountProcessConst):
|
|
|
update_query, (new_status, account_id, ori_status)
|
|
|
)
|
|
|
|
|
|
+ async def insert_account_into_crawler_queue(self, score_list: List[int], account: dict) -> None:
|
|
|
+ """
|
|
|
+ Compute the lower bound of the confidence interval of the account's scores; if that lower bound is greater than the threshold, the account is considered a good account.
|
|
|
+ A good account is inserted into article_meta_accounts with account_source 'ai_recognize' and status self.ACCOUNT_GOOD_STATUS; otherwise nothing is written.
|
|
|
+ :param score_list: scores passed to ci_lower (presumably one score per title returned by the LLM; confirm against the caller).
|
|
|
+ :param account: row dict; the keys "platform", "account_id" and "account_name" are read here.
|
|
|
+ NOTE(review): ci_lower is imported from applications.utils and is not visible here; its confidence level and its behavior on an empty score_list should be confirmed.
|
|
|
+ NOTE(review): this is a plain INSERT; confirm whether the table needs protection against inserting an account that already exists.
|
|
|
+ """
|
|
|
+ if ci_lower(score_list) > self.AVG_SCORE_THRESHOLD:
|
|
|
+ query = f"""
|
|
|
+ insert into article_meta_accounts (platform, account_id, account_name, account_source, status)
|
|
|
+ values (%s, %s, %s, %s, %s);
|
|
|
+ """
|
|
|
+ await self.pool.async_save(
|
|
|
+ query=query,
|
|
|
+ params=(account["platform"], account["account_id"], account["account_name"], 'ai_recognize', self.ACCOUNT_GOOD_STATUS)
|
|
|
+ )
|
|
|
+
|
|
|
async def score_for_each_account_by_llm(self, account):
|
|
|
account_id = account["id"]
|
|
|
# lock
|
|
@@ -141,6 +158,8 @@ class CandidateAccountQualityScoreRecognizer(CandidateAccountProcessConst):
|
|
|
self.SUCCESS_STATUS,
|
|
|
),
|
|
|
)
|
|
|
+ # Check the lower bound of the score confidence interval and, if it qualifies, insert the account
|
|
|
+ await self.insert_account_into_crawler_queue(score_list=completion, account=account)
|
|
|
|
|
|
except Exception as e:
|
|
|
await self.log_client.log(
|