Selaa lähdekoodia

category model 优化

luojunhui 1 kuukausi sitten
vanhempi
commit
f6b27c9b66

+ 25 - 23
app/core/config/settings/task_chinese_name.py

@@ -9,31 +9,33 @@ class TaskChineseNameConfig(BaseSettings):
     # 分类映射
     name_map: Dict[str, str] = Field(
         default_factory=lambda: {
-            "title_rewrite": "标题重写",
-            "crawler_gzh_articles": "抓取公众号文章",
-            "crawler_account_manager": "抓取账号管理",
-            "article_pool_category_generation": "文章池品类生成",
-            "candidate_account_quality_analysis": "候选账号质量分析",
-            "article_pool_cold_start": "文章路冷启动",
-            "crawler_toutiao": "头条抓取",
+            "title_rewrite": "LLM--标题重写",
+            "crawler_gzh_articles": "供给--抓取公众号文章",
+            "crawler_account_manager": "供给--抓取账号管理",
+            "article_pool_category_generation": "LLM--文章池品类生成",
+            "candidate_account_quality_analysis": "LLM--候选账号质量分析",
+            "article_pool_cold_start": "冷启动--文章内容池",
+            "crawler_toutiao": "供给--头条抓取",
             "task_processing_monitor": "协程监测",
-            "update_root_source_id": "更新今日root_source_id",
-            "daily_publish_articles_recycle": "回收今日发文",
-            "inner_article_monitor": "账号发文违规监测",
-            "outside_article_monitor": "外部服务号发文监测",
-            "get_off_videos": "自动下架视频",
-            "check_publish_video_audit_status": "校验发布视频状态",
+            "update_root_source_id": "数据--更新今日root_source_id",
+            "daily_publish_articles_recycle": "数据--回收今日发文",
+            "inner_article_monitor": "安全--账号发文违规监测",
+            "outside_article_monitor": "安全--外部服务号发文监测",
+            "get_off_videos": "安全--自动下架视频",
+            "check_publish_video_audit_status": "安全--校验发布视频状态",
             "check_kimi_balance": "检验kimi余额",
-            "account_category_analysis": "账号品类分析",
-            "mini_program_detail_process": "更新小程序信息",
-            "crawler_detail_analysis": "抓取详情分析",
-            "limited_account_analysis": "限流账号分析处理",
-            "auto_follow_account": "自动关注账号",
-            "update_account_open_rate_avg": "更新账号平均打开率",
-            "update_limited_account_info": "更新限流账号信息",
-            "update_account_read_avg": "更新账号平均阅读率",
-            "get_follow_result": "获取自动关注回复",
-            "extract_reply_result": "解析自动回复结果",
+            "account_category_analysis": "LLM--账号品类分析",
+            "mini_program_detail_process": "数据--更新小程序信息",
+            "crawler_detail_analysis": "供给--抓取详情分析",
+            "limited_account_analysis": "数据--限流账号分析处理",
+            "update_account_open_rate_avg": "数据--更新账号平均打开率",
+            "update_limited_account_info": "数据--更新限流账号信息",
+            "update_account_read_avg": "数据--更新账号平均阅读率",
+            "auto_follow_account": "合作方--自动回复--创建任务",
+            "get_follow_result": "合作方--自动回复--获取结果",
+            "extract_reply_result": "合作方--自动回复--解析结果",
+            "cooperate_accounts_detail": "合作方--Daily--解析文章详情",
+            "cooperate_accounts_monitor": "合作方--Daily--账号内文章监控",
         }
     )
 

+ 9 - 9
app/domains/monitor_tasks/gzh_article_monitor.py

@@ -239,10 +239,10 @@ class InnerGzhArticlesMonitor(MonitorConst):
         :return: bool
         """
         title_md5 = str_to_md5(title)
-        query = f"""
-            select title_md5 from article_unsafe_title where title_md5 = '{title_md5}';
+        query = """
+            select title_md5 from article_unsafe_title where title_md5 = %s;
         """
-        response = await self.pool.async_fetch(query=query)
+        response = await self.pool.async_fetch(query=query, params=(title_md5,))
         return True if response else False
 
     async def fetch_article_list_to_check(self, run_date: str = None) -> Optional[List]:
@@ -256,13 +256,13 @@ class InnerGzhArticlesMonitor(MonitorConst):
             datetime.datetime.strptime(run_date, "%Y-%m-%d").timestamp()
         )
         start_timestamp = run_timestamp - self.MONITOR_CYCLE
-        query = f"""
+        query = """
             select ghId, accountName, title, ContentUrl, wx_sn, from_unixtime(publish_timestamp) as publish_timestamp
             from official_articles_v2
-            where publish_timestamp >= {start_timestamp}
+            where publish_timestamp >= %s
             order by publish_timestamp desc;
         """
-        response = await self.pool.async_fetch(query=query, db_name="piaoquan_crawler")
+        response = await self.pool.async_fetch(query=query, db_name="piaoquan_crawler", params=(start_timestamp,))
         if not response:
             await feishu_robot.bot(
                 title="站内微信公众号发文监测任务异常",
@@ -287,10 +287,10 @@ class InnerGzhArticlesMonitor(MonitorConst):
             response_code = response["code"]
             if response_code == self.ARTICLE_ILLEGAL_CODE:
                 error_detail = article.get("msg")
-                query = f"""
-                    insert ignore into illegal_articles
+                query = """
+                    INSERT IGNORE INTO illegal_articles
                         (gh_id, account_name, title, wx_sn, publish_date, illegal_reason)
-                    values 
+                    VALUES 
                         (%s, %s, %s, %s, %s, %s); 
                 """
                 affected_row = await self.pool.async_save(