luojunhui 1 месяц назад
Родитель
Сommit
d67cbbae31

+ 4 - 0
applications/config/task_chinese_name.py

@@ -18,4 +18,8 @@ name_map = {
     "mini_program_detail_process": "更新小程序信息",
     "crawler_detail_analysis": "抓取详情分析",
     "limited_account_analysis": "限流账号分析处理",
+    "auto_follow_account": "自动关注账号",
+    "update_account_open_rate_avg": "更新账号平均打开率",
+    "update_limited_account_info": "更新限流账号信息",
+    "update_account_read_avg": "更新账号平均阅读率",
 }

+ 1 - 0
applications/tasks/data_recycle_tasks/recycle_daily_publish_articles.py

@@ -46,6 +46,7 @@ class Const:
         "gh_5ae65db96cb7",
         "gh_72bace6b3059",
         "gh_dd4c857bbb36",
+        "gh_ff487cb5dab3",
     ]
 
     # NOT USED SERVER ACCOUNT

+ 125 - 24
applications/tasks/monitor_tasks/auto_reply_cards_monitor.py

@@ -154,6 +154,7 @@ class AutoReplyCardsMonitorUtils(AutoReplyCardsMonitorConst):
             GROUP BY 公众号名, ghid
             HAVING uv > 100
             ORDER BY uv DESC
+            LIMIT 10
             ;
         """
         result = fetch_from_odps(query)
@@ -173,18 +174,10 @@ class AutoReplyCardsMonitorMapper(AutoReplyCardsMonitorUtils):
         self.pool = pool
         self.log_client = log_client
 
-    # 获取关注公众号任务结果
-    async def get_follow_account_task_result(self, task_id):
-        pass
-
-    # 创建自动回复任务
-    async def create_auto_reply_task(self):
-        pass
-
     # 获取自动回复任务结果
     async def get_auto_reply_task_result(self, task_id):
         query = """
-            SELECT task_result, task_status, err_msg,from_unixtime(update_timestamp / 1000) AS update_time 
+            SELECT task_result, task_status, err_msg, update_timestamp
             FROM gzh_msg_record
             WHERE task_id = %s;
         """
@@ -192,18 +185,6 @@ class AutoReplyCardsMonitorMapper(AutoReplyCardsMonitorUtils):
             query=query, params=(task_id,), db_name="aigc"
         )
 
-    # 获取关注公众号任务列表
-    async def get_follow_account_task_list(self):
-        pass
-
-    # 获取自动回复任务列表
-    async def get_auto_reply_task_list(self):
-        pass
-
-    # 插入待关注公众号
-    async def insert_accounts_task(self, account_name, gh_id):
-        pass
-
     # 查询账号
     async def fetch_account_status(self, account_name):
         query = """
@@ -271,13 +252,69 @@ class AutoReplyCardsMonitorMapper(AutoReplyCardsMonitorUtils):
             query=query, params=(f"follow_{gh_id}",), db_name="aigc"
         )
 
+    # 创建自动回复任务
+    async def create_auto_reply_task(self, task_id, gh_id):
+        query = """
+            INSERT INTO cooperate_accounts_task (task_id, gh_id) VALUES (%s, %s);
+        """
+        return await self.pool.async_save(query=query, params=(task_id, gh_id))
+
+    async def update_auto_reply_task_status(
+        self, task_id, status_type, ori_status, new_status
+    ):
+        task_query = """
+            UPDATE cooperate_accounts_task SET task_status = %s WHERE task_id = %s AND task_status = %s;
+        """
+        extract_query = """
+            UPDATE cooperate_accounts_task SET extract_status = %s WHERE task_id = %s AND extract_status = %s;
+        """
+        match status_type:
+            case "task":
+                return await self.pool.async_save(
+                    query=task_query, params=(new_status, task_id, ori_status)
+                )
+            case "extract":
+                return await self.pool.async_save(
+                    query=extract_query, params=(new_status, task_id, ori_status)
+                )
+            case _:
+                print("status_type_error")
+                return None
+
+    # 获取正在自动回复卡片的任务 id
+    async def fetch_auto_replying_tasks(self):
+        query = """
+            SELECT task_id FROM cooperate_accounts_task WHERE task_status = %s;
+        """
+        return await self.pool.async_fetch(
+            query=query, params=(self.PROCESSING_STATUS,)
+        )
+
+    # 设置自动回复结果
+    async def set_auto_reply_result(self, task_id, finish_timestamp, result):
+        query = """
+            UPDATE cooperate_accounts_task 
+            SET finish_timestamp = %s, result = %s, task_status = %s
+            WHERE task_id = %s and task_status = %s;
+        """
+        return await self.pool.async_save(
+            query=query,
+            params=(
+                finish_timestamp,
+                result,
+                self.SUCCESS_STATUS,
+                task_id,
+                self.PROCESSING_STATUS,
+            ),
+        )
+
 
 class AutoReplyCardsMonitor(AutoReplyCardsMonitorMapper):
     def __init__(self, pool, log_client):
         super().__init__(pool, log_client)
 
     # 创建单个关注公众号任务
-    async def create_follow_account_task(self, gh_id):
+    async def create_follow_single_account_task(self, gh_id):
         response = await get_article_list_from_account(account_id=gh_id)
         code = response.get("code")
         match code:
@@ -304,6 +341,24 @@ class AutoReplyCardsMonitor(AutoReplyCardsMonitorMapper):
             case _:
                 pass
 
+    # 创建单个账号自动回复任务
+    async def create_auto_reply_single_account_task(self, gh_id, account_name):
+        task_id = self.generate_task_id(task_name="auto_reply", gh_id=gh_id)
+        # 先插入 task, 再创建自动回复任务
+        create_row = await self.create_auto_reply_task(task_id, gh_id)
+        if create_row:
+            affected_rows = await self.insert_aigc_auto_reply_task(
+                task_id, account_name
+            )
+            if not affected_rows:
+                print("发布任务至 AIGC 失败")
+            else:
+                await self.update_auto_reply_task_status(
+                    task_id, "task", self.INIT_STATUS, self.PROCESSING_STATUS
+                )
+        else:
+            print("创建任务至 DB 失败")
+
     async def follow_gzh_task(self):
         account_list = self.get_monitor_account_list()
         for account in account_list:
@@ -326,7 +381,7 @@ class AutoReplyCardsMonitor(AutoReplyCardsMonitorMapper):
 
                     match follow_status:
                         case self.INIT_STATUS:
-                            await self.create_follow_account_task(
+                            await self.create_follow_single_account_task(
                                 account_detail["gh_id"]
                             )
 
@@ -361,7 +416,10 @@ class AutoReplyCardsMonitor(AutoReplyCardsMonitorMapper):
                                     )
 
                         case self.SUCCESS_STATUS:
-                            continue
+                            # 账号已经关注,创建获取自动回复任务
+                            await self.create_auto_reply_single_account_task(
+                                account_detail["gh_id"], account.公众号名
+                            )
 
                         case _:
                             print(f"{account.公众号名}账号状态异常")
@@ -369,11 +427,54 @@ class AutoReplyCardsMonitor(AutoReplyCardsMonitorMapper):
             except Exception as e:
                 print(f"处理账号{account.公众号名}异常", e)
 
+    # Poll the results of in-progress auto-reply tasks (not follow-account results)
+    async def get_auto_reply_response(self):
+        task_list = await self.fetch_auto_replying_tasks()
+        if not task_list:
+            return
+
+        for task in task_list:
+            try:
+                task_id = task["task_id"]
+                response = await self.get_auto_reply_task_result(task_id)
+                if not response:
+                    continue
+
+                task_status = response[0]["task_status"]
+                task_result = response[0]["task_result"]
+                update_timestamp = response[0]["update_timestamp"]
+                match task_status:
+                    case self.FETCH_FAIL_STATUS:
+                        await self.update_auto_reply_task_status(
+                            task_id, "task", self.PROCESSING_STATUS, self.FAIL_STATUS
+                        )
+
+                    case self.FETCH_SUCCESS_STATUS:
+                        await self.set_auto_reply_result(
+                            task_id, update_timestamp, task_result
+                        )
+
+                    case _:
+                        continue
+
+            except Exception as e:
+                print(e)
+
+    # 解析 xml 并且更新数据
+    async def extract_task(self):
+        pass
+
     # main function
     async def deal(self, task_name):
         match task_name:
             case "follow_gzh_task":
                 await self.follow_gzh_task()
 
+            case "get_auto_reply_task":
+                await self.get_auto_reply_response()
+
+            case "extract_task":
+                await self.extract_task()
+
             case _:
                 print("task_error")

+ 32 - 9
dev/crontab_back.txt

@@ -1,3 +1,8 @@
+
+0 3 * * * curl -X POST http://192.168.100.31:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "crawler_gzh_articles", "account_method": "1030-手动挑号", "crawl_mode": "account", "strategy": "V1"}'
+
+0 4 * * * curl -X POST http://192.168.100.31:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "crawler_gzh_articles", "account_method": "cooperate_account", "crawl_mode": "account", "strategy": "V1"}'
+
 # 定时清理文件
 0 1 * * * find /root/luojunhui/LongArticlesJob/static -type f -name "*.mp4" -mtime +5 -delete
 # 每天 9 点, 18 点执行 gzh 视频抓取
@@ -36,13 +41,15 @@
 30 10 * * * bash /root/luojunhui/LongArticlesJob/sh/run_article_title_exit_v1.sh
 
 # Account cold-start task, scheduled daily at 01:00 (matches the `0 1 * * *` entry below)
-0 1 * * * bash /root/luojunhui/LongArticlesJob/sh/run_account_cold_start_daily.sh
+# 0 1 * * * bash /root/luojunhui/LongArticlesJob/sh/run_account_cold_start_daily.sh
 
 # 晚上6点执行头条文章冷启动
 0 18 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "article_pool_cold_start", "platform": "toutiao", "crawler_methods": ["toutiao_account_association"]}'
 
 # Run the official-account article pool cold start daily at 17:30
-30 17 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "article_pool_cold_start", "strategy": "strategy_v2"}'
+30 17 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "article_pool_cold_start", "strategy": "strategy_v3"}'
+
+0 4 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "article_pool_cold_start", "strategy": "strategy_v1"}'
 
 # 早上执行sohu 抓取
 0 6 * * * bash /root/luojunhui/LongArticlesJob/sh/run_schedule_app.sh
@@ -55,10 +62,20 @@
 0 9 * * * bash /root/luojunhui/LongArticlesJob/sh/run_account_association.sh
 
 # 每天 10 点执行前一天的阅读率均值代码
-0 10 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_account_read_rate_avg.sh
+# 0 10 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_account_read_rate_avg.sh
 
 # 每天10点40执行阅读均值任务
-40 10 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_account_avg_v3.sh
+# 40 10 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_account_avg_v3.sh
+
+# 执行阅读率均值
+0 10 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_account_read_rate_avg"}'
+
+# 执行阅读均值
+40 10 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_account_read_avg"}'
+
+# 执行打开率均值
+50 10 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_account_open_rate_avg"}'
+
 
 # 每天11点执行文章联想任务
 0 11 * * * bash /root/luojunhui/LongArticlesJob/sh/run_article_association.sh
@@ -77,8 +94,12 @@
 # 20 14 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_published_articles_daily.sh
 # 50 20 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_published_articles_daily.sh
 
-0 8,15,21 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "daily_publish_articles_recycle"}'
+30 8,15 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "daily_publish_articles_recycle"}'
+
+0 21 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "daily_publish_articles_recycle"}'
+
 10 22 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "daily_publish_articles_recycle"}'
+
 30 9,16,21 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_root_source_id"}'
 
 30 22 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_root_source_id"}'
@@ -110,19 +131,21 @@
 # 每4h校验一次kimi余额
 25 */4 * * * curl -X POST http://127.0.0.1:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "check_kimi_balance"}'
 
-0 11,23 * * * bash /root/luojunhui/LongArticlesJob/sh/run_account_explore.sh
+# 0 11,23 * * * bash /root/luojunhui/LongArticlesJob/sh/run_account_explore.sh
 
 # 0 5,10,15,20 * * * bash /root/luojunhui/LongArticlesJob/sh/run_account_quality_analysis.sh
 
 # 更新小程序信息
-13 3,4,5 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "mini_program_detail_process"}'
+0 3,4,5,6,7,8 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "mini_program_detail_process"}'
 
 
-*/8 * * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "extract_title_features", "batch_size": 10, "version": 2}'
+# */8 * * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "extract_title_features", "batch_size": 50, "version": 2}'
 
 0 9,16 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "recycle_outside_account_articles"}'
 
-# 35 16 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_outside_account_article_root_source_id"}'
+35 16 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_outside_account_article_root_source_id"}'
 
+# Update rate-limited account info (task: update_limited_account_info)
+40 10 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "update_limited_account_info"}'
 # 候选账号质量分析
 0 5,10,15,20 * * * curl -X POST http://192.168.142.66:6060/api/run_task -H "Content-Type: application/json" -d '{"task_name": "candidate_account_quality_analysis"}'