Jelajahi Sumber

feat:解构任务修改

zhaohaipeng 1 bulan lalu
induk
melakukan
be5b2b3297

+ 0 - 0
enums/__init__.py


+ 18 - 0
enums/automation_job.py

@@ -0,0 +1,18 @@
from enum import Enum


class AutomationJobCronInfo(Enum):
    """Cron/monitoring metadata for the automation crawler jobs.

    Each member's value is a ``(crawler_mode, video_source, min_hour)`` triple:

    * ``crawler_mode`` -- identifier of the crawler job as reported in the logs.
      NOTE(review): it currently mirrors the member name exactly; kept explicit
      so the log identifier is independent of the Python-level name.
    * ``video_source`` -- which video pool the job draws from (all "top" today).
    * ``min_hour`` -- earliest hour of day at which the job is expected to have
      run (all 9 today), used by the monitor to decide when to start alerting.
    """

    account_top = ("account_top", "top", 9)
    account_extend_top = ("account_extend_top", "top", 9)
    channel_topic_top = ("channel_topic_top", "top", 9)
    channel_topic_extend_top = ("channel_topic_extend_top", "top", 9)
    channel_image_search_video_top = ("channel_image_search_video_top", "top", 9)
    channel_image_search_topic_top = ("channel_image_search_topic_top", "top", 9)
    channel_image_search_topic_extend_top = ("channel_image_search_topic_extend_top", "top", 9)
    channel_image_search_video_all_cate_top = ("channel_image_search_video_all_cate_top", "top", 9)
    video_decode_accurate_text_top = ("video_decode_accurate_text_top", "top", 9)

    def __init__(self, crawler_mode: str, video_source: str, min_hour: int):
        # Enum calls __init__ once per member with the value tuple unpacked,
        # so the triple is exposed as named attributes on each member.
        self.crawler_mode = crawler_mode
        self.video_source = video_source
        self.min_hour = min_hour

+ 9 - 21
monitor/automation_provide_job_monitor.py

@@ -4,6 +4,7 @@ from typing import List, Tuple
 from aliyun.log import LogClient
 from aliyun.log.auth import AUTH_VERSION_4
 
+from enums.automation_job import AutomationJobCronInfo
 from util import feishu_inform_util
 
 endpoint = "cn-hangzhou.log.aliyuncs.com"
@@ -16,12 +17,6 @@ state_query_sql = "* | select crawlerMode, result, if(reason='null', '成功', r
 client = LogClient(endpoint=endpoint, accessKey=access_key, accessKeyId=access_key_id, auth_version=AUTH_VERSION_4, region='cn-hangzhou')
 webhook = 'https://open.feishu.cn/open-apis/bot/v2/hook/9f5c5cce-5eb2-4731-b368-33926f5549f9'
 
-all_crawler_mode_list = [
-    "account", "account_extend", "channel_topic", "channel_topic_extend", "channel_image_search_video",
-    "channel_image_search_topic", "channel_image_search_topic_extend", "channel_image_search_video_all_cate",
-    "video_decode_accurate_text"
-]
-
 card_json = {
     "schema": "2.0",
     "header": {
@@ -70,8 +65,9 @@ def job_run_state(start_ts: int, end_ts: int):
     resp = client.get_log(project=project, logstore=log_store, from_time=start_ts, to_time=end_ts, query=state_query_sql)
     log_data = resp.get_body().get('data')
 
+    all_crawler_mode = list(dict.fromkeys([cron_info.crawler_mode for cron_info in AutomationJobCronInfo]))
     collapsible_limit = 5
-    crawler_mode_group = [all_crawler_mode_list[i:i + collapsible_limit] for i in range(0, len(all_crawler_mode_list), collapsible_limit)]
+    crawler_mode_group = [all_crawler_mode[i:i + collapsible_limit] for i in range(0, len(all_crawler_mode), collapsible_limit)]
     for crawler_mode_partition in crawler_mode_group:
         elements = []
         for crawler_mode in crawler_mode_partition:
@@ -118,22 +114,14 @@ def main():
 
     job_run_state(start_ts, end_ts)
 
-    # 历史爆款
-    video_source_list = ["history"]
-    history_crawler_mode_list = ["account_extend"]
-    # 九点半之后统计每日爆款
-    if today.hour >= 9 and today.minute >= 30:
-        video_source_list.append("top")
+    current_hour = today.hour
 
     crawler_mode_and_video_source_list = []
-    for crawler_mode in all_crawler_mode_list:
-        for video_source in video_source_list:
-            if video_source == "history":
-                if crawler_mode not in history_crawler_mode_list:
-                    continue
-                crawler_mode_and_video_source_list.append((crawler_mode, video_source))
-            else:
-                crawler_mode_and_video_source_list.append((crawler_mode, video_source))
+    for cron_info in AutomationJobCronInfo:
+        if current_hour < cron_info.min_hour:
+            continue
+
+        crawler_mode_and_video_source_list.append((cron_info.crawler_mode, cron_info.video_source))
 
     crawler_mode_not_success_warning(start_ts, end_ts, crawler_mode_and_video_source_list)
 

+ 23 - 0
script/crawler_plan_filter_update.py

@@ -0,0 +1,23 @@
import json

from client.AIGCClient import AIGCClient

# NOTE(review): hard-coded API token committed to source — move it to an
# environment variable or a secret store before this script is shared.
aigc_client = AIGCClient(token="8bf14f27fc3a486788f3383452422d72", base_url="https://aigc-api.aiddit.com")

# Load one JSON object per line (JSON Lines export) from the local dump file.
log_list = []
with open("/Users/zhao/Desktop/1.json", "r", encoding="utf-8") as f:
    for line in f:
        # Skip blank lines so a trailing newline in the export does not crash json.loads.
        if line.strip():
            log_list.append(json.loads(line))

for log in log_list:
    crawler_plan_id = log.get("crawlerPlanId")

    # Fetch the crawler plan; the client returns (error_msg, payload).
    error_msg, crawler_plan_info = aigc_client.get_content_crawler_plan_by_id(crawler_plan_id)
    if error_msg:
        print(f"获取 {crawler_plan_id} 的爬取信息异常")
        continue

    # Clear the account filters on the fetched plan.
    # NOTE(review): the modified update_data is only mutated in memory and is
    # never written back through the API — confirm whether a save/update call
    # is missing at the end of this loop.
    update_data = crawler_plan_info['updateData']
    update_data['accountFilters'] = []