Jelajahi Sumber

feat:解构任务修改

zhaohaipeng 1 bulan lalu
induk
melakukan
be5b2b3297

+ 0 - 0
enums/__init__.py


+ 18 - 0
enums/automation_job.py

@@ -0,0 +1,18 @@
from enum import Enum


class AutomationJobCronInfo(Enum):
    """Cron/monitoring metadata for the automation crawler jobs.

    Each member's value is a ``(crawler_mode, video_source, min_hour)`` triple:

    * ``crawler_mode`` -- identifier of the crawler job as reported in the logs.
      NOTE(review): it currently mirrors the member name exactly; kept explicit
      so the log identifier is independent of the Python-level name.
    * ``video_source`` -- which video pool the job draws from (all "top" today).
    * ``min_hour`` -- earliest hour of day at which the job is expected to have
      run (all 9 today), used by the monitor to decide when to start alerting.
    """

    account_top = ("account_top", "top", 9)
    account_extend_top = ("account_extend_top", "top", 9)
    channel_topic_top = ("channel_topic_top", "top", 9)
    channel_topic_extend_top = ("channel_topic_extend_top", "top", 9)
    channel_image_search_video_top = ("channel_image_search_video_top", "top", 9)
    channel_image_search_topic_top = ("channel_image_search_topic_top", "top", 9)
    channel_image_search_topic_extend_top = ("channel_image_search_topic_extend_top", "top", 9)
    channel_image_search_video_all_cate_top = ("channel_image_search_video_all_cate_top", "top", 9)
    video_decode_accurate_text_top = ("video_decode_accurate_text_top", "top", 9)

    def __init__(self, crawler_mode: str, video_source: str, min_hour: int):
        # Enum calls __init__ once per member with the value tuple unpacked,
        # so the triple is exposed as named attributes on each member.
        self.crawler_mode = crawler_mode
        self.video_source = video_source
        self.min_hour = min_hour

+ 9 - 21
monitor/automation_provide_job_monitor.py

@@ -4,6 +4,7 @@ from typing import List, Tuple
 from aliyun.log import LogClient
 from aliyun.log.auth import AUTH_VERSION_4
 
+from enums.automation_job import AutomationJobCronInfo
 from util import feishu_inform_util
 
 endpoint = "cn-hangzhou.log.aliyuncs.com"
@@ -16,12 +17,6 @@ state_query_sql = "* | select crawlerMode, result, if(reason='null', '成功', r
 client = LogClient(endpoint=endpoint, accessKey=access_key, accessKeyId=access_key_id, auth_version=AUTH_VERSION_4, region='cn-hangzhou')
 webhook = 'https://open.feishu.cn/open-apis/bot/v2/hook/9f5c5cce-5eb2-4731-b368-33926f5549f9'
 
-all_crawler_mode_list = [
-    "account", "account_extend", "channel_topic", "channel_topic_extend", "channel_image_search_video",
-    "channel_image_search_topic", "channel_image_search_topic_extend", "channel_image_search_video_all_cate",
-    "video_decode_accurate_text"
-]
-
 card_json = {
     "schema": "2.0",
     "header": {
@@ -70,8 +65,9 @@ def job_run_state(start_ts: int, end_ts: int):
     resp = client.get_log(project=project, logstore=log_store, from_time=start_ts, to_time=end_ts, query=state_query_sql)
     log_data = resp.get_body().get('data')
 
+    all_crawler_mode = list(dict.fromkeys([cron_info.crawler_mode for cron_info in AutomationJobCronInfo]))
     collapsible_limit = 5
-    crawler_mode_group = [all_crawler_mode_list[i:i + collapsible_limit] for i in range(0, len(all_crawler_mode_list), collapsible_limit)]
+    crawler_mode_group = [all_crawler_mode[i:i + collapsible_limit] for i in range(0, len(all_crawler_mode), collapsible_limit)]
     for crawler_mode_partition in crawler_mode_group:
         elements = []
         for crawler_mode in crawler_mode_partition:
@@ -118,22 +114,14 @@ def main():
 
     job_run_state(start_ts, end_ts)
 
-    # 历史爆款
-    video_source_list = ["history"]
-    history_crawler_mode_list = ["account_extend"]
-    # 九点半之后统计每日爆款
-    if today.hour >= 9 and today.minute >= 30:
-        video_source_list.append("top")
+    current_hour = today.hour
 
     crawler_mode_and_video_source_list = []
-    for crawler_mode in all_crawler_mode_list:
-        for video_source in video_source_list:
-            if video_source == "history":
-                if crawler_mode not in history_crawler_mode_list:
-                    continue
-                crawler_mode_and_video_source_list.append((crawler_mode, video_source))
-            else:
-                crawler_mode_and_video_source_list.append((crawler_mode, video_source))
+    for cron_info in AutomationJobCronInfo:
+        if current_hour < cron_info.min_hour:
+            continue
+
+        crawler_mode_and_video_source_list.append((cron_info.crawler_mode, cron_info.video_source))
 
     crawler_mode_not_success_warning(start_ts, end_ts, crawler_mode_and_video_source_list)
 

+ 23 - 0
script/crawler_plan_filter_update.py

@@ -0,0 +1,23 @@
import json

from client.AIGCClient import AIGCClient

# NOTE(review): hard-coded API token committed to source — move it to an
# environment variable or a secret store before this script is shared.
aigc_client = AIGCClient(token="8bf14f27fc3a486788f3383452422d72", base_url="https://aigc-api.aiddit.com")

# Load one JSON object per line (JSON Lines export) from the local dump file.
log_list = []
with open("/Users/zhao/Desktop/1.json", "r", encoding="utf-8") as f:
    for line in f:
        # Skip blank lines so a trailing newline in the export does not crash json.loads.
        if line.strip():
            log_list.append(json.loads(line))

for log in log_list:
    crawler_plan_id = log.get("crawlerPlanId")

    # Fetch the crawler plan; the client returns (error_msg, payload).
    error_msg, crawler_plan_info = aigc_client.get_content_crawler_plan_by_id(crawler_plan_id)
    if error_msg:
        print(f"获取 {crawler_plan_id} 的爬取信息异常")
        continue

    # Clear the account filters on the fetched plan.
    # NOTE(review): the modified update_data is only mutated in memory and is
    # never written back through the API — confirm whether a save/update call
    # is missing at the end of this loop.
    update_data = crawler_plan_info['updateData']
    update_data['accountFilters'] = []