Browse Source

Merge branch '2024-12-06-luojunhui-improve-new-tasks' of Server/title_with_video into 2024-09-23newDbTasks

luojunhui 4 months ago
parent
commit
af031b3ca0
1 changed file with 12 additions and 4 deletions
  1. 12 4
      tasks/new_contentId_task.py

+ 12 - 4
tasks/new_contentId_task.py

@@ -62,12 +62,20 @@ class NewContentIdTask(object):
         # Fetch tasks with process_times <= 3 and content_status = 0
         select_sql = f"""
             SELECT
-                trace_id, content_id, flow_pool_level, gh_id, process_times, publish_flag
+                t1.trace_id, t1.content_id, t1.flow_pool_level, t1.gh_id, t1.process_times, t1.publish_flag
             FROM
-                {self.article_match_video_table}
+                {self.article_match_video_table} t1
+            LEFT JOIN (
+                SELECT content_id, count(1) as cnt
+                FROM {self.article_crawler_video_table}
+                WHERE download_status = {NewContentIdTaskConst.VIDEO_DOWNLOAD_SUCCESS_STATUS}
+                GROUP BY content_id
+            ) t2
+            ON t1.content_id = t2.content_id
             WHERE
-                    content_status = {NewContentIdTaskConst.TASK_INIT_STATUS}
-                and process_times <= {NewContentIdTaskConst.TASK_MAX_PROCESS_TIMES}
+                    t1.content_status = {NewContentIdTaskConst.TASK_INIT_STATUS}
+                AND t1.process_times <= {NewContentIdTaskConst.TASK_MAX_PROCESS_TIMES}
+                AND t2.cnt IS NULL
             ORDER BY flow_pool_level, request_timestamp
             LIMIT {self.spider_coroutines};
         """