浏览代码

Update task1: re-process skipped contents

StrayWarrior 5 个月前
父节点
当前提交
9120e57703
共有 1 个文件被更改，包括 8 次插入和 1 次删除
  1. 8 1
      tasks/task1.py

+ 8 - 1
tasks/task1.py

@@ -9,7 +9,7 @@ from applications.functions.log import logging
 from static.config import spider_coroutines
 
 # Temporary solution for task dead-lock
-g_values = {'row_offset': 0}
+g_values = {'row_offset': 0, 'skip_num': 0}
 
 class MatchTask1(object):
     """
@@ -41,6 +41,10 @@ class MatchTask1(object):
         for content_id in content_ids:
             unique_content_ids.add(content_id[0])
         if not unique_content_ids:
+            if g_values['skip_num'] > 0:
+                logging(code=9001, function="task1.get_task", info="reset row offset to 0")
+                g_values['row_offset'] = 0
+                g_values['skip_num'] = 0
             return []
         g_values['row_offset'] = content_ids[-1][1]
         print(f"update row offset to: {g_values['row_offset']}")
@@ -54,6 +58,9 @@ class MatchTask1(object):
             history_videos = await self.get_history_videos(content_id)
             if not history_videos:
                 content_ids_to_process.append(content_id)
+        if spider_coroutines > len(content_ids_to_process):
+            logging(code=9001, function="task1.get_task", info="some content is skipped, process it later")
+            g_values['skip_num'] = 1
         content_ids_to_process = content_ids_to_process[0:spider_coroutines]
         logging(
             code=9001,