Browse Source

Update task1: paging in get_task

StrayWarrior 5 months ago
parent
commit
cfc82db52c
2 changed files with 13 additions and 5 deletions
  1. 11 4
      tasks/task1.py
  2. 2 1
      tasks/task3.py

+ 11 - 4
tasks/task1.py

@@ -8,6 +8,8 @@ from applications.schedule import search_videos
 from applications.functions.log import logging
 from static.config import spider_coroutines
 
+# Temporary workaround for task deadlock: remember the last fetched row id so get_task pages forward by id
+g_values = {'row_offset': 0}
 
 class MatchTask1(object):
     """
@@ -27,17 +29,21 @@ class MatchTask1(object):
         """
         select_limit = spider_coroutines * 5
         select_sql1 = f"""
-            SELECT content_id
+            SELECT content_id, id
             FROM {db_article} 
             WHERE content_status = 0 and process_times <= 3
-            ORDER BY request_time_stamp
-            ASC
+            AND id >= {g_values['row_offset']}
+            ORDER BY id
             LIMIT {select_limit};
         """
         content_ids = await self.mysql_client.async_select(select_sql1)
         unique_content_ids = set()
         for content_id in content_ids:
             unique_content_ids.add(content_id[0])
+        if not unique_content_ids:
+            return []
+        g_values['row_offset'] = content_ids[-1][1]
+        print(f"update row offset to: {g_values['row_offset']}")
         unique_content_ids = list(unique_content_ids)[0:spider_coroutines]
         content_ids_tuple = str(unique_content_ids).replace("[", "(").replace("]", ")")
         if len(content_ids_tuple) > 3:
@@ -62,8 +68,9 @@ class MatchTask1(object):
             ]
             logging(
                 code="9001",
+                function="task1.get_task",
                 info="本次任务获取到 {} 条视频".format(len(task_obj_list)),
-                data=task_obj_list
+                data=[x['content_id'] for x in task_obj_list]
             )
             return task_obj_list
         else:

+ 2 - 1
tasks/task3.py

@@ -58,8 +58,9 @@ class MatchTask3(object):
         ]
         logging(
             code="9001",
+            function="task3.get_task",
             info="本次任务获取到 {} 条视频".format(len(task_obj_list)),
-            data=task_obj_list
+            data=[x['content_id'] for x in task_obj_list]
         )
         return task_obj_list