|
@@ -8,6 +8,8 @@ from applications.schedule import search_videos
|
|
|
from applications.functions.log import logging
|
|
|
from static.config import spider_coroutines
|
|
|
|
|
|
+# Temporary workaround for the task deadlock: remember the id of the last fetched row so the next query resumes from it.
|
|
|
+g_values = {'row_offset': 0}
|
|
|
|
|
|
class MatchTask1(object):
|
|
|
"""
|
|
@@ -27,17 +29,21 @@ class MatchTask1(object):
|
|
|
"""
|
|
|
select_limit = spider_coroutines * 5
|
|
|
select_sql1 = f"""
|
|
|
- SELECT content_id
|
|
|
+ SELECT content_id, id
|
|
|
FROM {db_article}
|
|
|
WHERE content_status = 0 and process_times <= 3
|
|
|
- ORDER BY request_time_stamp
|
|
|
- ASC
|
|
|
+ AND id >= {g_values['row_offset']}
|
|
|
+ ORDER BY id
|
|
|
LIMIT {select_limit};
|
|
|
"""
|
|
|
content_ids = await self.mysql_client.async_select(select_sql1)
|
|
|
unique_content_ids = set()
|
|
|
for content_id in content_ids:
|
|
|
unique_content_ids.add(content_id[0])
|
|
|
+ if not unique_content_ids:
|
|
|
+ return []
|
|
|
+ g_values['row_offset'] = content_ids[-1][1]
|
|
|
+ print(f"update row offset to: {g_values['row_offset']}")
|
|
|
unique_content_ids = list(unique_content_ids)[0:spider_coroutines]
|
|
|
content_ids_tuple = str(unique_content_ids).replace("[", "(").replace("]", ")")
|
|
|
if len(content_ids_tuple) > 3:
|
|
@@ -62,8 +68,9 @@ class MatchTask1(object):
|
|
|
]
|
|
|
logging(
|
|
|
code="9001",
|
|
|
+ function="task1.get_task",
|
|
|
info="本次任务获取到 {} 条视频".format(len(task_obj_list)),
|
|
|
- data=task_obj_list
|
|
|
+ data=[x['content_id'] for x in task_obj_list]
|
|
|
)
|
|
|
return task_obj_list
|
|
|
else:
|