|
@@ -8,6 +8,8 @@ from applications.schedule import search_videos
|
|
|
from applications.functions.log import logging
|
|
|
from static.config import spider_coroutines
|
|
|
|
|
|
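+# Temporary workaround for a task deadlock: g_values['row_offset'] remembers the id of the last row scanned so the next task query resumes from that id.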
|
|
|
+g_values = {'row_offset': 0}
|
|
|
|
|
|
class MatchTask1(object):
|
|
|
"""
|
|
@@ -25,19 +27,30 @@ class MatchTask1(object):
|
|
|
获取任务
|
|
|
:return:
|
|
|
"""
|
|
|
+ select_limit = spider_coroutines * 100
|
|
|
select_sql1 = f"""
|
|
|
- SELECT DISTINCT (content_id)
|
|
|
+ SELECT content_id, id
|
|
|
FROM {db_article}
|
|
|
WHERE content_status = 0 and process_times <= 3
|
|
|
- ORDER BY request_time_stamp
|
|
|
- ASC
|
|
|
- LIMIT {spider_coroutines};
|
|
|
+ AND id >= {g_values['row_offset']}
|
|
|
+ ORDER BY id
|
|
|
+ LIMIT {select_limit};
|
|
|
"""
|
|
|
content_ids = await self.mysql_client.async_select(select_sql1)
|
|
|
- cil = []
|
|
|
+ unique_content_ids = set()
|
|
|
for content_id in content_ids:
|
|
|
- cil.append(content_id[0])
|
|
|
- content_ids_tuple = str(cil).replace("[", "(").replace("]", ")")
|
|
|
+ unique_content_ids.add(content_id[0])
|
|
|
+ if not unique_content_ids:
|
|
|
+ return []
|
|
|
+ g_values['row_offset'] = content_ids[-1][1]
|
|
|
+ print(f"update row offset to: {g_values['row_offset']}")
|
|
|
+ unique_content_ids = list(unique_content_ids)[0:spider_coroutines]
|
|
|
+ logging(
|
|
|
+ code=9001,
|
|
|
+ function="task1.get_task",
|
|
|
+ info=f"unique content ids in batch: {len(unique_content_ids)}"
|
|
|
+ )
|
|
|
+ content_ids_tuple = str(unique_content_ids).replace("[", "(").replace("]", ")")
|
|
|
if len(content_ids_tuple) > 3:
|
|
|
select_sql = f"""
|
|
|
SELECT trace_id, content_id, gh_id, article_title, article_text, content_status, process_times
|
|
@@ -60,8 +73,9 @@ class MatchTask1(object):
|
|
|
]
|
|
|
logging(
|
|
|
code="9001",
|
|
|
+ function="task1.get_task",
|
|
|
info="本次任务获取到 {} 条视频".format(len(task_obj_list)),
|
|
|
- data=task_obj_list
|
|
|
+ data=[x['content_id'] for x in task_obj_list]
|
|
|
)
|
|
|
return task_obj_list
|
|
|
else:
|