|
@@ -9,7 +9,7 @@ from applications.functions.log import logging
|
|
from static.config import spider_coroutines
|
|
from static.config import spider_coroutines
|
|
|
|
|
|
# Temporary solution for task dead-lock
|
|
# Temporary solution for task dead-lock
|
|
-g_values = {'row_offset': 0}
|
|
|
|
|
|
+g_values = {'row_offset': 0, 'skip_num': 0}
|
|
|
|
|
|
class MatchTask1(object):
|
|
class MatchTask1(object):
|
|
"""
|
|
"""
|
|
@@ -41,6 +41,10 @@ class MatchTask1(object):
|
|
for content_id in content_ids:
|
|
for content_id in content_ids:
|
|
unique_content_ids.add(content_id[0])
|
|
unique_content_ids.add(content_id[0])
|
|
if not unique_content_ids:
|
|
if not unique_content_ids:
|
|
|
|
+ if g_values['skip_num'] > 0:
|
|
|
|
+ logging(code=9001, function="task1.get_task", info="reset row offset to 0")
|
|
|
|
+ g_values['row_offset'] = 0
|
|
|
|
+ g_values['skip_num'] = 0
|
|
return []
|
|
return []
|
|
g_values['row_offset'] = content_ids[-1][1]
|
|
g_values['row_offset'] = content_ids[-1][1]
|
|
print(f"update row offset to: {g_values['row_offset']}")
|
|
print(f"update row offset to: {g_values['row_offset']}")
|
|
@@ -54,6 +58,9 @@ class MatchTask1(object):
|
|
history_videos = await self.get_history_videos(content_id)
|
|
history_videos = await self.get_history_videos(content_id)
|
|
if not history_videos:
|
|
if not history_videos:
|
|
content_ids_to_process.append(content_id)
|
|
content_ids_to_process.append(content_id)
|
|
|
|
+ if len(content_ids_to_process) > spider_coroutines:  # the slice below drops items only when the list exceeds the cap
|
|
|
|
+ logging(code=9001, function="task1.get_task", info="some content is skipped, process it later")
|
|
|
|
+ g_values['skip_num'] = 1  # lets the empty-batch branch rewind row_offset to 0 so dropped ids are revisited
|
|
content_ids_to_process = content_ids_to_process[0:spider_coroutines]
|
|
content_ids_to_process = content_ids_to_process[0:spider_coroutines]
|
|
logging(
|
|
logging(
|
|
code=9001,
|
|
code=9001,
|