|
@@ -48,9 +48,20 @@ class MatchTask1(object):
|
|
|
logging(
|
|
|
code=9001,
|
|
|
function="task1.get_task",
|
|
|
- info=f"unique content ids in batch: {len(unique_content_ids)}"
|
|
|
+ info=f"unique content ids: {len(unique_content_ids)}"
|
|
|
)
|
|
|
- content_ids_tuple = str(unique_content_ids).replace("[", "(").replace("]", ")")
|
|
|
+ content_ids_to_process = []
|
|
|
+ for content_id in unique_content_ids:
|
|
|
+ history_videos = await self.get_history_videos(content_id)
|
|
|
+ if not history_videos:
|
|
|
+ content_ids_to_process.append(content_id)
|
|
|
+ content_ids_to_process = content_ids_to_process[0:spider_coroutines]
|
|
|
+ logging(
|
|
|
+ code=9001,
|
|
|
+ function="task1.get_task",
|
|
|
+ info=f"content ids to process: {len(content_ids_to_process)}"
|
|
|
+ )
|
|
|
+ content_ids_tuple = str(content_ids_to_process).replace("[", "(").replace("]", ")")
|
|
|
if len(content_ids_tuple) > 3:
|
|
|
select_sql = f"""
|
|
|
SELECT trace_id, content_id, gh_id, article_title, article_text, content_status, process_times
|