|
@@ -54,7 +54,7 @@ class NewContentIdTask(object):
|
|
|
"content_status_update_time": item[1],
|
|
|
"process_times": item[2]
|
|
|
}
|
|
|
- for item in processing_articles[0]
|
|
|
+ for item in processing_articles
|
|
|
]
|
|
|
for obj in processing_list:
|
|
|
if int(time.time()) - obj['content_status_update_time'] >= 3600:
|
|
@@ -332,6 +332,7 @@ class NewContentIdTask(object):
|
|
|
"""
|
|
|
爬虫任务
|
|
|
:return:
|
|
|
+ todo: 任务执行之前加一个判断,判断是否存在 3 条以上的视频已经被抓取
|
|
|
"""
|
|
|
spider_default_status = 1
|
|
|
spider_success_status = 2
|
|
@@ -339,6 +340,18 @@ class NewContentIdTask(object):
|
|
|
content_id = params['content_id']
|
|
|
process_times = params['process_times']
|
|
|
gh_id = params['gh_id']
|
|
|
+ select_sql = f"""
|
|
|
+ select count(id) from {self.article_crawler_video_table} where content_id = '{content_id}';
|
|
|
+ """
|
|
|
+ count_tuple = await self.mysql_client.async_select(select_sql)
|
|
|
+ counts = count_tuple[0][0]
|
|
|
+ if counts >= 3:
|
|
|
+ await self.update_content_status(
|
|
|
+ new_content_status=spider_success_status,
|
|
|
+ trace_id=trace_id,
|
|
|
+ ori_content_status=spider_default_status
|
|
|
+ )
|
|
|
+ return True
|
|
|
try:
|
|
|
# 开始处理,将状态由 1 改成 101
|
|
|
await self.update_content_status(
|
|
@@ -422,9 +435,8 @@ class NewContentIdTask(object):
|
|
|
ORDER BY score DESC;
|
|
|
"""
|
|
|
videos_need_to_download_tuple = await self.mysql_client.async_select(select_sql)
|
|
|
- videos_need_to_download_list = videos_need_to_download_tuple[0]
|
|
|
downloaded_count = 0
|
|
|
- for line in videos_need_to_download_list:
|
|
|
+ for line in videos_need_to_download_tuple:
|
|
|
params = {
|
|
|
"id": line[0],
|
|
|
"video_id": line[1],
|
|
@@ -590,12 +602,15 @@ class NewContentIdTask(object):
|
|
|
kimi_result = await self.kimi_task(params)
|
|
|
if kimi_result:
|
|
|
# 等待 kimi 操作执行完成之后,开始执行 spider_task
|
|
|
+ print("kimi success")
|
|
|
spider_flag = await self.spider_task(params=params, kimi_result=kimi_result)
|
|
|
if spider_flag:
|
|
|
# 等待爬虫执行完成后,开始执行 etl_task
|
|
|
+ print("spider success")
|
|
|
etl_flag = await self.etl_task(params)
|
|
|
if etl_flag:
|
|
|
# 等待下载上传完成,执行发布任务
|
|
|
+ print("etl success")
|
|
|
try:
|
|
|
await self.publish_task(params, kimi_result['kimi_title'])
|
|
|
except Exception as e:
|
|
@@ -640,6 +655,8 @@ class NewContentIdTask(object):
|
|
|
)
|
|
|
else:
|
|
|
await self.start_process(params=params)
|
|
|
+ else:
|
|
|
+ print("存在已下载视频")
|
|
|
|
|
|
async def deal(self):
|
|
|
"""
|
|
@@ -647,6 +664,7 @@ class NewContentIdTask(object):
|
|
|
:return:
|
|
|
"""
|
|
|
task_list = await self.get_tasks()
|
|
|
+ print(task_list)
|
|
|
logging(
|
|
|
code="5001",
|
|
|
info="Match Task Got {} this time".format(len(task_list)),
|