il y a 9 mois · cfb2236d73
--- a/tasks/newContentIdTask.py
+++ b/tasks/newContentIdTask.py
@@ -54,7 +54,7 @@ class NewContentIdTask(object):
 
				                     "content_status_update_time": item[1],
			
 
				                     "process_times": item[2]
			
 
				                 }
			
 
				-                for item in processing_articles[0]
			
 
				+                for item in processing_articles
			
 
				             ]
			
 
				             for obj in processing_list:
			
 
				                 if int(time.time()) - obj['content_status_update_time'] >= 3600:
			
@@ -332,6 +332,7 @@ class NewContentIdTask(object):
 
				         """
			
 
				         爬虫任务
			
 
				         :return:
			
 
				+        todo: before running the task, check whether 3 or more videos have already been crawled for this content_id
			
 
				         """
			
 
				         spider_default_status = 1
			
 
				         spider_success_status = 2
			
@@ -339,6 +340,18 @@ class NewContentIdTask(object):
 
				         content_id = params['content_id']
			
 
				         process_times = params['process_times']
			
 
				         gh_id = params['gh_id']
			
 
				+        select_sql = f"""
			
 
				+        select count(id) from {self.article_crawler_video_table} where content_id = '{content_id}';
			
 
				+        """
			
 
				+        count_tuple = await self.mysql_client.async_select(select_sql)
			
 
				+        counts = count_tuple[0][0]
			
 
				+        if counts >= 3:
			
 
				+            await self.update_content_status(
			
 
				+                new_content_status=spider_success_status,
			
 
				+                trace_id=trace_id,
			
 
				+                ori_content_status=spider_default_status
			
 
				+            )
			
 
				+            return True
			
 
				         try:
			
 
				             # 开始处理，将状态由 1 改成  101
			
 
				             await self.update_content_status(
			
@@ -422,9 +435,8 @@ class NewContentIdTask(object):
 
				                 ORDER BY score DESC;
			
 
				             """
			
 
				             videos_need_to_download_tuple = await self.mysql_client.async_select(select_sql)
			
 
				-            videos_need_to_download_list = videos_need_to_download_tuple[0]
			
 
				             downloaded_count = 0
			
 
				-            for line in videos_need_to_download_list:
			
 
				+            for line in videos_need_to_download_tuple:
			
 
				                 params = {
			
 
				                     "id": line[0],
			
 
				                     "video_id": line[1],
			
@@ -590,12 +602,15 @@ class NewContentIdTask(object):
 
				         kimi_result = await self.kimi_task(params)
			
 
				         if kimi_result:
			
 
				             # 等待 kimi 操作执行完成之后，开始执行 spider_task
			
 
				+            print("kimi success")
			
 
				             spider_flag = await self.spider_task(params=params, kimi_result=kimi_result)
			
 
				             if spider_flag:
			
 
				                 # 等待爬虫执行完成后，开始执行 etl_task
			
 
				+                print("spider success")
			
 
				                 etl_flag = await self.etl_task(params)
			
 
				                 if etl_flag:
			
 
				                     # 等待下载上传完成，执行发布任务
			
 
				+                    print("etl success")
			
 
				                     try:
			
 
				                         await self.publish_task(params, kimi_result['kimi_title'])
			
 
				                     except Exception as e:
			
@@ -640,6 +655,8 @@ class NewContentIdTask(object):
 
				                 )
			
 
				             else:
			
 
				                 await self.start_process(params=params)
			
 
				+        else:
			
 
				+            print("存在已下载视频")
			
 
				 
			
 
				     async def deal(self):
			
 
				         """
			
@@ -647,6 +664,7 @@ class NewContentIdTask(object):
 
				         :return:
			
 
				         """
			
 
				         task_list = await self.get_tasks()
			
 
				+        print(task_list)
			
 
				         logging(
			
 
				             code="5001",
			
 
				             info="Match Task Got {} this time".format(len(task_list)),