浏览代码

debug
57行
425 行

罗俊辉 7 月之前
父节点
当前提交
cfb2236d73
共有 1 个文件被更改,包括 21 次插入和 3 次删除
  1. 21 3
      tasks/newContentIdTask.py

+ 21 - 3
tasks/newContentIdTask.py

@@ -54,7 +54,7 @@ class NewContentIdTask(object):
                     "content_status_update_time": item[1],
                     "process_times": item[2]
                 }
-                for item in processing_articles[0]
+                for item in processing_articles
             ]
             for obj in processing_list:
                 if int(time.time()) - obj['content_status_update_time'] >= 3600:
@@ -332,6 +332,7 @@ class NewContentIdTask(object):
         """
         爬虫任务
         :return:
+        todo: 任务执行之前加一个判断,判断是存在 3 条以上的视频已经被抓取
         """
         spider_default_status = 1
         spider_success_status = 2
@@ -339,6 +340,18 @@ class NewContentIdTask(object):
         content_id = params['content_id']
         process_times = params['process_times']
         gh_id = params['gh_id']
+        select_sql = f"""
+        select count(id) from {self.article_crawler_video_table} where content_id = '{content_id}';
+        """
+        count_tuple = await self.mysql_client.async_select(select_sql)
+        counts = count_tuple[0][0]
+        if counts >= 3:
+            await self.update_content_status(
+                new_content_status=spider_success_status,
+                trace_id=trace_id,
+                ori_content_status=spider_default_status
+            )
+            return True
         try:
             # 开始处理,将状态由 1 改成  101
             await self.update_content_status(
@@ -422,9 +435,8 @@ class NewContentIdTask(object):
                 ORDER BY score DESC;
             """
             videos_need_to_download_tuple = await self.mysql_client.async_select(select_sql)
-            videos_need_to_download_list = videos_need_to_download_tuple[0]
             downloaded_count = 0
-            for line in videos_need_to_download_list:
+            for line in videos_need_to_download_tuple:
                 params = {
                     "id": line[0],
                     "video_id": line[1],
@@ -590,12 +602,15 @@ class NewContentIdTask(object):
         kimi_result = await self.kimi_task(params)
         if kimi_result:
             # 等待 kimi 操作执行完成之后,开始执行 spider_task
+            print("kimi success")
             spider_flag = await self.spider_task(params=params, kimi_result=kimi_result)
             if spider_flag:
                 # 等待爬虫执行完成后,开始执行 etl_task
+                print("spider success")
                 etl_flag = await self.etl_task(params)
                 if etl_flag:
                     # 等待下载上传完成,执行发布任务
+                    print("etl success")
                     try:
                         await self.publish_task(params, kimi_result['kimi_title'])
                     except Exception as e:
@@ -640,6 +655,8 @@ class NewContentIdTask(object):
                 )
             else:
                 await self.start_process(params=params)
+        else:
+            print("存在已下载视频")
 
     async def deal(self):
         """
@@ -647,6 +664,7 @@ class NewContentIdTask(object):
         :return:
         """
         task_list = await self.get_tasks()
+        print(task_list)
         logging(
             code="5001",
             info="Match Task Got {} this time".format(len(task_list)),