Parcourir la source

Log video download failures
Replace Feishu bot alerts with logging in dy_spider && baidu spider

luojunhui il y a 5 mois
Parent
commit
53d384756a
3 fichiers modifiés avec 71 ajouts et 41 suppressions
  1. 27 18
      applications/search/dy_search.py
  2. 8 13
      applications/search/hksp_search.py
  3. 36 10
      tasks/newContentIdTask.py

+ 27 - 18
applications/search/dy_search.py

@@ -2,13 +2,11 @@
 @author: luojunhui
 @author: luojunhui
 """
 """
 import json
 import json
-import traceback
 
 
 import requests
 import requests
 
 
 from applications.functions.common import sensitive_flag
 from applications.functions.common import sensitive_flag
 from applications.log import logging
 from applications.log import logging
-from applications.feishu import bot
 
 
 
 
 def douyin_search(keyword, sensitive_words, trace_id):
 def douyin_search(keyword, sensitive_words, trace_id):
@@ -35,6 +33,11 @@ def douyin_search(keyword, sensitive_words, trace_id):
     response = requests.request("POST", url, headers=headers, data=payload)
     response = requests.request("POST", url, headers=headers, data=payload)
     try:
     try:
         dt_list = response.json()['data']['data']
         dt_list = response.json()['data']['data']
+        logging(
+            code="4002",
+            info="抖音搜索成功",
+            trace_id=trace_id
+        )
         L = []
         L = []
         for obj in dt_list:
         for obj in dt_list:
             try:
             try:
@@ -60,22 +63,13 @@ def douyin_search(keyword, sensitive_words, trace_id):
             },
             },
             trace_id=trace_id
             trace_id=trace_id
         )
         )
-        if not L and len(dt_list) > 0:
-            bot(
-                title="抖音搜索失败",
-                detail={
-                    "keys": keyword,
-                    "搜索的视频数量": len(dt_list),
-                    "详情请求的视频数量": len(L)
-                },
-                mention=False
-            )
         return L
         return L
     except Exception as e:
     except Exception as e:
         logging(
         logging(
             code="4003",
             code="4003",
-            info="抖音搜索失败-搜索词:{} 原因:-{}".format(keyword, e),
-            trace_id=trace_id
+            info="抖音搜索失败",
+            trace_id=trace_id,
+            data={"error": str(e)}
         )
         )
         return []
         return []
     # logging(
     # logging(
@@ -99,9 +93,24 @@ def douyin_detail(video_id):
     headers = {
     headers = {
         'Content-Type': 'application/json'
         'Content-Type': 'application/json'
     }
     }
+    print(video_id)
     response = requests.request("POST", url, headers=headers, data=payload).json()
     response = requests.request("POST", url, headers=headers, data=payload).json()
-    video_info = response['data']['data']
-    if video_info['content_type'] == "note":
+    logging(
+        code="4005",
+        info="抖音请求详情",
+        data=response
+    )
+    print(json.dumps(response,ensure_ascii=False, indent=4))
+    try:
+        video_info = response['data']['data']
+        if video_info['content_type'] == "note":
+            return None
+        else:
+            return video_info
+    except Exception as e:
+        logging(
+            code="4006",
+            info="抖音请求详情失败",
+            data={"error": str(e)}
+        )
         return None
         return None
-    else:
-        return video_info

+ 8 - 13
applications/search/hksp_search.py

@@ -13,7 +13,6 @@ from fake_useragent import FakeUserAgent
 
 
 from applications.functions.common import sensitive_flag
 from applications.functions.common import sensitive_flag
 from applications.log import logging
 from applications.log import logging
-from applications.feishu import bot
 
 
 
 
 def tunnel_proxies():
 def tunnel_proxies():
@@ -108,6 +107,11 @@ def hksp_search(key, sensitive_words, trace_id):
         ).json()
         ).json()
         data_list = response['data']['list']
         data_list = response['data']['list']
         L = []
         L = []
+        logging(
+            code="4002",
+            info="百度搜索成功",
+            trace_id=trace_id
+        )
         for data in data_list:
         for data in data_list:
             try:
             try:
                 video_id = data['vid']
                 video_id = data['vid']
@@ -131,21 +135,12 @@ def hksp_search(key, sensitive_words, trace_id):
             },
             },
             trace_id=trace_id
             trace_id=trace_id
         )
         )
-        if not L and len(data_list) > 0:
-            bot(
-                title="抖音搜索失败",
-                detail={
-                    "keys": key,
-                    "搜索的视频数量": len(data_list),
-                    "详情请求的视频数量": len(L)
-                },
-                mention=False
-            )
         return L
         return L
     except Exception as e:
     except Exception as e:
         logging(
         logging(
             code="4003",
             code="4003",
-            info="百度搜索失败-搜索词:{} 原因:-{}".format(key, e),
-            trace_id=trace_id
+            info="百度搜索失败",
+            trace_id=trace_id,
+            data={"error": str(e)}
         )
         )
         return []
         return []

+ 36 - 10
tasks/newContentIdTask.py

@@ -410,6 +410,12 @@ class NewContentIdTask(object):
             )
             )
             return False
             return False
         try:
         try:
+            logging(
+                code="spider_1001",
+                info="开始执行搜索任务",
+                trace_id=trace_id,
+                data=kimi_result
+            )
             search_videos_count = await search_videos_from_web(
             search_videos_count = await search_videos_from_web(
                 info={
                 info={
                     "ori_title": kimi_result['ori_title'],
                     "ori_title": kimi_result['ori_title'],
@@ -425,6 +431,12 @@ class NewContentIdTask(object):
             )
             )
             if search_videos_count >= 3:
             if search_videos_count >= 3:
                 # 表示爬虫任务执行成功, 将状态从 101  改为 2
                 # 表示爬虫任务执行成功, 将状态从 101  改为 2
+                logging(
+                    code="spider_1002",
+                    info="搜索成功",
+                    trace_id=trace_id,
+                    data=kimi_result
+                )
                 await self.update_content_status(
                 await self.update_content_status(
                     new_content_status=self.TASK_SPIDER_FINISHED_STATUS,
                     new_content_status=self.TASK_SPIDER_FINISHED_STATUS,
                     trace_id=trace_id,
                     trace_id=trace_id,
@@ -432,6 +444,12 @@ class NewContentIdTask(object):
                 )
                 )
                 return True
                 return True
             else:
             else:
+                logging(
+                    code="spider_1003",
+                    info="搜索失败",
+                    trace_id=trace_id,
+                    data=kimi_result
+                )
                 await self.roll_back_content_status_when_fails(
                 await self.roll_back_content_status_when_fails(
                     process_times=process_times + 1,
                     process_times=process_times + 1,
                     trace_id=trace_id
                     trace_id=trace_id
@@ -529,6 +547,12 @@ class NewContentIdTask(object):
                             sql=update_sql,
                             sql=update_sql,
                             params=(VIDEO_DOWNLOAD_FAIL_STATUS, params['id'])
                             params=(VIDEO_DOWNLOAD_FAIL_STATUS, params['id'])
                         )
                         )
+                        logging(
+                            code="etl_1001",
+                            info="etl_下载视频失败",
+                            trace_id=trace_id,
+                            function="etl_task"
+                        )
                     else:
                     else:
                         # download cover
                         # download cover
                         cover_path = await download_cover(
                         cover_path = await download_cover(
@@ -566,6 +590,12 @@ class NewContentIdTask(object):
                             )
                             )
                         )
                         )
                         downloaded_count += 1
                         downloaded_count += 1
+                        logging(
+                            code="etl_1002",
+                            info="etl_视频下载成功",
+                            trace_id=trace_id,
+                            function="etl_task"
+                        )
                     # 如果下载的视频数已经大于3, 则直接退出循环,修改状态为ETL成功状态
                     # 如果下载的视频数已经大于3, 则直接退出循环,修改状态为ETL成功状态
                     if downloaded_count > 3:
                     if downloaded_count > 3:
                         await self.update_content_status(
                         await self.update_content_status(
@@ -584,6 +614,12 @@ class NewContentIdTask(object):
                         sql=update_sql,
                         sql=update_sql,
                         params=(VIDEO_DOWNLOAD_FAIL_STATUS, params['id'])
                         params=(VIDEO_DOWNLOAD_FAIL_STATUS, params['id'])
                     )
                     )
+                    logging(
+                        code="etl_1001",
+                        info="etl_下载视频失败",
+                        trace_id=trace_id,
+                        function="etl_task"
+                    )
             if downloaded_count >= 3:
             if downloaded_count >= 3:
                 await self.update_content_status(
                 await self.update_content_status(
                     ori_content_status=self.TASK_PROCESSING_STATUS,
                     ori_content_status=self.TASK_PROCESSING_STATUS,
@@ -596,16 +632,6 @@ class NewContentIdTask(object):
                     process_times=process_times + 1,
                     process_times=process_times + 1,
                     trace_id=trace_id
                     trace_id=trace_id
                 )
                 )
-                bot(
-                    title="视频下载失败",
-                    detail={
-                        "trace_id": trace_id,
-                        "success_count": downloaded_count,
-                        "total_count": len(videos_need_to_download_tuple),
-                        "content_id": content_id
-                    },
-                    mention=False
-                )
                 return False
                 return False
 
 
     async def publish_task(self, params, kimi_title):
     async def publish_task(self, params, kimi_title):