hai 7 meses · 53d384756a
--- a/applications/search/dy_search.py
+++ b/applications/search/dy_search.py
@@ -2,13 +2,11 @@
 
															 @author: luojunhui
														
 
															 """
														
 
															 import json
														
 
															-import traceback
														
 
															 import requests
														
 
															 from applications.functions.common import sensitive_flag
														
 
															 from applications.log import logging
														
 
															-from applications.feishu import bot
														
 
															 def douyin_search(keyword, sensitive_words, trace_id):
														
@@ -35,6 +33,11 @@ def douyin_search(keyword, sensitive_words, trace_id):
 
															     response = requests.request("POST", url, headers=headers, data=payload)
														
 
															     try:
														
 
															         dt_list = response.json()['data']['data']
														
 
															+        logging(
														
 
															+            code="4002",
														
 
															+            info="抖音搜索成功",
														
 
															+            trace_id=trace_id
														
 
															+        )
														
 
															         L = []
														
 
															         for obj in dt_list:
														
 
															             try:
														
@@ -60,22 +63,13 @@ def douyin_search(keyword, sensitive_words, trace_id):
 
															             },
														
 
															             trace_id=trace_id
														
 
															         )
														
 
															-        if not L and len(dt_list) > 0:
														
 
															-            bot(
														
 
															-                title="抖音搜索失败",
														
 
															-                detail={
														
 
															-                    "keys": keyword,
														
 
															-                    "搜索的视频数量": len(dt_list),
														
 
															-                    "详情请求的视频数量": len(L)
														
 
															-                },
														
 
															-                mention=False
														
 
															-            )
														
 
															         return L
														
 
															     except Exception as e:
														
 
															         logging(
														
 
															             code="4003",
														
 
															-            info="抖音搜索失败-搜索词：{} 原因：-{}".format(keyword, e),
														
 
															-            trace_id=trace_id
														
 
															+            info="抖音搜索失败",
														
 
															+            trace_id=trace_id,
														
 
															+            data={"error": str(e)}
														
 
															         )
														
 
															         return []
														
 
															     # logging(
														
@@ -99,9 +93,24 @@ def douyin_detail(video_id):
 
															     headers = {
														
 
															         'Content-Type': 'application/json'
														
 
															     }
														
 
															+    print(video_id)
														
 
															     response = requests.request("POST", url, headers=headers, data=payload).json()
														
 
															-    video_info = response['data']['data']
														
 
															-    if video_info['content_type'] == "note":
														
 
															+    logging(
														
 
															+        code="4005",
														
 
															+        info="抖音请求详情",
														
 
															+        data=response
														
 
															+    )
														
 
															+    print(json.dumps(response,ensure_ascii=False, indent=4))
														
 
															+    try:
														
 
															+        video_info = response['data']['data']
														
 
															+        if video_info['content_type'] == "note":
														
 
															+            return None
														
 
															+        else:
														
 
															+            return video_info
														
 
															+    except Exception as e:
														
 
															+        logging(
														
 
															+            code="4006",
														
 
															+            info="抖音请求详情失败",
														
 
															+            data={"error": str(e)}
														
 
															+        )
														
 
															         return None
														
 
															-    else:
														
 
															-        return video_info
														
--- a/applications/search/hksp_search.py
+++ b/applications/search/hksp_search.py
@@ -13,7 +13,6 @@ from fake_useragent import FakeUserAgent
 
															 from applications.functions.common import sensitive_flag
														
 
															 from applications.log import logging
														
 
															-from applications.feishu import bot
														
 
															 def tunnel_proxies():
														
@@ -108,6 +107,11 @@ def hksp_search(key, sensitive_words, trace_id):
 
															         ).json()
														
 
															         data_list = response['data']['list']
														
 
															         L = []
														
 
															+        logging(
														
 
															+            code="4002",
														
 
															+            info="百度搜索成功",
														
 
															+            trace_id=trace_id
														
 
															+        )
														
 
															         for data in data_list:
														
 
															             try:
														
 
															                 video_id = data['vid']
														
@@ -131,21 +135,12 @@ def hksp_search(key, sensitive_words, trace_id):
 
															             },
														
 
															             trace_id=trace_id
														
 
															         )
														
 
															-        if not L and len(data_list) > 0:
														
 
															-            bot(
														
 
															-                title="抖音搜索失败",
														
 
															-                detail={
														
 
															-                    "keys": key,
														
 
															-                    "搜索的视频数量": len(data_list),
														
 
															-                    "详情请求的视频数量": len(L)
														
 
															-                },
														
 
															-                mention=False
														
 
															-            )
														
 
															         return L
														
 
															     except Exception as e:
														
 
															         logging(
														
 
															             code="4003",
														
 
															-            info="百度搜索失败-搜索词：{} 原因：-{}".format(key, e),
														
 
															-            trace_id=trace_id
														
 
															+            info="百度搜索失败",
														
 
															+            trace_id=trace_id,
														
 
															+            data={"error": str(e)}
														
 
															         )
														
 
															         return []
														
--- a/tasks/newContentIdTask.py
+++ b/tasks/newContentIdTask.py
@@ -410,6 +410,12 @@ class NewContentIdTask(object):
 
															             )
														
 
															             return False
														
 
															         try:
														
 
															+            logging(
														
 
															+                code="spider_1001",
														
 
															+                info="开始执行搜索任务",
														
 
															+                trace_id=trace_id,
														
 
															+                data=kimi_result
														
 
															+            )
														
 
															             search_videos_count = await search_videos_from_web(
														
 
															                 info={
														
 
															                     "ori_title": kimi_result['ori_title'],
														
@@ -425,6 +431,12 @@ class NewContentIdTask(object):
 
															             )
														
 
															             if search_videos_count >= 3:
														
 
															                 # 表示爬虫任务执行成功, 将状态从 101  改为 2
														
 
															+                logging(
														
 
															+                    code="spider_1002",
														
 
															+                    info="搜索成功",
														
 
															+                    trace_id=trace_id,
														
 
															+                    data=kimi_result
														
 
															+                )
														
 
															                 await self.update_content_status(
														
 
															                     new_content_status=self.TASK_SPIDER_FINISHED_STATUS,
														
 
															                     trace_id=trace_id,
														
@@ -432,6 +444,12 @@ class NewContentIdTask(object):
 
															                 )
														
 
															                 return True
														
 
															             else:
														
 
															+                logging(
														
 
															+                    code="spider_1003",
														
 
															+                    info="搜索失败",
														
 
															+                    trace_id=trace_id,
														
 
															+                    data=kimi_result
														
 
															+                )
														
 
															                 await self.roll_back_content_status_when_fails(
														
 
															                     process_times=process_times + 1,
														
 
															                     trace_id=trace_id
														
@@ -529,6 +547,12 @@ class NewContentIdTask(object):
 
															                             sql=update_sql,
														
 
															                             params=(VIDEO_DOWNLOAD_FAIL_STATUS, params['id'])
														
 
															                         )
														
 
															+                        logging(
														
 
															+                            code="etl_1001",
														
 
															+                            info="etl_下载视频失败",
														
 
															+                            trace_id=trace_id,
														
 
															+                            function="etl_task"
														
 
															+                        )
														
 
															                     else:
														
 
															                         # download cover
														
 
															                         cover_path = await download_cover(
														
@@ -566,6 +590,12 @@ class NewContentIdTask(object):
 
															                             )
														
 
															                         )
														
 
															                         downloaded_count += 1
														
 
															+                        logging(
														
 
															+                            code="etl_1002",
														
 
															+                            info="etl_视频下载成功",
														
 
															+                            trace_id=trace_id,
														
 
															+                            function="etl_task"
														
 
															+                        )
														
 
															                     # 如果下载的视频数已经大于3， 则直接退出循环，修改状态为ETL成功状态
														
 
															                     if downloaded_count > 3:
														
 
															                         await self.update_content_status(
														
@@ -584,6 +614,12 @@ class NewContentIdTask(object):
 
															                         sql=update_sql,
														
 
															                         params=(VIDEO_DOWNLOAD_FAIL_STATUS, params['id'])
														
 
															                     )
														
 
															+                    logging(
														
 
															+                        code="etl_1001",
														
 
															+                        info="etl_下载视频失败",
														
 
															+                        trace_id=trace_id,
														
 
															+                        function="etl_task"
														
 
															+                    )
														
 
															             if downloaded_count >= 3:
														
 
															                 await self.update_content_status(
														
 
															                     ori_content_status=self.TASK_PROCESSING_STATUS,
														
@@ -596,16 +632,6 @@ class NewContentIdTask(object):
 
															                     process_times=process_times + 1,
														
 
															                     trace_id=trace_id
														
 
															                 )
														
 
															-                bot(
														
 
															-                    title="视频下载失败",
														
 
															-                    detail={
														
 
															-                        "trace_id": trace_id,
														
 
															-                        "success_count": downloaded_count,
														
 
															-                        "total_count": len(videos_need_to_download_tuple),
														
 
															-                        "content_id": content_id
														
 
															-                    },
														
 
															-                    mention=False
														
 
															-                )
														
 
															                 return False
														
 
															     async def publish_task(self, params, kimi_title):