ソースを参照

修改视频url为空和视频ID获取不到后不再保留该数据

zhangliang 1 週間前
コミット
09ec3454a4
3 ファイル変更、17 行追加、5 行削除
  1. + 5 - 3
      utils/dy_ks_get_url.py
  2. + 0 - 2
      utils/feishu_form.py
  3. + 12 - 0
      workers/consumption_work.py

+ 5 - 3
utils/dy_ks_get_url.py

@@ -89,6 +89,8 @@ class Dy_KS:
             }
             response = requests.request(url=url, method='GET', headers=headers, allow_redirects=False, timeout= 30)
             location = response.headers.get('Location', None)
+            if location == "https://kuaishou.com/":
+                return "作品不存在", None, None
             video_id = re.search(r'/(f|photo|short-video|long-video)/(.*)/?',
                                  location.split('?')[0] if location else url).group(2)
             url = "http://8.217.192.46:8889/crawler/kuai_shou/detail"
@@ -174,9 +176,9 @@ class Dy_KS:
                 msg = html.unescape(url)
                 pattern = re.search(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+(/[-\w._~:/#[\]@!$&()*+,;=]*)', msg)
                 logger.info(f"[+] pattern == {pattern}")
-                if pattern is None:
-                    logger.error(f"[+] {url} 提取 url失败")
-                    return "重新处理",None,None,None
+                # if pattern is None:
+                #     logger.error(f"[+] {url} 提取 url失败")
+                #     return "重新处理",None,None,None
                 url = pattern.group()
                 host = urlparse(url).netloc
                 logger.info(f"[+] url == {url}  host=={host}")

+ 0 - 2
utils/feishu_form.py

@@ -42,8 +42,6 @@ class Material():
                 video_clipping = row[17]  # 剪裁
                 video_clipping_time = row[18]  # 秒数剪裁
                 title_transform = row[19]  # 标题改造
-                if not video_url:
-                    continue
                 number_dict = {
                     "channel_mark": channel_mark,
                     "name":NAME,

+ 12 - 0
workers/consumption_work.py

@@ -492,6 +492,18 @@ class ConsumptionRecommend(object):
             return
         data = orjson.loads(data)
         try:
+            # 如果视频地址为空,则飞书群通知,数据不再写入
+            if not data['video_url']:
+                logger.info('[+] 视频url为空,跳过本次执行...')
+                text = (
+                    f"**负责人**: {data['name']}\n"
+                    f"**内容**: {data}\n"
+                    f"**失败信息**: 飞书文档视频地址为空\n"
+                )
+                Feishu.finish_bot(text,
+                                  "https://open.feishu.cn/open-apis/bot/v2/hook/65bc5463-dee9-46d0-bc2d-ec6c49a8f3cd",
+                                  f"【  搬运&改造效率工具失败通知 】")
+                return
             cls.data_handle(data, file_path, redis_name,studio_key)
             for filename in os.listdir(CACHE_DIR):
                 # 检查文件名是否包含关键字