Browse Source

头条视频测试抓取

luojunhui 7 months ago
parent
commit
9a3279e296
1 changed file with 4 additions and 3 deletions
  1. +4 -3
      applications/pipeline/crawler_pipeline.py

+ 4 - 3
applications/pipeline/crawler_pipeline.py

@@ -5,6 +5,8 @@
 import os
 import json
 
+from applications import log
+
 from applications.utils import download_gzh_video
 from applications.utils import download_toutiao_video
 from applications.utils import upload_to_oss
@@ -47,6 +49,8 @@ def scrape_video_entities_process(video_item, db_client) -> dict:
     """
     video crawler pipeline
     """
+    article_url = video_item["article_url"]
+    platform = video_item["platform"]
     video_title = video_item["article_title"]
     # whether title sensitive
     if whether_title_sensitive(video_title):
@@ -57,9 +61,6 @@ def scrape_video_entities_process(video_item, db_client) -> dict:
         return empty_dict
 
     # download video
-    article_url = video_item["article_url"]
-    platform = video_item["platform"]
-
     match platform:
         case "toutiao":
             video_path = download_toutiao_video(article_url)