소스 검색

头条视频测试抓取

luojunhui 7 달 전
부모
커밋
9a3279e296
1개의 변경된 파일, 4개의 추가 및 3개의 삭제
  1. 4 3
      applications/pipeline/crawler_pipeline.py

+ 4 - 3
applications/pipeline/crawler_pipeline.py

@@ -5,6 +5,8 @@
 import os
 import json
 
+from applications import log
+
 from applications.utils import download_gzh_video
 from applications.utils import download_toutiao_video
 from applications.utils import upload_to_oss
@@ -47,6 +49,8 @@ def scrape_video_entities_process(video_item, db_client) -> dict:
     """
     video crawler pipeline
     """
+    article_url = video_item["article_url"]
+    platform = video_item["platform"]
     video_title = video_item["article_title"]
     # whether title sensitive
     if whether_title_sensitive(video_title):
@@ -57,9 +61,6 @@ def scrape_video_entities_process(video_item, db_client) -> dict:
         return empty_dict
 
     # download video
-    article_url = video_item["article_url"]
-    platform = video_item["platform"]
-
     match platform:
         case "toutiao":
             video_path = download_toutiao_video(article_url)