|
@@ -5,6 +5,8 @@
|
|
|
import os
|
|
|
import json
|
|
|
|
|
|
+from applications import log
|
|
|
+
|
|
|
from applications.utils import download_gzh_video
|
|
|
from applications.utils import download_toutiao_video
|
|
|
from applications.utils import upload_to_oss
|
|
@@ -47,6 +49,8 @@ def scrape_video_entities_process(video_item, db_client) -> dict:
|
|
|
"""
|
|
|
video crawler pipeline
|
|
|
"""
|
|
|
+ article_url = video_item["article_url"]
|
|
|
+ platform = video_item["platform"]
|
|
|
video_title = video_item["article_title"]
|
|
|
# whether title sensitive
|
|
|
if whether_title_sensitive(video_title):
|
|
@@ -57,9 +61,6 @@ def scrape_video_entities_process(video_item, db_client) -> dict:
|
|
|
return empty_dict
|
|
|
|
|
|
# download video
|
|
|
- article_url = video_item["article_url"]
|
|
|
- platform = video_item["platform"]
|
|
|
-
|
|
|
match platform:
|
|
|
case "toutiao":
|
|
|
video_path = download_toutiao_video(article_url)
|