浏览代码

2024-06-17
标题该用原标题

罗俊辉 11 个月前
父节点
当前提交
07d562ac84
共有 2 个文件被更改，包括 30 次插入和 3 次删除
  1. 27 0
      applications/functions/browser_extract.py
  2. 3 3
      applications/functions/video_item.py

+ 27 - 0
applications/functions/browser_extract.py

@@ -0,0 +1,27 @@
+# """
+# @author: luojunhui
+# """
+# import time
+# from selenium import webdriver
+# from selenium.webdriver.chrome.service import Service
+# from selenium.webdriver.chrome.options import Options
+# from webdriver_manager.chrome import ChromeDriverManager
+#
+#
+# def get_source_code(url):
+#     """
+#     :param url:
+#     :return:
+#     """
+#     # 配置 Chrome 选项
+#     chrome_options = Options()
+#     chrome_options.add_argument('--headless')  # 无头模式
+#     chrome_options.add_argument('--disable-gpu')
+#     chrome_options.add_argument('--incognito')
+#     service = Service(ChromeDriverManager().install())
+#     driver = webdriver.Chrome(service=service, options=chrome_options)
+#     driver.get(url)
+#     time.sleep(3)
+#     page_text = driver.page_source
+#     driver.quit()
+#     return page_text

+ 3 - 3
applications/functions/video_item.py

@@ -155,7 +155,7 @@ class VideoProducer(object):
         item.add_video_info("user_id", user["uid"])
         item.add_video_info("user_name", user["nick_name"])
         item.add_video_info("video_id", video_obj['id'])
-        item.add_video_info("video_title", trace_id)
+        item.add_video_info("video_title", video_obj.get('title', trace_id))
         item.add_video_info("publish_time_stamp", publish_time_stamp)
         item.add_video_info("video_url", video_obj["playurl"])
         item.add_video_info("cover_url", video_obj["poster"])
@@ -185,7 +185,7 @@ class VideoProducer(object):
         item.add_video_info("user_id", user["uid"])
         item.add_video_info("user_name", user["nick_name"])
         item.add_video_info("video_id", video_obj['video_id'])
-        item.add_video_info("video_title", trace_id)
+        item.add_video_info("video_title", video_obj.get('video_title', trace_id))
         item.add_video_info("publish_time_stamp", int(publish_time_stamp))
         item.add_video_info("video_url", video_obj["video_url"])
         item.add_video_info("cover_url", video_obj["cover_url"])
@@ -214,7 +214,7 @@ class VideoProducer(object):
         item.add_video_info("user_id", user["uid"])
         item.add_video_info("user_name", user["nick_name"])
         item.add_video_info("video_id", video_obj['channel_content_id'])
-        item.add_video_info("video_title", trace_id)
+        item.add_video_info("video_title", video_obj.get('title', trace_id))
         item.add_video_info("publish_time_stamp", int(publish_time_stamp))
         item.add_video_info("video_url", video_obj["video_url_list"][0]['video_url'])
         item.add_video_info("cover_url", video_obj["image_url_list"][0]['image_url'])