import time
import json
import random

import requests
from selenium.webdriver import DesiredCapabilities
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium import webdriver

# Desktop Chrome user agent sent by the headless browser.
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36"
)

# Second child of the video poster container; its ``src`` is the video URL.
_POSTER_XPATH = '//div[@class="js_video_poster video_poster"]/*[2]'

# First child of the Tencent video container; its ``src`` carries ``vid=``.
_TENCENT_XPATH = '//span[@class="js_tx_video_container"]/*[1]'

# Prefix wrapped around the JSON body returned by the getinfo endpoint.
_JSONP_PREFIX = "QZOutputJson="


def find_video_url(article_url):
    """Open *article_url* in headless Chrome and locate its embedded video.

    Two page layouts are recognised:

    * a video poster element whose ``src`` attribute is returned as-is;
    * a Tencent video container whose ``src`` holds a ``vid=`` query value,
      which is resolved through :func:`get_tencent_video_url`.

    Args:
        article_url: URL of the article page to load.

    Returns:
        The video URL, or ``0`` when neither layout is found (or the Tencent
        container has no ``src``). The poster branch returns whatever
        ``get_attribute("src")`` yields, which may be ``None``.
    """
    # Request performance logging preferences.
    # NOTE(review): this mutates selenium's shared CHROME capabilities dict
    # and is never handed to the driver below, so it has no effect on this
    # session. Kept because other code may read the shared dict; confirm
    # whether ``chrome_options.set_capability`` was intended instead.
    ca = DesiredCapabilities.CHROME
    ca["goog:loggingPrefs"] = {"performance": "ALL"}

    # Run without opening a browser window.
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("headless")
    chrome_options.add_argument("user-agent=" + _USER_AGENT)
    chrome_options.add_argument("--no-sandbox")

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.implicitly_wait(random.randint(5, 10))
        driver.get(article_url)
        time.sleep(1)

        # Query each layout once and reuse the match, instead of locating
        # the same element a second time.
        posters = driver.find_elements(By.XPATH, _POSTER_XPATH)
        if posters:
            return posters[0].get_attribute("src")

        containers = driver.find_elements(By.XPATH, _TENCENT_XPATH)
        if containers:
            iframe_src = containers[0].get_attribute("src")
            if not iframe_src:
                # No src to extract a video id from: treat as "not found".
                return 0
            video_id = iframe_src.split("vid=")[-1].split("&")[0]
            return get_tencent_video_url(video_id)

        return 0
    finally:
        # Always release the browser process, even when a lookup or the
        # Tencent request raises.
        driver.quit()


def _parse_getinfo_payload(text):
    """Decode the getinfo response body, dropping its ``QZOutputJson=`` wrapper."""
    payload = text.strip()
    if payload.startswith(_JSONP_PREFIX):
        payload = payload[len(_JSONP_PREFIX):]
    # The body is terminated by a ';' that is not part of the JSON document.
    return json.loads(payload.rstrip(";"))


def get_tencent_video_url(video_id, timeout=10):
    """Build a direct ``.mp4`` URL for a Tencent video id.

    Args:
        video_id: Video identifier, converted with ``str`` before use.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block forever.

    Returns:
        The first URL prefix from the response joined with the video id,
        ``.mp4?vkey=`` and the response's ``fvkey``.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        json.JSONDecodeError: If the response body is not valid JSON.
        KeyError, IndexError: If the response lacks the expected fields.
    """
    url = (
        "https://vv.video.qq.com/getinfo?vids="
        + str(video_id)
        + "&platform=101001&charge=0&otype=json"
    )
    body = requests.get(url=url, timeout=timeout).text
    info = _parse_getinfo_payload(body)["vl"]["vi"][0]

    base_url = info["ul"]["ui"][0]["url"]
    fvkey = info["fvkey"]
    return base_url + str(video_id) + ".mp4?vkey=" + fvkey