123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475 |
- import time
- import json
- import random
- import requests
- from selenium.webdriver import DesiredCapabilities
- from selenium.webdriver.chrome.service import Service
- from selenium.webdriver.common.by import By
- from selenium import webdriver
def find_video_url(article_url):
    """Load an article page in headless Chrome and return its video URL.

    Two page layouts are recognised, checked in this order:

    * a ``div`` with class ``js_video_poster video_poster`` whose second
      child exposes the video address in its ``src`` attribute;
    * a ``span`` with class ``js_tx_video_container`` whose first child has
      a ``src`` containing a ``vid=`` query parameter; that id is resolved
      through ``get_tencent_video_url``.

    Args:
        article_url: Address of the article page to load.

    Returns:
        The ``src`` attribute value for the first layout, the result of
        ``get_tencent_video_url`` for the second, or ``0`` when no video
        element (or no usable ``src`` on the embed) is found.
    """
    poster_xpath = '//div[@class="js_video_poster video_poster"]/*[2]'
    embed_xpath = '//span[@class="js_tx_video_container"]/*[1]'

    # Request performance logging.
    # NOTE(review): this mutates the shared DesiredCapabilities.CHROME dict
    # and the result is never passed to the driver explicitly; it may still
    # be picked up through ChromeOptions defaults -- confirm before removing.
    capabilities = DesiredCapabilities.CHROME
    capabilities["goog:loggingPrefs"] = {"performance": "ALL"}

    # Run without opening a browser window.
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("headless")
    chrome_options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36"
    )
    chrome_options.add_argument("--no-sandbox")

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.implicitly_wait(random.randint(5, 10))
        driver.get(article_url)
        time.sleep(1)

        # Query each layout once and reuse the match instead of looking the
        # same element up a second time.
        posters = driver.find_elements(By.XPATH, poster_xpath)
        if posters:
            return posters[0].get_attribute("src")

        embeds = driver.find_elements(By.XPATH, embed_xpath)
        if embeds:
            embed_src = embeds[0].get_attribute("src")
            if not embed_src:
                # An embed without a src gives nothing to extract an id from.
                return 0
            video_id = embed_src.split("vid=")[-1].split("&")[0]
            return get_tencent_video_url(video_id)

        return 0
    finally:
        # Always shut the browser down, even when loading or parsing fails,
        # so no headless Chrome process is left behind.
        driver.quit()
def get_tencent_video_url(video_id, timeout=10):
    """Resolve a Tencent Video id to a direct ``.mp4`` URL.

    Calls the ``vv.video.qq.com/getinfo`` endpoint, unwraps the
    ``QZOutputJson=...;`` envelope around the JSON body, and combines the
    first listed base URL with the video id and its ``fvkey``.

    Args:
        video_id: Identifier of the video; converted with ``str``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A string of the form ``<base url><video_id>.mp4?vkey=<fvkey>``.

    Raises:
        KeyError, IndexError: If the response lacks the expected
            ``vl -> vi[0] -> ul -> ui[0] -> url`` / ``fvkey`` fields.
        ValueError: If the unwrapped body is not valid JSON.
    """
    response = requests.get(
        "https://vv.video.qq.com/getinfo",
        # Let requests build and URL-encode the query string.
        params={
            "vids": str(video_id),
            "platform": "101001",
            "charge": "0",
            "otype": "json",
        },
        timeout=timeout,
    )

    # The body is wrapped as `QZOutputJson={...};`. Strip only the leading
    # prefix and the trailing semicolon so the JSON content itself is never
    # rewritten, whatever character precedes the closing brace.
    payload = response.text.strip()
    prefix = "QZOutputJson="
    if payload.startswith(prefix):
        payload = payload[len(prefix):]
    payload = payload.rstrip(";")

    info = json.loads(payload)
    first_video = info["vl"]["vi"][0]
    base_url = first_video["ul"]["ui"][0]["url"]
    fvkey = first_video["fvkey"]
    return base_url + str(video_id) + ".mp4?vkey=" + fvkey
|