get_video_url.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. import time
  2. import json
  3. import random
  4. import requests
  5. from selenium.webdriver import DesiredCapabilities
  6. from selenium.webdriver.chrome.service import Service
  7. from selenium.webdriver.common.by import By
  8. from selenium import webdriver
  9. def find_video_url(article_url):
  10. # 打印请求配置
  11. ca = DesiredCapabilities.CHROME
  12. ca["goog:loggingPrefs"] = {"performance": "ALL"}
  13. # 不打开浏览器运行
  14. chrome_options = webdriver.ChromeOptions()
  15. chrome_options.add_argument("headless")
  16. chrome_options.add_argument(
  17. f"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36"
  18. )
  19. chrome_options.add_argument("--no-sandbox")
  20. driver = webdriver.Chrome(options=chrome_options)
  21. driver.implicitly_wait(random.randint(5, 10))
  22. driver.get(article_url)
  23. time.sleep(1)
  24. if (
  25. len(
  26. driver.find_elements(
  27. By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]'
  28. )
  29. )
  30. != 0
  31. ):
  32. video_url = driver.find_element(
  33. By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]'
  34. ).get_attribute("src")
  35. elif (
  36. len(
  37. driver.find_elements(
  38. By.XPATH, '//span[@class="js_tx_video_container"]/*[1]'
  39. )
  40. )
  41. != 0
  42. ):
  43. iframe = driver.find_element(
  44. By.XPATH, '//span[@class="js_tx_video_container"]/*[1]'
  45. ).get_attribute("src")
  46. video_id = iframe.split("vid=")[-1].split("&")[0]
  47. video_url = get_tencent_video_url(video_id)
  48. else:
  49. video_url = 0
  50. driver.quit()
  51. # print("successful find video url", video_url)
  52. return video_url
  53. def get_tencent_video_url(video_id):
  54. url = (
  55. "https://vv.video.qq.com/getinfo?vids="
  56. + str(video_id)
  57. + "&platform=101001&charge=0&otype=json"
  58. )
  59. response = (
  60. requests.get(url=url).text.replace("QZOutputJson=", "").replace('"};', '"}')
  61. )
  62. response = json.loads(response)
  63. url = response["vl"]["vi"][0]["ul"]["ui"][0]["url"]
  64. fvkey = response["vl"]["vi"][0]["fvkey"]
  65. video_url = url + str(video_id) + ".mp4?vkey=" + fvkey
  66. return video_url