XiguaRecommend.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. import random
  2. import string
  3. import urllib3
  4. import requests
  5. import re
  6. from requests.adapters import HTTPAdapter
  7. import base64
  8. """
  9. 西瓜推荐的方法
  10. """
  11. def tunnel_proxies():
  12. # 隧道域名:端口号
  13. tunnel = "q796.kdltps.com:15818"
  14. # 用户名密码方式
  15. username = "t17772369458618"
  16. password = "5zqcjkmy"
  17. tunnel_proxies = {
  18. "http": "http://%(user)s:%(pwd)s@%(proxy)s/"
  19. % {"user": username, "pwd": password, "proxy": tunnel},
  20. "https": "http://%(user)s:%(pwd)s@%(proxy)s/"
  21. % {"user": username, "pwd": password, "proxy": tunnel},
  22. }
  23. # 白名单方式(需提前设置白名单)
  24. # proxies = {
  25. # "http": "http://%(proxy)s/" % {"proxy": tunnel},
  26. # "https": "http://%(proxy)s/" % {"proxy": tunnel}
  27. # }
  28. # 要访问的目标网页
  29. # target_url = "https://www.kuaishou.com/profile/3xk9tkk6kkwkf7g"
  30. # target_url = "https://dev.kdlapi.com/testproxy"
  31. # # 使用隧道域名发送请求
  32. # response = requests.get(target_url, proxies=proxies)
  33. # print(response.text)
  34. return tunnel_proxies # {'http': 'http://t17772369458618:5zqcjkmy@q796.kdltps.com:15818/', 'https': 'http://t17772369458618:5zqcjkmy@q796.kdltps.com:15818/'}
  35. def random_signature():
  36. """
  37. # 生成随机签名
  38. :return: str
  39. """
  40. src_digits = string.digits # string_数字
  41. src_uppercase = string.ascii_uppercase # string_大写字母
  42. src_lowercase = string.ascii_lowercase # string_小写字母
  43. digits_num = random.randint(1, 6)
  44. uppercase_num = random.randint(1, 26 - digits_num - 1)
  45. lowercase_num = 26 - (digits_num + uppercase_num)
  46. password = (
  47. random.sample(src_digits, digits_num)
  48. + random.sample(src_uppercase, uppercase_num)
  49. + random.sample(src_lowercase, lowercase_num)
  50. )
  51. random.shuffle(password)
  52. new_password = "AAAAAAAAAA" + "".join(password)[10:-4] + "AAAB"
  53. new_password_start = new_password[0:18]
  54. new_password_end = new_password[-7:]
  55. if new_password[18] == "8":
  56. new_password = new_password_start + "w" + new_password_end
  57. elif new_password[18] == "9":
  58. new_password = new_password_start + "x" + new_password_end
  59. elif new_password[18] == "-":
  60. new_password = new_password_start + "y" + new_password_end
  61. elif new_password[18] == ".":
  62. new_password = new_password_start + "z" + new_password_end
  63. else:
  64. new_password = new_password_start + "y" + new_password_end
  65. return new_password
  66. def get_video_url(video_info):
  67. video_url_dict = {}
  68. video_resource = video_info.get("videoResource", {})
  69. dash_120fps = video_resource.get("dash_120fps", {})
  70. normal = video_resource.get("normal", {})
  71. # 从dash_120fps和normal字典中获取video_list字典
  72. video_list = dash_120fps.get("video_list", {}) or normal.get("video_list", {})
  73. # 获取video_list字典中的video_4、video_3、video_2或video_1的值。如果找到非空视频URL,则将其赋值给变量video_url。否则,将赋值为空字符串。
  74. video = (
  75. video_list.get("video_4")
  76. or video_list.get("video_3")
  77. or video_list.get("video_2")
  78. or video_list.get("video_1")
  79. )
  80. video_url = video.get("backup_url_1", "") if video else ""
  81. audio_url = video.get("backup_url_1", "") if video else ""
  82. video_width = video.get("vwidth", 0) if video else 0
  83. video_height = video.get("vheight", 0) if video else 0
  84. video_url = re.sub(r"[^a-zA-Z0-9+/=]", "", video_url) # 从视频URL中删除特殊字符
  85. audio_url = re.sub(r"[^a-zA-Z0-9+/=]", "", audio_url) # 从音频URL中删除特殊字符
  86. video_url = base64.b64decode(video_url).decode("utf8") # 解码视频URL
  87. audio_url = base64.b64decode(audio_url).decode("utf8") # 解码音频URL
  88. video_url_dict["video_url"] = video_url
  89. video_url_dict["audio_url"] = audio_url
  90. video_url_dict["video_width"] = video_width
  91. video_url_dict["video_height"] = video_height
  92. return video_url_dict
  93. class XiguaRecommend:
  94. def __init__(self):
  95. self.platform = "xigua"
  96. def get_comment_cnt(self, item_id):
  97. """
  98. 获取评论数量
  99. :param item_id:
  100. :return:
  101. """
  102. url = "https://www.ixigua.com/tlb/comment/article/v5/tab_comments/?"
  103. params = {
  104. "tab_index": "0",
  105. "count": "10",
  106. "offset": "10",
  107. "group_id": str(item_id),
  108. "item_id": str(item_id),
  109. "aid": "1768",
  110. "msToken": "50-JJObWB07HfHs-BMJWT1eIDX3G-6lPSF_i-QwxBIXE9VVa-iN0jbEXR5pG2DKjXBmP299n6ZTuXzY-GAy968CCvouSAYIS4GzvGQT3pNlKNejr5G4-1g==",
  111. "X-Bogus": "DFSzswVOyGtANVeWtCLMqR/F6q9U",
  112. "_signature": random_signature(),
  113. }
  114. headers = {
  115. "authority": "www.ixigua.com",
  116. "accept": "application/json, text/plain, */*",
  117. "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
  118. "cache-control": "no-cache",
  119. "cookie": "MONITOR_WEB_ID=67cb5099-a022-4ec3-bb8e-c4de6ba51dd0; passport_csrf_token=72b2574f3c99f8ba670e42df430218fd; passport_csrf_token_default=72b2574f3c99f8ba670e42df430218fd; sid_guard=c7472b508ea631823ba765a60cf8757f%7C1680867422%7C3024002%7CFri%2C+12-May-2023+11%3A37%3A04+GMT; uid_tt=c13f47d51767f616befe32fb3e9f485a; uid_tt_ss=c13f47d51767f616befe32fb3e9f485a; sid_tt=c7472b508ea631823ba765a60cf8757f; sessionid=c7472b508ea631823ba765a60cf8757f; sessionid_ss=c7472b508ea631823ba765a60cf8757f; sid_ucp_v1=1.0.0-KGUzNWYxNmRkZGJiZjgxY2MzZWNkMTEzMTkwYjY1Yjg5OTY5NzVlNmMKFQiu3d-eqQIQ3oDAoQYYGCAMOAhACxoCaGwiIGM3NDcyYjUwOGVhNjMxODIzYmE3NjVhNjBjZjg3NTdm; ssid_ucp_v1=1.0.0-KGUzNWYxNmRkZGJiZjgxY2MzZWNkMTEzMTkwYjY1Yjg5OTY5NzVlNmMKFQiu3d-eqQIQ3oDAoQYYGCAMOAhACxoCaGwiIGM3NDcyYjUwOGVhNjMxODIzYmE3NjVhNjBjZjg3NTdm; odin_tt=b893608d4dde2e1e8df8cd5d97a0e2fbeafc4ca762ac72ebef6e6c97e2ed19859bb01d46b4190ddd6dd17d7f9678e1de; SEARCH_CARD_MODE=7168304743566296612_0; support_webp=true; support_avif=false; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; tt_scid=7Pux7s634-z8DYvCM20y7KigwH5u7Rh6D9C-RROpnT.aGMEcz6Vsxp.oai47wJqa4f86; ttwid=1%7CHHtv2QqpSGuSu8r-zXF1QoWsvjmNi1SJrqOrZzg-UCY%7C1683858689%7Ca5223fe1500578e01e138a0d71d6444692018296c4c24f5885af174a65873c95; ixigua-a-s=3; msToken=50-JJObWB07HfHs-BMJWT1eIDX3G-6lPSF_i-QwxBIXE9VVa-iN0jbEXR5pG2DKjXBmP299n6ZTuXzY-GAy968CCvouSAYIS4GzvGQT3pNlKNejr5G4-1g==; __ac_nonce=0645dcbf0005064517440; __ac_signature=_02B4Z6wo00f01FEGmAwAAIDBKchzCGqn-MBRJpyAAHAjieFC5GEg6gGiwz.I4PRrJl7f0GcixFrExKmgt6QI1i1S-dQyofPEj2ugWTCnmKUdJQv-wYuDofeKNe8VtMtZq2aKewyUGeKU-5Ud21; ixigua-a-s=3",
  120. "pragma": "no-cache",
  121. "referer": f"https://www.ixigua.com/{item_id}?logTag=3c5aa86a8600b9ab8540",
  122. "sec-ch-ua": '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
  123. "sec-ch-ua-mobile": "?0",
  124. "sec-ch-ua-platform": '"macOS"',
  125. "sec-fetch-dest": "empty",
  126. "sec-fetch-mode": "cors",
  127. "sec-fetch-site": "same-origin",
  128. "tt-anti-token": "cBITBHvmYjEygzv-f9c78c1297722cf1f559c74b084e4525ce4900bdcf9e8588f20cc7c2e3234422",
  129. "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.35",
  130. "x-secsdk-csrf-token": "000100000001f8e733cf37f0cd255a51aea9a81ff7bc0c09490cfe41ad827c3c5c18ec809279175e4d9f5553d8a5",
  131. }
  132. urllib3.disable_warnings()
  133. s = requests.session()
  134. # max_retries=3 重试3次
  135. s.mount("http://", HTTPAdapter(max_retries=3))
  136. s.mount("https://", HTTPAdapter(max_retries=3))
  137. response = s.get(
  138. url=url,
  139. headers=headers,
  140. params=params,
  141. verify=False,
  142. proxies=tunnel_proxies(),
  143. timeout=5,
  144. )
  145. response.close()
  146. if (
  147. response.status_code != 200
  148. or "total_number" not in response.json()
  149. or response.json() == {}
  150. ):
  151. return 0
  152. return response.json().get("total_number", 0)