zhuhaoshiduomo.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. import os
  2. import json
  3. import random
  4. import sys
  5. import time
  6. import uuid
  7. import requests
  8. from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
  9. from cryptography.hazmat.backends import default_backend
  10. sys.path.append(os.getcwd())
  11. from application.items import VideoItem
  12. from application.pipeline import PiaoQuanPipelineTest
  13. from application.common.messageQueue import MQ
  14. from application.common.proxies import tunnel_proxies
  class AESCipher:
      """AES-CBC helper used to build request parameters and decode responses.

      The key and IV are fixed 16-byte values, so the cipher is AES-128 in CBC
      mode. Plaintext is padded so that its *byte* length is a multiple of the
      block size, where every padding byte holds the padding length.
      """

      # AES block size in bytes; padding always rounds up to a multiple of this.
      BLOCK_SIZE = 16

      def __init__(self):
          # Defined directly as byte strings (no conversion step needed).
          self.key = b'50102fa64073ad76'
          # Same as above: the IV is also a literal 16-byte string.
          self.iv = b'173d023138824bb0'

      def aes_encrypt(self, data):
          """Encrypt ``data`` and return the ciphertext as an upper-case hex string.

          Args:
              data: Plaintext as ``str`` (encoded as UTF-8) or ``bytes``.

          Returns:
              Upper-case hexadecimal representation of the ciphertext.
          """
          cipher = Cipher(algorithms.AES(self.key), modes.CBC(self.iv), backend=default_backend())
          encryptor = cipher.encryptor()
          # Pad the encoded bytes, not the text: multi-byte characters would
          # otherwise leave the input misaligned with the block size.
          ct = encryptor.update(self._pad(data)) + encryptor.finalize()
          return ct.hex().upper()

      def aes_decrypt(self, data):
          """Decrypt a hex-encoded ciphertext and return the plaintext as ``str``.

          Args:
              data: Ciphertext as a hexadecimal string.

          Returns:
              The decoded plaintext with padding removed.
          """
          cipher = Cipher(algorithms.AES(self.key), modes.CBC(self.iv), backend=default_backend())
          decryptor = cipher.decryptor()
          decrypted_data = decryptor.update(bytes.fromhex(data)) + decryptor.finalize()
          return self._unpad(decrypted_data).decode()

      def _pad(self, s):
          """Return ``s`` as bytes, padded up to a multiple of ``BLOCK_SIZE``.

          ``str`` input is encoded first so the padding length is computed from
          the byte length. A full block of padding is appended when the input
          is already aligned.
          """
          if isinstance(s, str):
              s = s.encode()
          pad_len = self.BLOCK_SIZE - len(s) % self.BLOCK_SIZE
          return s + bytes([pad_len]) * pad_len

      def _unpad(self, s):
          """Strip the padding whose length is stored in the last element of ``s``."""
          # Slicing (rather than indexing) keeps this working for bytes and str.
          return s[:-ord(s[len(s) - 1:])]
  class ZhuHaoShiDuoMoRecommend(object):
      """Crawler for the recommend feed served by ``api.lidongze.cn``.

      Each page of the feed is requested with an AES-encrypted query, the
      encrypted response is decoded, and every video record is converted into
      a ``VideoItem`` and handed to ``PiaoQuanPipelineTest``.
      """

      # Seconds to wait for the API before giving up; without a timeout a
      # stalled proxy or server would block the crawl indefinitely.
      REQUEST_TIMEOUT = 30

      def __init__(self, platform, mode, rule_dict, user_list, env):
          """Store the crawl configuration and set up helpers.

          Args:
              platform: Platform identifier written into every item.
              mode: Crawl strategy name written into every item.
              rule_dict: Rules forwarded to the pipeline.
              user_list: Sequence of user dicts; one is picked at random per
                  video and its ``uid`` and ``link`` keys are read.
              env: Environment name; also used as the MQ topic suffix.
          """
          self.platform = platform
          self.mode = mode
          self.rule_dict = rule_dict
          self.user_list = user_list
          self.env = env
          self.download_cnt = 0
          self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
          self.expire_flag = False
          self.cryptor = AESCipher()

      def get_recommend_list(self):
          """Walk every page of the recommend feed and process each video.

          The page count starts as a guess of 2 and is replaced by the value
          reported in each response; the next page index is derived from the
          page number the server says it returned.
          """
          url = "https://api.lidongze.cn/jeecg-boot/ugc/getVideoListsEn2"
          headers = {
              'Host': 'api.lidongze.cn',
              'xweb_xhr': '1',
              'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.4(0x13080410)XWEB/31009',
              'token': '',
              'content-type': 'application/json',
              'accept': '*/*',
              'referer': 'https://servicewechat.com/wx0afdc2669ed8df2f/3/page-frame.html',
              'accept-language': 'en-US,en;q=0.9'
          }
          page_index = 1
          total_page = 2
          while page_index <= total_page:
              query = {
                  "pageNo": page_index,
                  "pageSize": 10,
                  "groupId": "1650323161797439489",  # ID of the recommend feed
                  "vn": 1,
                  "gx": 1,
                  "appid": "wx0afdc2669ed8df2f",
                  "type": 0
              }
              # The whole query travels as one encrypted parameter.
              params = {
                  "v": self.cryptor.aes_encrypt(data=json.dumps(query))
              }
              response = requests.request(
                  "GET",
                  url,
                  headers=headers,
                  params=params,
                  proxies=tunnel_proxies(),
                  timeout=self.REQUEST_TIMEOUT,
              )
              # The response body is hex-encoded ciphertext wrapping a JSON document.
              result = json.loads(self.cryptor.aes_decrypt(response.text))
              page_info = result['list']
              total_page = page_info['pages']
              page_index = page_info['current'] + 1
              for video_obj in page_info['records']:
                  self.process_video_obj(video_obj)

      @staticmethod
      def _parse_play_cnt(raw):
          """Convert a play-count value such as ``"3万+"`` or ``"456"`` to an int.

          A value containing ``万`` is read as a multiple of 10,000, so
          ``"1.2万+"`` becomes 12000; anything else is parsed as a plain integer.

          Raises:
              ValueError: If the value is not numeric in either form.
          """
          text = str(raw).strip()
          if "万" in text:
              number = text.replace("万", "").replace("+", "")
              return int(round(float(number) * 10000))
          return int(text)

      def process_video_obj(self, video_obj):
          """Build a ``VideoItem`` from one feed record and run it through the pipeline.

          Args:
              video_obj: One entry of the feed's ``records`` list.
          """
          trace_id = self.platform + str(uuid.uuid1())
          play_cnt = self._parse_play_cnt(video_obj['playnum'])
          item = VideoItem()
          user_dict = random.choice(self.user_list)
          item.add_video_info("video_id", video_obj['id'])
          item.add_video_info("video_title", video_obj['vname'])
          item.add_video_info("play_cnt", play_cnt)
          # The crawl time is recorded as the publish time.
          item.add_video_info("publish_time_stamp", int(time.time()))
          item.add_video_info("out_user_id", video_obj['authid'])
          item.add_video_info("cover_url", video_obj['shareimg'])
          item.add_video_info("like_cnt", int(video_obj['likenum']))
          item.add_video_info("video_url", video_obj['videoaddr'])
          item.add_video_info("out_video_id", video_obj['id'])
          item.add_video_info("platform", self.platform)
          item.add_video_info("strategy", self.mode)
          item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
          item.add_video_info("user_id", user_dict['uid'])
          item.add_video_info("user_name", user_dict['link'])
          mq_obj = item.produce_item()
          pipeline = PiaoQuanPipelineTest(
              platform=self.platform,
              mode=self.mode,
              rule_dict=self.rule_dict,
              env=self.env,
              item=mq_obj,
              trace_id=trace_id,
          )
          # Only items accepted by the pipeline are printed and counted.
          if pipeline.process_item():
              print(json.dumps(mq_obj, ensure_ascii=False, indent=4))
              self.download_cnt += 1
              print(self.download_cnt)

      def run(self):
          """Entry point: crawl the recommend feed."""
          self.get_recommend_list()
  if __name__ == '__main__':
      # Manual run of the crawler against the live API.
      crawler = ZhuHaoShiDuoMoRecommend(
          platform="zhuwanwufusu",
          mode="recommend",
          rule_dict={},
          # The constructor parameter is `user_list`, and the crawler picks a
          # random entry and reads its `uid` and `link` keys, so a list of
          # dicts carrying both keys is required.
          # NOTE(review): the `link` value here is a placeholder — confirm
          # what the pipeline expects for it.
          user_list=[{"uid": 123456, "nick_name": "luojunhuishuaige", "link": "luojunhuishuaige"}],
          env="prod"
      )
      crawler.get_recommend_list()