zwwfs_recommend_test.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. import os
  2. import json
  3. import sys
  4. import time
  5. import uuid
  6. import requests
  7. sys.path.append(os.getcwd())
  8. from common.video_item import VideoItem
  9. from common import tunnel_proxies
  10. from common.pipeline import PiaoQuanPipelineTest
  11. from common.mq import MQ
  12. from zhuwanwufusu.crypt import AESCipher as AES
  13. class ZhuWanWuFuSuRecommend(object):
  14. def __init__(self, platform, mode, rule_dict, user_dict, env):
  15. self.platform = platform
  16. self.mode = mode
  17. self.rule_dict = rule_dict
  18. self.user_dict = user_dict
  19. self.env = env
  20. self.download_cnt = 0
  21. self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
  22. self.expire_flag = False
  23. self.cryptor = AES()
  24. def get_recommend_list(self):
  25. url = "https://api.lidongze.cn/jeecg-boot/ugc/getVideoListsEn2"
  26. headers = {
  27. 'Host': 'api.lidongze.cn',
  28. 'xweb_xhr': '1',
  29. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.4(0x13080410)XWEB/31009',
  30. 'token': '',
  31. 'content-type': 'application/json',
  32. 'accept': '*/*',
  33. 'referer': 'https://servicewechat.com/wx0afdc2669ed8df2f/3/page-frame.html',
  34. 'accept-language': 'en-US,en;q=0.9'
  35. }
  36. page_index = 1
  37. total_page = 2
  38. while page_index <= total_page:
  39. query = {
  40. "pageNo": page_index,
  41. "pageSize": 10,
  42. "groupId": "1650323161797439489", # 推荐流的 ID
  43. "vn": 1,
  44. "gx": 1,
  45. "appid": "wx0afdc2669ed8df2f",
  46. "type": 0
  47. }
  48. params = {
  49. "v": self.cryptor.aes_encrypt(data=json.dumps(query))
  50. }
  51. response = requests.request("GET", url, headers=headers, params=params, proxies=tunnel_proxies())
  52. result = json.loads(self.cryptor.aes_decrypt(response.text))
  53. total_page = result['list']['pages']
  54. page_index = result['list']['current'] + 1
  55. for index, video_obj in enumerate(result['list']['records']):
  56. self.process_video_obj(video_obj)
  57. def process_video_obj(self, video_obj):
  58. trace_id = self.platform + str(uuid.uuid1())
  59. play_cnt = int(video_obj['playnum'].replace("万+", "0000")) if "万+" in video_obj['playnum'] else int(
  60. video_obj['playnum'])
  61. item = VideoItem()
  62. item.add_video_info("video_id", video_obj['id'])
  63. item.add_video_info("video_title", video_obj['vname'])
  64. item.add_video_info("play_cnt", play_cnt)
  65. item.add_video_info("publish_time_stamp", int(time.time()))
  66. item.add_video_info("out_user_id", video_obj['authid'])
  67. item.add_video_info("cover_url", video_obj['shareimg'])
  68. item.add_video_info("like_cnt", int(video_obj['likenum']))
  69. item.add_video_info("video_url", video_obj['videoaddr'])
  70. item.add_video_info("out_video_id", video_obj['id'])
  71. item.add_video_info("platform", self.platform)
  72. item.add_video_info("strategy", self.mode)
  73. item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
  74. item.add_video_info("user_id", self.user_dict['uid'])
  75. item.add_video_info("user_name", self.user_dict['nick_name'])
  76. mq_obj = item.produce_item()
  77. pipeline = PiaoQuanPipelineTest(
  78. platform=self.platform,
  79. mode=self.mode,
  80. rule_dict=self.rule_dict,
  81. env=self.env,
  82. item=mq_obj,
  83. trace_id=trace_id,
  84. )
  85. if pipeline.process_item():
  86. print(json.dumps(mq_obj, ensure_ascii=False, indent=4))
  87. self.download_cnt += 1
  88. print(self.download_cnt)
  89. if __name__ == '__main__':
  90. Z = ZhuWanWuFuSuRecommend(
  91. platform="zhuwanwufusu",
  92. mode="recommend",
  93. rule_dict={},
  94. user_dict={"uid": 123456, "nick_name": "luojunhuishuaige"},
  95. env="prod"
  96. )
  97. Z.get_recommend_list()