# zwwfs_recommend.py (5.6 KB)
  1. import os
  2. import re
  3. import base64
  4. import json
  5. import random
  6. import sys
  7. import time
  8. import uuid
  9. import requests
  10. sys.path.append(os.getcwd())
  11. from common.video_item import VideoItem
  12. from common import PiaoQuanPipeline, AliyunLogger, tunnel_proxies
  13. from common.mq import MQ
  14. from common.scheduling_db import MysqlHelper
  15. from zhuwanwufusu.crypt import AESCipher as AES
  16. class ZhuWanWuFuSuRecommend(object):
  17. def __init__(self, platform, mode, rule_dict, user_dict, env):
  18. self.platform = platform
  19. self.mode = mode
  20. self.rule_dict = rule_dict
  21. self.user_dict = user_dict
  22. self.env = env
  23. self.download_cnt = 0
  24. self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
  25. self.expire_flag = False
  26. self.cryptor = AES()
  27. def get_recommend_list(self):
  28. url = "https://api.lidongze.cn/jeecg-boot/ugc/getVideoListsEn2"
  29. headers = {
  30. 'Host': 'api.lidongze.cn',
  31. 'xweb_xhr': '1',
  32. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.4(0x13080410)XWEB/31009',
  33. 'token': '',
  34. 'content-type': 'application/json',
  35. 'accept': '*/*',
  36. 'referer': 'https://servicewechat.com/wx0afdc2669ed8df2f/3/page-frame.html',
  37. 'accept-language': 'en-US,en;q=0.9'
  38. }
  39. page_index = 1
  40. total_page = 2
  41. while page_index <= total_page:
  42. try:
  43. query = {
  44. "pageNo": page_index,
  45. "pageSize": 10,
  46. "groupId": "1650323161797439489", # 推荐流的 ID
  47. "vn": 1,
  48. "gx": 1,
  49. "appid": "wx0afdc2669ed8df2f",
  50. "type": 0
  51. }
  52. params = {
  53. "v": self.cryptor.aes_encrypt(data=json.dumps(query))
  54. }
  55. response = requests.request("GET", url, headers=headers, params=params, proxies=tunnel_proxies())
  56. result = json.loads(self.cryptor.aes_decrypt(response.text))
  57. total_page = result['list']['pages']
  58. page_index = result['list']['current'] + 1
  59. for index, video_obj in enumerate(result['list']['records'], 1):
  60. try:
  61. AliyunLogger.logging(
  62. code="1001",
  63. platform=self.platform,
  64. mode=self.mode,
  65. env=self.env,
  66. message="扫描到一条视频",
  67. data=video_obj
  68. )
  69. self.process_video_obj(video_obj)
  70. except Exception as e:
  71. AliyunLogger.logging(
  72. code="3000",
  73. platform=self.platform,
  74. mode=self.mode,
  75. env=self.env,
  76. message="抓取单条视频失败, 该视频位于第{}页第{}条报错原因是{}".format(page_index, index, e)
  77. )
  78. except Exception as e:
  79. AliyunLogger.logging(
  80. code="3000",
  81. platform=self.platform,
  82. mode=self.mode,
  83. env=self.env,
  84. message="抓取第{}页的时候失败, 报错原因是{}".format(page_index, e)
  85. )
  86. def process_video_obj(self, video_obj):
  87. trace_id = self.platform + str(uuid.uuid1())
  88. play_cnt = int(video_obj['playnum'].replace("万+", "0000")) if "万+" in video_obj['playnum'] else int(
  89. video_obj['playnum'])
  90. item = VideoItem()
  91. item.add_video_info("video_id", video_obj['id'])
  92. item.add_video_info("video_title", video_obj['vname'])
  93. item.add_video_info("play_cnt", play_cnt)
  94. item.add_video_info("publish_time_stamp", int(time.time()))
  95. item.add_video_info("out_user_id", video_obj['authid'])
  96. item.add_video_info("cover_url", video_obj['shareimg'])
  97. item.add_video_info("like_cnt", int(video_obj['likenum']))
  98. item.add_video_info("video_url", video_obj['videoaddr'])
  99. item.add_video_info("out_video_id", video_obj['id'])
  100. item.add_video_info("platform", self.platform)
  101. item.add_video_info("strategy", self.mode)
  102. item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
  103. item.add_video_info("user_id", self.user_dict['uid'])
  104. item.add_video_info("user_name", self.user_dict['nick_name'])
  105. mq_obj = item.produce_item()
  106. pipeline = PiaoQuanPipeline(
  107. platform=self.platform,
  108. mode=self.mode,
  109. rule_dict=self.rule_dict,
  110. env=self.env,
  111. item=mq_obj,
  112. trace_id=trace_id,
  113. )
  114. if pipeline.process_item():
  115. self.download_cnt += 1
  116. self.mq.send_msg(mq_obj)
  117. AliyunLogger.logging(
  118. code="1002",
  119. platform=self.platform,
  120. mode=self.mode,
  121. env=self.env,
  122. message="成功发送至 ETL",
  123. data=mq_obj
  124. )
  125. if __name__ == '__main__':
  126. Z = ZhuWanWuFuSuRecommend(
  127. platform="zhuwanwufusu",
  128. mode="recommend",
  129. rule_dict={},
  130. user_dict={"uid": 123456, "nick_name": "luojunhuishuaige"},
  131. env="prod"
  132. )
  133. Z.get_recommend_list()