# zhufuyiqifengfa_recommend_dev.py (6.9 KB)

  1. import os
  2. import json
  3. import random
  4. import sys
  5. import time
  6. import uuid
  7. import requests
  8. sys.path.append(os.getcwd())
  9. from common.video_item import VideoItem
  10. from common import tunnel_proxies
  11. from common.pipeline import PiaoQuanPipelineTest
  12. class YiQiFengFaRecommend(object):
  13. def __init__(self, platform, mode, rule_dict, user_list, env):
  14. self.platform = platform
  15. self.mode = mode
  16. self.rule_dict = rule_dict
  17. self.user_list = user_list
  18. self.env = env
  19. self.download_cnt = 0
  20. self.limit_flag = False
  21. def get_video_list(self, token):
  22. """
  23. 推荐流大约110条数据,目前暂时不会更新
  24. """
  25. headers = {
  26. 'Host': 'api.xiahong.top',
  27. 'ik': 'b326b5062b2f0e69046810717534cb09',
  28. 'xweb_xhr': '1',
  29. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.5(0x13080510)XWEB/1100',
  30. 'token': token,
  31. 'Content-Type': 'application/json',
  32. 'Accept': '*/*',
  33. 'Sec-Fetch-Site': 'cross-site',
  34. 'Sec-Fetch-Mode': 'cors',
  35. 'Sec-Fetch-Dest': 'empty',
  36. 'Referer': 'https://servicewechat.com/wxe5389a260a0a4ee2/2/page-frame.html',
  37. 'Accept-Language': 'en-US,en;q=0.9',
  38. }
  39. while True:
  40. time.sleep(random.randint(1, 10))
  41. try:
  42. if self.limit_flag:
  43. message = "本轮已经抓取到足够的数据,自动退出\t{}".format(self.download_cnt)
  44. print(message)
  45. return
  46. else:
  47. params = {
  48. 's': 'mobile/Video/getList',
  49. 'cid': '1',
  50. 'page': '11',
  51. 'api_version': '4',
  52. 'appid': 'wxe5389a260a0a4ee2',
  53. 'version': '1.9.1',
  54. 'env_version': 'release',
  55. 'scene': '1089'
  56. }
  57. response = requests.get('https://api.xiahong.top/index.php', headers=headers, params=params, proxies=tunnel_proxies())
  58. video_list = response.json()['data']['list']
  59. if video_list:
  60. for index, video_obj in enumerate(video_list, 1):
  61. try:
  62. print("扫描到一条视频")
  63. self.process_video_obj(video_obj)
  64. except Exception as e:
  65. print("抓取第{}条的时候出现问题, 报错信息是{}".format(index, e))
  66. else:
  67. print("已经抓完了,自动退出")
  68. return
  69. except Exception as e:
  70. print("抓取推荐页的时候出现错误, 报错信息是{}".format(e))
  71. def process_video_obj(self, video_obj):
  72. obj = {
  73. "id": 224708,
  74. "title": "🔴这个女博士的遭遇❗️让人心疼,群友们都看看吧!",
  75. "images": "http://pic.weitunit.com/ca/a9/caa9007f30a99b6b427e1f3664fe527b.jpg",
  76. "video_url": "https://api-hl.huoshan.com/hotsoon/item/video/_source/?video_id=v0200fg10000clhclejc77u23q1m9ojg&line=0&app_id=0&vquality=normal&watermark=0&long_video=0&sf=4&ts=1702261882&item_id=7305624354258275638",
  77. "out_link_id": "",
  78. "type": 0,
  79. "share_title": "🔴这个女博士的遭遇❗️让人心疼,群友们都看看吧!",
  80. "image_breathing": False,
  81. "video_cover": "https://mmbiz.qpic.cn/sz_mmbiz_png/enW3mhIB3IULz7uuISC8VTFZvc6F86PdUpo9ZAdo0UfyBmFPic6l7zoXpKKWU2Dt0Vf2Q8XV9jKkX63yP20G0CA/640?wx_fmt=png&from=appmsg",
  82. "end_title": "❤️请帮忙转发各大群里!拜托大家!🙏",
  83. "end_cover": "https://mmbiz.qpic.cn/sz_mmbiz_png/enW3mhIB3IULz7uuISC8VTFZvc6F86PdCVQYE2xgQ9qyjLL43ib6aQ4EaFdFruicY79WUiapcxQK5a955sqia50KFA/640?wx_fmt=png&from=appmsg",
  84. "author_nickname": "",
  85. "author_picture": "",
  86. "prompt_type": "",
  87. "display_image": "https://mmbiz.qpic.cn/mmbiz_gif/NlUgcycicAT2CehvYKTx4YCTGSMJ8XFRXIQCwX1q6ibG9TprFAGicHre6aicHxQ1qxW0wzUgW5lmRDQDxPdyJNZxag/0?wx_fmt=gif",
  88. "share_image": "http://pic.weitunit.com/ca/a9/caa9007f30a99b6b427e1f3664fe527b.jpg?x-oss-process=image/resize,m_fill,w_400,h_320,limit_0/watermark,image_bmV3X3dhdGVybWFyay5wbmc_eC1vc3MtcHJvY2Vzcz1pbWFnZS9yZXNpemUsUF81MA,g_center/watermark,image_dmlld3MucG5nP3gtb3NzLXByb2Nlc3M9aW1hZ2UvcmVzaXplLFBfNDA,g_sw,x_10,y_10/quality,Q_70",
  89. "click_jump_path_index": "/pages/index/index?videoId=224708&from=video",
  90. "share_jump_path_index": "/pages/index/index?videoId=224708&from=video",
  91. "visited": 8376085,
  92. "shared": 1159920,
  93. "share_vid": 224708
  94. }
  95. trace_id = self.platform + str(uuid.uuid1())
  96. our_user = random.choice(self.user_list)
  97. item = VideoItem()
  98. item.add_video_info("user_id", our_user["uid"])
  99. item.add_video_info("user_name", our_user["nick_name"])
  100. item.add_video_info("video_id", video_obj["id"])
  101. item.add_video_info("video_title", video_obj["title"])
  102. item.add_video_info("video_url", video_obj['video_url'])
  103. item.add_video_info("cover_url", video_obj["video_cover"])
  104. item.add_video_info("play_cnt", video_obj['visited'])
  105. item.add_video_info("share_cnt", video_obj['shared'])
  106. item.add_video_info("out_video_id", video_obj["id"])
  107. item.add_video_info("platform", self.platform)
  108. item.add_video_info("strategy", self.mode)
  109. item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
  110. mq_obj = item.produce_item()
  111. pipeline = PiaoQuanPipelineTest(
  112. platform=self.platform,
  113. mode=self.mode,
  114. rule_dict=self.rule_dict,
  115. env=self.env,
  116. item=mq_obj,
  117. trace_id=trace_id,
  118. )
  119. if pipeline.process_item():
  120. self.download_cnt += 1
  121. # self.mq.send_msg(mq_obj)
  122. print(mq_obj)
  123. print("成功发送至 ETL")
  124. if self.download_cnt >= int(
  125. self.rule_dict.get("videos_cnt", {}).get("min", 200)
  126. ):
  127. self.limit_flag = True
  128. if __name__ == '__main__':
  129. S = YiQiFengFaRecommend(
  130. platform="zhufuyiqifengfa",
  131. mode="recommend",
  132. env="dev",
  133. rule_dict={},
  134. user_list=[{'nick_name': "Ivring", 'uid': "1997"}, {'nick_name': "paul", 'uid': "1998"}]
  135. )
  136. token = "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE3MDIyNjIzMjUsIm5iZiI6MTcwMjI2MjMyNSwiZXhwIjoxNzAyMjY5NTI1LCJkYXRhIjp7InVzZXJfaWQiOjIzOTI0Nzc0MH19.LKxtz3OKw4ADlcLwU2sWMB5AJ5__aPNdVGqtRuk1ps4"
  137. S.get_video_list(token)