# jixiangxingfu.py (5.2 KB)
  1. import os
  2. import random
  3. import sys
  4. import time
  5. import uuid
  6. from datetime import datetime
  7. import requests
  8. from application.common import Feishu
  9. sys.path.append(os.getcwd())
  10. from application.items import VideoItem
  11. from application.pipeline import PiaoQuanPipeline
  12. from application.common.messageQueue import MQ
  13. from application.common.proxies import tunnel_proxies
  14. from application.common.log import AliyunLogger
  15. from application.common.mysql import MysqlHelper
  class JXXFRecommend(object):
      """Crawl the recommend feed of the "jixiangxingfu" mini program.

      Every video returned by the feed is logged, written to a Feishu sheet,
      checked by ``PiaoQuanPipeline`` and, when the pipeline accepts it, sent
      to the ETL message queue.  Crawling stops once the number of accepted
      videos reaches the limit configured in ``rule_dict``.
      """

      # Upper bound, in seconds, for a single feed request.  Without it a
      # stalled proxy or server would block the crawl forever.
      REQUEST_TIMEOUT = 30
      # Number of feed pages requested per crawl (page indexes 0..9).
      PAGE_COUNT = 10
      # Limit used when rule_dict carries no "videos_cnt" -> "min" entry.
      DEFAULT_DOWNLOAD_LIMIT = 200

      def __init__(self, platform, mode, rule_dict, user_list, env="prod"):
          """Store the crawl configuration and create the helper clients.

          :param platform: platform name; used for logging, trace ids and items
          :param mode: crawl strategy name, stored on every item as "strategy"
          :param rule_dict: rule mapping; ``rule_dict["videos_cnt"]["min"]`` is
              read as the download limit
          :param user_list: list of dicts with "uid" and "nick_name" keys; one
              is picked at random for every video
          :param env: environment suffix of the MQ topic name
          """
          self.limit_flag = False
          self.platform = platform
          self.mode = mode
          self.rule_dict = rule_dict
          self.user_list = user_list
          self.env = env
          self.download_cnt = 0
          self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
          self.expire_flag = False
          self.aliyun_log = AliyunLogger(mode=self.mode, platform=self.platform)
          # Pass the platform name, not the crawler instance, so this call
          # agrees with the AliyunLogger call above.  self.mysql is not used
          # by any method of this class.
          self.mysql = MysqlHelper(mode=self.mode, platform=self.platform)

      def _download_limit(self):
          """Return the configured download limit as an int."""
          return int(
              self.rule_dict.get("videos_cnt", {}).get(
                  "min", self.DEFAULT_DOWNLOAD_LIMIT
              )
          )

      def get_recommend_list(self):
          """Fetch the recommend feed page by page and process each video.

          A failure while handling one video is logged and does not stop the
          crawl.  Errors raised by the HTTP request itself, or while reading
          ``data.list`` from the response, are not caught here and propagate
          to the caller.  Returns as soon as ``limit_flag`` is set.
          """
          headers = {
              'Host': 'api.huanqiwl.top',
              'Content-Type': 'application/json',
              'Accept-Language': 'zh-cn',
              'Accept': '*/*',
              'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E217 MicroMessenger/6.8.0(0x16080000) NetType/WIFI Language/en Branch/Br_trunk MiniProgramEnv/Mac',
              'Referer': 'https://servicewechat.com/wx6692a24ad2a88bfb/3/page-frame.html'
          }
          for page in range(self.PAGE_COUNT):
              # Random pause before every page request.
              time.sleep(random.randint(1, 10))
              url = f"https://api.huanqiwl.top/index.php?s=mobile/Video/getList&cid=1&page={page}&api_version=4&appid=wx6692a24ad2a88bfb&version=1.9.5&env_version=release&scene=1053"
              response = requests.get(
                  url,
                  headers=headers,
                  proxies=tunnel_proxies(),
                  timeout=self.REQUEST_TIMEOUT,
              )
              video_list = response.json()['data']['list']
              for index, video_obj in enumerate(video_list, 1):
                  try:
                      self.aliyun_log.logging(
                          code="1001", message="扫描到一条视频", data=video_obj
                      )
                      self.process_video_obj(video_obj)
                  except Exception as e:
                      # Per-video boundary: log the failure and carry on with
                      # the next video.
                      self.aliyun_log.logging(
                          code="3000",
                          message="抓取单条视频失败, 该视频位于第{}页第{}条报错原因是{}".format(
                              page, index, e
                          ),
                      )
                  # NOTE(review): the listing this class was recovered from had
                  # lost its indentation; the limit check and the pause below
                  # are placed per video here - confirm against the deployed
                  # file.
                  if self.limit_flag:
                      return
                  time.sleep(random.randint(5, 10))

      def _record_in_feishu(self, video_obj):
          """Write one row describing ``video_obj`` to the Feishu sheet."""
          formatted_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
          values = [[
              video_obj["id"],
              formatted_time,
              video_obj["title"],
              video_obj["images"],
              video_obj["video_url"]
          ]]
          # Presumably inserts an empty row under the header so that A2:Z2 is
          # free for the new values - confirm against the Feishu helper.
          Feishu.insert_columns(self.platform, 'jixiangxingfu', "L0KXHh", "ROWS", 1, 2)
          time.sleep(0.5)
          Feishu.update_values(self.platform, 'jixiangxingfu', "L0KXHh", "A2:Z2", values)

      def process_video_obj(self, video_obj):
          """Turn one feed entry into an item, record it and publish it.

          :param video_obj: dict from the feed; the keys "id", "title",
              "images" and "video_url" are read

          When the pipeline accepts the item it is sent to the message queue,
          ``download_cnt`` is incremented and ``limit_flag`` is set once the
          download limit is reached.
          """
          time.sleep(random.randint(3, 8))
          trace_id = self.platform + str(uuid.uuid1())
          our_user = random.choice(self.user_list)

          item = VideoItem()
          item.add_video_info("video_id", video_obj["id"])
          item.add_video_info("video_title", video_obj["title"])
          item.add_video_info("play_cnt", 0)
          # The crawl time is stored as the publish time.
          item.add_video_info("publish_time_stamp", int(time.time()))
          # The video id is also stored as the external user id.
          item.add_video_info("out_user_id", video_obj["id"])
          item.add_video_info("cover_url", video_obj["images"])
          item.add_video_info("like_cnt", 0)
          item.add_video_info("video_url", video_obj["video_url"])
          item.add_video_info("out_video_id", video_obj["id"])
          item.add_video_info("platform", self.platform)
          item.add_video_info("strategy", self.mode)
          item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
          item.add_video_info("user_id", our_user["uid"])
          item.add_video_info("user_name", our_user["nick_name"])

          self._record_in_feishu(video_obj)

          mq_obj = item.produce_item()
          pipeline = PiaoQuanPipeline(
              platform=self.platform,
              mode=self.mode,
              rule_dict=self.rule_dict,
              env=self.env,
              item=mq_obj,
              trace_id=trace_id,
          )
          if pipeline.process_item():
              self.download_cnt += 1
              self.mq.send_msg(mq_obj)
              self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)
              if self.download_cnt >= self._download_limit():
                  self.limit_flag = True

      def run(self):
          """Entry point: crawl the recommend feed once."""
          self.get_recommend_list()
  if __name__ == '__main__':
      # Manual run: one placeholder account, no rule overrides, default env.
      demo_user = {'uid': "123456", 'nick_name': "xiaoxiao"}
      crawler = JXXFRecommend("jixiangxingfu", "recommend", {}, [demo_user])
      crawler.get_recommend_list()