piaoquangushi.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. import os
  2. import random
  3. import sys
  4. import time
  5. import uuid
  6. import json
  7. import cv2
  8. import requests
  9. from application.common.mysql.sql import Sql
  10. from application.common.redis.xng_redis import xng_in_video_data
  11. sys.path.append(os.getcwd())
  12. from application.items import VideoItem
  13. from application.pipeline import PiaoQuanPipeline
  14. from application.common.messageQueue import MQ
  15. from application.common.log import AliyunLogger
  16. from application.common.mysql import MysqlHelper
  class PQGSRecommend(object):
      """Recommend-feed crawler for the "piaoquangushi" platform.

      Pages through the recommend endpoint, resolves a playable URL for each
      listed video, runs the resulting item through ``PiaoQuanPipeline`` and
      publishes accepted items to the ETL message queue until the download
      limit taken from ``rule_dict`` is reached.

      NOTE(review): the recommend endpoint path is ``xi_que_bo`` while the
      detail endpoint path is ``piao_quan_gu_shi`` -- confirm this pairing is
      intentional.
      """

      # Crawler gateway endpoints.
      RECOMMEND_URL = "http://8.217.192.46:8889/crawler/xi_que_bo/recommend"
      DETAIL_URL = "http://8.217.192.46:8889/crawler/piao_quan_gu_shi/detail"
      JSON_HEADERS = {"Content-Type": "application/json"}
      # Seconds to wait for the gateway; without a timeout ``requests`` can
      # block forever on a stalled connection.
      REQUEST_TIMEOUT = 30
      # Used when ``rule_dict`` does not define ``videos_cnt.min``.
      DEFAULT_DOWNLOAD_LIMIT = 200

      def __init__(self, platform, mode, rule_dict, user_list, env="prod"):
          """Store the crawl configuration and create the MQ/log/MySQL helpers.

          :param platform: platform identifier stamped on every item and log
          :param mode: crawl strategy name (stored as the item's ``strategy``)
          :param rule_dict: filter rules; ``videos_cnt.min`` caps how many
              videos are sent to ETL
          :param user_list: accounts to publish under; each entry is a dict
              with ``uid`` and ``nick_name``
          :param env: environment suffix appended to the ETL topic name
          """
          self.limit_flag = False
          self.platform = platform
          self.mode = mode
          self.rule_dict = rule_dict
          self.user_list = user_list
          self.env = env
          self.download_cnt = 0
          self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
          self.expire_flag = False
          self.aliyun_log = AliyunLogger(mode=self.mode, platform=self.platform)
          # Pass the platform name, not the crawler object itself.
          self.mysql = MysqlHelper(mode=self.mode, platform=self.platform)

      def _post_json(self, url, body):
          """POST ``body`` as JSON to ``url`` and return the decoded JSON reply.

          :raises requests.RequestException: on connection errors or timeouts
          :raises ValueError: when the reply body is not valid JSON
          """
          response = requests.request(
              "POST",
              url,
              headers=self.JSON_HEADERS,
              data=json.dumps(body),
              timeout=self.REQUEST_TIMEOUT,
          )
          return response.json()

      def _download_limit(self):
          """Return the number of videos after which crawling stops."""
          configured = self.rule_dict.get("videos_cnt", {}).get(
              "min", self.DEFAULT_DOWNLOAD_LIMIT
          )
          return int(configured)

      @staticmethod
      def _first_video_url(response):
          """Return the first ``video_url`` of a detail reply, else ``None``."""
          try:
              return response["data"]["data"]["video_url_list"][0]["video_url"]
          except (KeyError, IndexError, TypeError):
              # Missing keys, an empty list or a non-dict payload all mean
              # "no usable URL".
              return None

      def get_recommend_list(self):
          """Crawl the recommend feed page by page.

          Returns when a request fails, the gateway answers with a non-zero
          ``code``, a page comes back empty, or the download limit is reached.
          """
          print("篻圈故事")
          cursor = ""
          page = 0
          while True:
              page += 1
              try:
                  response = self._post_json(self.RECOMMEND_URL, {"cursor": cursor})
              except (requests.RequestException, ValueError) as e:
                  self.aliyun_log.logging(
                      code="3000",
                      message="抓取单条视频失败,请求失败, 报错原因是{}".format(e),
                  )
                  return
              if response["code"] != 0:
                  self.aliyun_log.logging(
                      code="3000",
                      message="抓取单条视频失败,请求失败"
                  )
                  return
              cursor = response["data"]["next_cursor"]
              data = response["data"]["data"]
              if not data:
                  return
              for index, video_obj in enumerate(data, 1):
                  try:
                      self.aliyun_log.logging(
                          code="1001", message="扫描到一条视频", data=video_obj
                      )
                      self.process_video_obj(video_obj)
                  except Exception as e:
                      # Top-level boundary: one broken video must not abort
                      # the rest of the page, so log it and move on.
                      self.aliyun_log.logging(
                          code="3000",
                          message="抓取单条视频失败, 该视频位于第{}页第{}条报错原因是{}".format(
                              page, index, e
                          ),
                      )
                  # Stop as soon as enough videos have been sent.
                  if self.limit_flag:
                      return
              # Pause between page requests.
              time.sleep(random.randint(1, 5))

      def process_video_obj(self, video_obj):
          """Build an item for one feed entry and send it to ETL if accepted.

          Engagement counters are set to 0, the publish timestamp is the
          crawl time, and the video id is also used as ``out_user_id``.

          :param video_obj: feed entry; ``id``, ``title`` and ``cover`` are read
          """
          time.sleep(random.randint(3, 8))
          trace_id = self.platform + str(uuid.uuid1())
          our_user = random.choice(self.user_list)
          video_url = self.get_video_url(video_obj["id"])
          if not video_url:
              return

          item = VideoItem()
          item.add_video_info("video_id", video_obj["id"])
          item.add_video_info("video_title", video_obj["title"])
          item.add_video_info("play_cnt", 0)
          item.add_video_info("publish_time_stamp", int(time.time()))
          item.add_video_info("out_user_id", video_obj["id"])
          item.add_video_info("cover_url", video_obj["cover"])
          item.add_video_info("like_cnt", 0)
          item.add_video_info("share_cnt", 0)
          item.add_video_info("comment_cnt", 0)
          item.add_video_info("video_url", video_url)
          item.add_video_info("out_video_id", video_obj["id"])
          item.add_video_info("platform", self.platform)
          item.add_video_info("strategy", self.mode)
          item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
          item.add_video_info("user_id", our_user["uid"])
          item.add_video_info("user_name", our_user["nick_name"])
          mq_obj = item.produce_item()

          pipeline = PiaoQuanPipeline(
              platform=self.platform,
              mode=self.mode,
              rule_dict=self.rule_dict,
              env=self.env,
              item=mq_obj,
              trace_id=trace_id,
          )
          if pipeline.process_item():
              self.download_cnt += 1
              self.mq.send_msg(mq_obj)
              self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)
              if self.download_cnt >= self._download_limit():
                  self.limit_flag = True

      def get_video_url(self, vid):
          """Fetch the playable URL of a video.

          :param vid: video id, sent to the detail endpoint as ``content_id``
          :return: the first entry's ``video_url``, or ``None`` when the
              request fails, the gateway reports an error, or the reply
              carries no URL
          """
          try:
              response = self._post_json(self.DETAIL_URL, {"content_id": f"{vid}"})
          except (requests.RequestException, ValueError) as e:
              # Previously swallowed silently; record why the lookup failed.
              self.aliyun_log.logging(
                  code="3000",
                  message="获取视频链接失败, 报错原因是{}".format(e),
              )
              return None
          if not isinstance(response, dict) or response.get("code") != 0:
              self.aliyun_log.logging(
                  code="3000",
                  message="获取视频链接失败"
              )
              return None
          video_url = self._first_video_url(response)
          if video_url is None:
              self.aliyun_log.logging(
                  code="3000",
                  message="获取视频链接失败"
              )
          return video_url

      def run(self):
          """Entry point: crawl the recommend feed."""
          self.get_recommend_list()
  if __name__ == '__main__':
      # Manual run: crawl the recommend feed once with a single placeholder
      # publishing account and no filter rules.
      demo_users = [{'uid': "123456", 'nick_name': "xiaoxiao"}]
      crawler = PQGSRecommend(
          platform="piaoquangushi",
          mode="recommend",
          rule_dict={},
          user_list=demo_users,
      )
      crawler.get_recommend_list()