# zhufuquanzituijianliu.py
  1. import os
  2. import random
  3. import sys
  4. import time
  5. import uuid
  6. import json
  7. from datetime import datetime
  8. import cv2
  9. import requests
  10. from application.common.feishu import FsData
  11. from application.common.feishu.feishu_utils import FeishuUtils
  12. from application.common.gpt import GPT4oMini
  13. from application.common.mysql.sql import Sql
  14. from application.common.redis.xng_redis import xng_in_video_data
  15. from application.config.config import zhufuquanzi_view_api,zhufuquanzi_history_api
  16. sys.path.append(os.getcwd())
  17. from application.items import VideoItem
  18. from application.pipeline import PiaoQuanPipeline
  19. from application.common.messageQueue import MQ
  20. from application.common.log import AliyunLogger
  21. from application.common.mysql import MysqlHelper
  22. def video_view(content_id, account_id):
  23. headers = {
  24. "Content-Type": "application/json"
  25. }
  26. payload = {
  27. "content_id": str(content_id),
  28. "account_id": str(account_id)
  29. }
  30. try:
  31. # 发送 POST 请求
  32. response = requests.post(
  33. zhufuquanzi_view_api,
  34. headers=headers,
  35. json=payload # 自动将字典转换为 JSON
  36. )
  37. # 检查 HTTP 状态码
  38. if response.status_code == 200:
  39. # 解析 JSON 响应
  40. result = response.json()
  41. # 提取关键字段
  42. code = result.get("code")
  43. msg = result.get("msg")
  44. # 业务逻辑处理(示例)
  45. if code == 0:
  46. print("请求成功")
  47. else:
  48. print(f"{zhufuquanzi_view_api}请求失败,错误码: {code}, 消息: {msg}")
  49. else:
  50. print(f"{zhufuquanzi_view_api}HTTP 请求失败,状态码: {response.status_code}")
  51. except requests.exceptions.RequestException as e:
  52. print(f"{zhufuquanzi_view_api}请求异常: {e}")
  53. except json.JSONDecodeError:
  54. print(f"{zhufuquanzi_view_api}响应不是有效的 JSON 格式")
  55. def video_history(video_view_lists):
  56. headers = {
  57. "Content-Type": "application/json"
  58. }
  59. payload = {
  60. "content_ids": video_view_lists
  61. }
  62. try:
  63. # 发送 POST 请求
  64. response = requests.post(
  65. zhufuquanzi_history_api,
  66. headers=headers,
  67. json=payload # 自动将字典转换为 JSON
  68. )
  69. # 检查 HTTP 状态码
  70. if response.status_code == 200:
  71. # 解析 JSON 响应
  72. result = response.json()
  73. # 提取关键字段
  74. code = result.get("code")
  75. msg = result.get("msg")
  76. # 业务逻辑处理(示例)
  77. if code == 0:
  78. print("请求成功")
  79. else:
  80. print(f"{zhufuquanzi_history_api}请求失败,错误码: {code}, 消息: {msg}")
  81. else:
  82. print(f"{zhufuquanzi_history_api}HTTP 请求失败,状态码: {response.status_code}")
  83. except requests.exceptions.RequestException as e:
  84. print(f"{zhufuquanzi_history_api}请求异常: {e}")
  85. except json.JSONDecodeError:
  86. print(f"{zhufuquanzi_history_api}响应不是有效的 JSON 格式")
  87. class ZFQZTJLRecommend(object):
  88. """
  89. 祝福圈子推荐流
  90. """
  91. def __init__(self, platform, mode, rule_dict, user_list, env="prod"):
  92. self.limit_flag = False
  93. self.platform = platform
  94. self.mode = mode
  95. self.rule_dict = rule_dict
  96. self.user_list = user_list
  97. self.env = env
  98. self.download_cnt = 0
  99. self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
  100. self.expire_flag = False
  101. self.aliyun_log = AliyunLogger(mode=self.mode, platform=self.platform)
  102. self.mysql = MysqlHelper(mode=self.mode, platform=self)
  103. def get_video_duration(self, video_link: str) -> int:
  104. cap = cv2.VideoCapture(video_link)
  105. if cap.isOpened():
  106. rate = cap.get(5)
  107. frame_num = cap.get(7)
  108. duration = int(frame_num / rate)
  109. return duration
  110. return 0
  111. def get_recommend_list(self):
  112. print("祝福圈子推荐流开始")
  113. """
  114. 获取推荐页视频
  115. """
  116. headers = {
  117. 'Content-Type': 'application/json'
  118. }
  119. data_rule = FsData()
  120. title_rule = data_rule.get_title_rule()
  121. for i in range(4):
  122. # 目前90%视频会被过滤掉,改为频率和策略
  123. # for i in range(100):
  124. url = "http://8.217.192.46:8889/crawler/zhu_fu_quan_zi/recommend"
  125. payload = json.dumps({})
  126. response = requests.request("POST", url, headers=headers, data=payload)
  127. response = response.json()
  128. if response['code'] != 0:
  129. self.aliyun_log.logging(
  130. code="3000",
  131. message="抓取单条视频失败,请求失败"
  132. ),
  133. return
  134. video_view_lists = []
  135. for index, video_obj in enumerate(response['data']['data'], 1):
  136. try:
  137. self.aliyun_log.logging(
  138. code="1001", message="扫描到一条视频", data=video_obj
  139. )
  140. vid = video_obj['id']
  141. video_view_lists.append(str(vid))
  142. self.process_video_obj(video_obj,title_rule)
  143. except Exception as e:
  144. self.aliyun_log.logging(
  145. code="3000",
  146. message="抓取单条视频失败, 该视频位于第{}页第{}条报错原因是{}".format(
  147. 1, index, e
  148. ),
  149. )
  150. if self.limit_flag:
  151. return
  152. time.sleep(random.randint(5, 10))
  153. # time.sleep(random.randint(5, 30))
  154. video_history(video_view_lists)
  155. def process_video_obj(self, video_obj, title_rule):
  156. """
  157. 处理视频
  158. :param video_obj:
  159. """
  160. time.sleep(random.randint(3, 8))
  161. trace_id = self.platform + str(uuid.uuid1())
  162. our_user = random.choice(self.user_list)
  163. item = VideoItem()
  164. vid = video_obj['id']
  165. mid = int(video_obj['user']['mid'])
  166. print(f"vid={vid},mid={mid}")
  167. try:
  168. user_name = video_obj['user']['nick']
  169. avatar_url = video_obj['user']['hurl']
  170. sql = Sql()
  171. max_id = sql.select_id(mid)
  172. if max_id:
  173. sql.update_name_url(mid, avatar_url, user_name)
  174. else:
  175. time.sleep(1)
  176. link = sql.select_id_status(mid)
  177. if link:
  178. sql.insert_name_url(mid, avatar_url, user_name)
  179. print(f"开始写入{mid}")
  180. xng_in_video_data(json.dumps({"mid": mid}))
  181. except Exception as e:
  182. print(f"写入异常{e}")
  183. pass
  184. url = video_obj["v_url"]
  185. duration = self.get_video_duration(url)
  186. item.add_video_info("video_id", video_obj["id"])
  187. item.add_video_info("video_title", video_obj["title"])
  188. item.add_video_info("play_cnt", int(video_obj["play_pv"]))
  189. item.add_video_info("publish_time_stamp", int(int(video_obj["t"])/1000))
  190. item.add_video_info("out_user_id", video_obj["id"])
  191. item.add_video_info("cover_url", video_obj["url"])
  192. item.add_video_info("like_cnt", 0)
  193. item.add_video_info("share_cnt", int(video_obj["share"]))
  194. item.add_video_info("comment_cnt", int(video_obj["comment_count"]))
  195. item.add_video_info("video_url", video_obj["v_url"])
  196. item.add_video_info("out_video_id", video_obj["id"])
  197. item.add_video_info("duration", int(duration))
  198. item.add_video_info("platform", self.platform)
  199. item.add_video_info("strategy", self.mode)
  200. item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
  201. item.add_video_info("user_id", our_user["uid"])
  202. item.add_video_info("user_name", our_user["nick_name"])
  203. mq_obj = item.produce_item()
  204. pipeline = PiaoQuanPipeline(
  205. platform=self.platform,
  206. mode=self.mode,
  207. rule_dict=self.rule_dict,
  208. env=self.env,
  209. item=mq_obj,
  210. trace_id=trace_id,
  211. )
  212. if pipeline.process_item():
  213. title_list = title_rule.split(",")
  214. title = video_obj["title"]
  215. contains_keyword = any(keyword in title for keyword in title_list)
  216. if contains_keyword:
  217. new_title = GPT4oMini.get_ai_mini_title(title)
  218. if new_title:
  219. item.add_video_info("video_title", new_title)
  220. current_time = datetime.now()
  221. formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
  222. values = [
  223. [
  224. video_obj["v_url"],
  225. video_obj["url"],
  226. title,
  227. new_title,
  228. formatted_time,
  229. ]
  230. ]
  231. FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "GVottu", "ROWS", 1, 2)
  232. time.sleep(0.5)
  233. FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "GVottu", "A2:Z2", values)
  234. self.download_cnt += 1
  235. self.mq.send_msg(mq_obj)
  236. self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)
  237. video_view(vid, mid)
  238. self.aliyun_log.logging(code="1010", message="触发曝光", data=mq_obj)
  239. if self.download_cnt >= int(
  240. self.rule_dict.get("videos_cnt", {}).get("min", 200)
  241. ):
  242. self.limit_flag = True
  243. def run(self):
  244. self.get_recommend_list()
  245. if __name__ == '__main__':
  246. J = ZFQZTJLRecommend(
  247. platform="zhufuquanzituijianliu",
  248. mode="recommend",
  249. rule_dict={},
  250. user_list=[{"uid": 75590470, "link": "zfqz推荐流_接口1", "nick_name": "做你的尾巴"}, {"uid": 75590471, "link": "zfqz推荐流_接口2", "nick_name": "能够相遇"}, {"uid": 75590472, "link": "zfqz推荐流_接口3", "nick_name": "一别两宽各生欢喜"}, {"uid": 75590473, "link": "zfqz推荐流_接口4", "nick_name": "惹火"}, {"uid": 75590475, "link": "zfqz推荐流_接口5", "nick_name": "顾九"}, {"uid": 75590476, "link": "zfqz推荐流_接口6", "nick_name": "宠一身脾气惯一身毛病"}],
  251. )
  252. J.get_recommend_list()
  253. # J.logic()