# download_up.py
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/4/18
  4. """
  5. 下载并上传:上升榜视频
  6. 规则:
  7. 1.满足基本规则
  8. 2.每隔一小时,检查视频播放量,>=1000,则下载及上传
  9. 3.超过 2 小时,则删除该视频信息
  10. """
  11. import json
  12. import os
  13. import random
  14. import sys
  15. import time
  16. import requests
  17. import urllib3
  18. sys.path.append(os.getcwd())
  19. from main.common import Common
  20. from main.get_feeds import get_feeds
  21. from main.publish import Publish
  22. class DownloadUp:
  23. @staticmethod
  24. def up_rule(up_width, up_height, up_duration, up_play_cnt):
  25. """
  26. 1.分辨率,宽或者高 >= 720 or ==0
  27. 2.600s >= 时长 >= 60s
  28. 3.视频播放量 >= 0
  29. """
  30. if int(up_width) >= 720 or int(up_height) >= 720 or str(up_width) == "0" or str(up_height) == "0":
  31. if 600 >= int(up_duration) >= 60:
  32. if int(up_play_cnt) >= 0:
  33. return True
  34. else:
  35. return False
  36. else:
  37. return False
  38. else:
  39. return False
  40. @classmethod
  41. def download_up_video(cls, env):
  42. """
  43. 1.从 kanyikan_feeds.txt 中获取 videoid
  44. 2.根据 videoid,从 videoinfo 接口,获取当前视频最新的信息
  45. 3.根据下载规则判断,符合规则进行下载:
  46. 1 更新视频 ID 到 "./txt/kanyikan_videoid.txt"
  47. 2 视频信息写入文件 "./videos/{d_title}/info.txt"
  48. 4.上传完成:
  49. 1 删除该视频在 "./txt/kanyikan_feeds.txt" 中的信息
  50. """
  51. get_video_info_session = Common.get_session()
  52. Common.crawler_log().info("获取视频info时,session:{}".format(get_video_info_session))
  53. lines = Common.read_txt("kanyikan_feeds.txt")
  54. for line in lines:
  55. v_time = line.strip().split(" + ")[0] # 第一次获取该视频的时间
  56. v_id = line.strip().split(" + ")[1] # 外网视频 ID
  57. v_play_ctn = line.strip().split(" + ")[2] # 播放量
  58. url = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"
  59. param = {
  60. "session": get_video_info_session,
  61. "vid": v_id,
  62. "wxaVersion": "3.9.2",
  63. "channelid": "208201",
  64. "scene": "32",
  65. "subscene": "1089",
  66. "model": "iPhone 11<iPhone12,1>14.7.1",
  67. "clientVersion": "8.0.18",
  68. "sharesearchid": "447665862521758270",
  69. "sharesource": "-1"
  70. }
  71. try:
  72. urllib3.disable_warnings()
  73. r = requests.get(url=url, params=param, verify=False)
  74. response = json.loads(r.content.decode("utf8"))
  75. if "data" not in response:
  76. Common.crawler_log().error("获取视频info时,session过期,等待30秒")
  77. # 如果返回空信息,则随机睡眠 31-35 秒
  78. time.sleep(random.randint(31, 35))
  79. else:
  80. data = response["data"]
  81. v_title = data["title"]
  82. v_duration = data["duration"]
  83. v_play_cnt_up = data["played_cnt"]
  84. v_comment_cnt = data["comment_cnt"]
  85. v_liked_cnt = data["liked_cnt"]
  86. v_shared_cnt = data["shared_cnt"]
  87. v_width = data["width"]
  88. v_height = data["height"]
  89. v_resolution = str(v_width) + "*" + str(v_height)
  90. v_send_date = data["upload_time"]
  91. v_username = data["user_info"]["nickname"]
  92. v_user_cover = data["user_info"]["headimg_url"]
  93. v_video_cover = data["cover_url"]
  94. if "items" not in data["play_info"]:
  95. if len(data["play_info"]) > 2:
  96. download_url_up = data["play_info"][2]["play_url"]
  97. else:
  98. download_url_up = data["play_info"][0]["play_url"]
  99. else:
  100. if len(data["play_info"]["items"]) > 2:
  101. download_url_up = data["play_info"]["items"][2]["play_url"]
  102. else:
  103. download_url_up = data["play_info"]["items"][0]["play_url"]
  104. # 判断基本规则
  105. if cls.up_rule(v_width, v_height, v_duration, v_play_cnt_up) is True \
  106. and v_id != "" and v_title != "" and v_duration != "" \
  107. and v_play_cnt_up != "" and v_comment_cnt != "" and v_liked_cnt != "" \
  108. and v_shared_cnt != "" and v_width != "" and v_height != "" \
  109. and v_send_date != "" and v_username != "" and v_user_cover != "" \
  110. and v_video_cover != "" and download_url_up != "":
  111. if int(time.time()) - int(v_time) < 3600:
  112. Common.crawler_log().info("距上次获取该视频时间:{}分钟".format(
  113. int(int(int(time.time()) - int(v_time)) / 60)) + ";{}".format(v_title))
  114. elif 7200 >= int(time.time()) - int(v_time) >= 3600:
  115. if int(v_play_cnt_up) - int(v_play_ctn) >= 1000:
  116. Common.crawler_log().info("该视频:{}".format(
  117. v_title) + " " + "在1小时内的播放量{}>=1000".format(int(v_play_cnt_up) - int(v_play_ctn)))
  118. # 下载封面
  119. Common.download_method("cover", v_title, v_video_cover)
  120. # 下载视频
  121. Common.download_method("video", v_title, download_url_up)
  122. # 保存视频 ID 到 "./txt/kanyikan_videoid.txt"
  123. with open("./txt/kanyikan_videoid.txt", "a", encoding="utf8") as f_a:
  124. f_a.write(v_id + "\n")
  125. # 保存视频信息到 "./files/{视频标题}/videoinfo.txt"
  126. with open("./videos/" + v_title + "/" + "info.txt",
  127. "a", encoding="utf8") as f_a2:
  128. f_a2.write(str(v_id) + "\n" +
  129. str(v_title) + "\n" +
  130. str(v_duration) + "\n" +
  131. str(v_play_cnt_up) + "\n" +
  132. str(v_comment_cnt) + "\n" +
  133. str(v_liked_cnt) + "\n" +
  134. str(v_shared_cnt) + "\n" +
  135. str(v_resolution) + "\n" +
  136. str(v_send_date) + "\n" +
  137. str(v_username) + "\n" +
  138. str(v_user_cover) + "\n" +
  139. str(download_url_up) + "\n" +
  140. str(v_video_cover) + "\n" +
  141. str(get_video_info_session))
  142. # 上传该视频
  143. Common.crawler_log().info("开始上传视频:{}".format(v_title))
  144. Publish.upload_and_publish(env, "up")
  145. # 删除该视频在kanyikan_feeds.txt中的信息
  146. Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
  147. with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f1:
  148. lines = f1.readlines()
  149. with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w1:
  150. for line1 in lines:
  151. if v_id in line1.split(" + ")[1]:
  152. continue
  153. f_w1.write(line1)
  154. else:
  155. # 删除之前保存的该视频信息,并把现在的信息保存进去
  156. Common.crawler_log().info("该视频1小时内的播放量:{}<1000".format(
  157. int(v_play_cnt_up) - int(v_play_ctn)) + ";"
  158. + "更新该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
  159. with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
  160. lines = f_r.readlines()
  161. with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
  162. for line2 in lines:
  163. if v_id in line2.split(" + ")[1]:
  164. continue
  165. f_w.write(line2)
  166. with open("./txt/kanyikan_feeds.txt", "a", encoding="utf-8") as f_a:
  167. f_a.write(str(time.time()) + " + "
  168. + str(v_id) + " + "
  169. + str(v_play_cnt_up) + " + "
  170. + str(v_title) + " + "
  171. + str(v_duration) + " + "
  172. + str(v_comment_cnt) + " + "
  173. + str(v_liked_cnt) + " + "
  174. + str(v_shared_cnt) + " + "
  175. + str(v_resolution) + " + "
  176. + str(v_send_date) + " + "
  177. + str(v_username) + " + "
  178. + str(v_user_cover) + " + "
  179. + str(v_video_cover) + " + "
  180. + str(download_url_up) + " + "
  181. + str(get_video_info_session) + "\n")
  182. elif int(time.time()) - int(v_time) > 7200:
  183. Common.crawler_log().info("距上次获取该视频时间:{}分钟。超过2小时,删除该视频".format(
  184. int((int(time.time()) - int(v_time)) / 60)) + ";" + "标题:{}".format(v_title))
  185. # 删除之前保存的该视频信息
  186. Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
  187. with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
  188. lines = f_r.readlines()
  189. with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
  190. for line2 in lines:
  191. if v_id in line2.split(" + ")[1]:
  192. continue
  193. f_w.write(line2)
  194. else:
  195. Common.crawler_log().info("不满足下载规则:{}".format(v_title))
  196. # 删除之前保存的该视频信息
  197. Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
  198. with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
  199. lines = f_r.readlines()
  200. with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
  201. for line3 in lines:
  202. if v_id in line3.split(" + ")[1]:
  203. continue
  204. f_w.write(line3)
  205. except Exception as e:
  206. Common.crawler_log().error("获取视频info异常:{}".format(e))
  207. if __name__ == "__main__":
  208. downloadup = DownloadUp()
  209. get_feeds()
  210. downloadup.download_up_video("dev")