download_play.py

# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/4/18
"""
Download and upload: videos selected by play count
"""
import os
import sys
sys.path.append(os.getcwd())
from main.common import Common
from main.get_feeds import get_feeds
from main.publish import Publish


class DownloadPlay:
    @staticmethod
    def play_rule(play_width, play_height, play_duration, play_play_cnt):
        """
        1. Resolution: width or height >= 720, or == 0
        2. 600s >= duration >= 60s
        3. Video play count >= 150000
        """
        if int(play_width) >= 720 or int(play_height) >= 720 or play_width == "0" or play_height == "0":
            if 600 >= int(play_duration) >= 60:
                if int(play_play_cnt) >= 150000:
                    return True
                else:
                    return False
            else:
                return False
        else:
            return False
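
    # Illustrative example (not part of the original module): with string fields as
    # parsed from the feed file, DownloadPlay.play_rule("1280", "720", "120", "200000")
    # returns True, while DownloadPlay.play_rule("640", "360", "30", "1000") returns False.
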
    @classmethod
    def download_play_video(cls, env):
        """
        Download play-count videos.
        Test environment: env == dev
        Production environment: env == prod
        """
        videos = Common.read_txt("kanyikan_feeds.txt")
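        # Each line of kanyikan_feeds.txt is a " + "-separated record. The indices used
        # below assume the following layout (inferred from this parsing code):
        # [1] video id, [2] play count, [3] title, [4] duration, [5] comment count,
        # [6] like count, [7] share count, [8] resolution ("width*height"),
        # [9] send time, [10] user name, [11] avatar url, [12] cover url,
        # [13] video url, [-1] session.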
        for video in videos:
            # Split the record once and pick fields by index
            fields = video.strip().split(" + ")
            download_video_id = fields[1]
            try:
                download_video_title = fields[3]
                download_video_duration = fields[4]
                download_video_play_cnt = fields[2]
                download_video_comment_cnt = fields[5]
                download_video_like_cnt = fields[6]
                download_video_share_cnt = fields[7]
                download_video_resolution = fields[8]
                download_video_width = download_video_resolution.split("*")[0]
                download_video_height = download_video_resolution.split("*")[-1]
                download_video_send_time = fields[9]
                download_user_name = fields[10]
                download_head_url = fields[11]
                download_cover_url = fields[12]
                download_video_url = fields[13]
                download_video_session = fields[-1]
                # Only download videos that satisfy the play rules
                if cls.play_rule(download_video_width,
                                 download_video_height,
                                 download_video_duration,
                                 download_video_play_cnt) is True:
                    Common.crawler_log().info("Start downloading video: {}".format(download_video_title))
                    # Download the cover image
                    Common.download_method(text="cover", d_name=download_video_title, d_url=download_cover_url)
                    # Download the video file
                    Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)
                    # Save the video ID to "./txt/kanyikan_videoid.txt"
                    with open(r"./txt/kanyikan_videoid.txt", "a", encoding="utf8") as f_a:
                        f_a.write(download_video_id + "\n")
                    # Save the video info to "./videos/{download_video_title}/info.txt"
                    with open(r"./videos/" + download_video_title + r"/info.txt", "a", encoding="utf8") as f_a:
                        f_a.write(str(download_video_id) + "\n" +
                                  str(download_video_title) + "\n" +
                                  str(download_video_duration) + "\n" +
                                  str(download_video_play_cnt) + "\n" +
                                  str(download_video_comment_cnt) + "\n" +
                                  str(download_video_like_cnt) + "\n" +
                                  str(download_video_share_cnt) + "\n" +
                                  str(download_video_resolution) + "\n" +
                                  str(download_video_send_time) + "\n" +
                                  str(download_user_name) + "\n" +
                                  str(download_head_url) + "\n" +
                                  str(download_video_url) + "\n" +
                                  str(download_cover_url) + "\n" +
                                  str(download_video_session))
                    # Upload the video
                    Common.crawler_log().info("Start uploading video: {}".format(download_video_title))
                    Publish.upload_and_publish(env, "play")
                    # Remove this video's record from kanyikan_feeds.txt
                    Common.crawler_log().info("Removing this video's record from kanyikan_feeds.txt: {}".format(download_video_title))
                    with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                        lines = f_r.readlines()
                    with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
                        for line in lines:
                            if download_video_id in line.split(" + ")[1]:
                                continue
                            f_w.write(line)
                else:
                    # The video does not satisfy the rules; remove its record from kanyikan_feeds.txt
                    Common.crawler_log().info("Video does not satisfy the download rules, removing its record from kanyikan_feeds.txt: {}".format(download_video_title))
                    with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                        lines = f_r.readlines()
                    with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
                        for line in lines:
                            if download_video_id in line.split(" + ")[1]:
                                continue
                            f_w.write(line)
            except Exception as e:
                # Malformed record: log the error and drop the line from kanyikan_feeds.txt
                Common.crawler_log().info("Video info error: {}".format(e))
                with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                    lines = f_r.readlines()
                with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
                    for line in lines:
                        if download_video_id in line.split(" + ")[1]:
                            continue
                        f_w.write(line)


if __name__ == "__main__":
    download_play = DownloadPlay()
    get_feeds()
    download_play.download_play_video("dev")