# common.py
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/3/30
  4. """
  5. 公共方法,包含:生成log / 删除log / 下载方法 / 读取文件 / 统计下载数
  6. """
  7. from datetime import date, timedelta
  8. from loguru import logger
  9. import datetime
  10. import logging
  11. import os
  12. import time
  13. import requests
  14. import urllib3
# Bypass any environment-configured HTTP(S) proxies for requests calls below.
proxies = {"http": None, "https": None}
  16. class Common:
  17. # 统一获取当前时间 <class 'datetime.datetime'> 2022-04-14 20:13:51.244472
  18. now = datetime.datetime.now()
  19. # 昨天 <class 'str'> 2022-04-13
  20. yesterday = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d")
  21. # 今天 <class 'datetime.date'> 2022-04-14
  22. today = date.today()
  23. # 明天 <class 'str'> 2022-04-15
  24. tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d")
  25. # 使用 logging 模块生成日志
  26. @staticmethod
  27. def crawler_log():
  28. """
  29. 生成 log 日志
  30. """
  31. # 日志路径
  32. log_dir = r"./logs/"
  33. log_path = os.getcwd() + os.sep + log_dir
  34. if not os.path.isdir(log_path):
  35. os.makedirs(log_path)
  36. # 日志参数
  37. log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  38. date_format = "%Y-%m-%d %p %H:%M:%S"
  39. log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + '.log'
  40. # 日志初始化
  41. logging.basicConfig(filename=log_path + log_name, level=logging.INFO, format=log_format, datefmt=date_format)
  42. crawler_logger = logging.getLogger("crawler-log")
  43. return crawler_logger
  44. # 使用 logger 模块生成日志
  45. @staticmethod
  46. def logger():
  47. """
  48. 使用 logger 模块生成日志
  49. """
  50. # 日志路径
  51. log_dir = r"./logs/"
  52. log_path = os.getcwd() + os.sep + log_dir
  53. if not os.path.isdir(log_path):
  54. os.makedirs(log_path)
  55. # 日志文件名
  56. log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + '.log'
  57. # 日志不打印到控制台
  58. logger.remove(handler_id=None)
  59. # rotation="500 MB",实现每 500MB 存储一个文件
  60. # rotation="12:00",实现每天 12:00 创建一个文件
  61. # rotation="1 week",每周创建一个文件
  62. # retention="10 days",每隔10天之后就会清理旧的日志
  63. # 初始化日志
  64. logger.add(log_dir + log_name, level="INFO", rotation='00:00')
  65. return logger
  66. # 清除日志,保留最近 7 个文件
  67. @classmethod
  68. def del_logs(cls):
  69. """
  70. 清除冗余日志文件
  71. :return: 保留最近 7 个日志
  72. """
  73. log_dir = r"./logs/"
  74. all_files = sorted(os.listdir(log_dir))
  75. all_logs = []
  76. for log in all_files:
  77. name = os.path.splitext(log)[-1]
  78. if name == ".log":
  79. all_logs.append(log)
  80. if len(all_logs) <= 7:
  81. pass
  82. else:
  83. for file in all_logs[:len(all_logs) - 7]:
  84. os.remove(log_dir + file)
  85. cls.logger().info("清除冗余日志成功")
  86. # 封装下载视频或封面的方法
  87. @classmethod
  88. def download_method(cls, text, d_name, d_url):
  89. """
  90. 下载封面:text == "cover" ; 下载视频:text == "video"
  91. 需要下载的视频标题:d_title
  92. 视频封面,或视频播放地址:d_url
  93. 下载保存路径:"./files/{d_title}/"
  94. """
  95. # 首先创建一个保存该视频相关信息的文件夹
  96. video_dir = "./videos/" + d_name + "/"
  97. if not os.path.exists(video_dir):
  98. os.mkdir(video_dir)
  99. cls.logger().info("删除 charles 缓存文件成功")
  100. # 下载视频
  101. if text == "video":
  102. # 需要下载的视频地址
  103. video_url = d_url
  104. # 视频名
  105. video_name = "video.mp4"
  106. # 下载视频
  107. urllib3.disable_warnings()
  108. response = requests.get(video_url, stream=True, proxies=proxies, verify=False)
  109. try:
  110. with open(video_dir + video_name, "wb") as f:
  111. for chunk in response.iter_content(chunk_size=10240):
  112. f.write(chunk)
  113. cls.logger().info("==========视频下载完成==========")
  114. except Exception as e:
  115. cls.logger().exception("视频下载失败:{}", e)
  116. # 下载封面
  117. elif text == "cover":
  118. # 需要下载的封面地址
  119. cover_url = d_url
  120. # 封面名
  121. cover_name = "image.jpg"
  122. # 下载封面
  123. urllib3.disable_warnings()
  124. response = requests.get(cover_url, proxies=proxies, verify=False)
  125. try:
  126. with open(video_dir + cover_name, "wb") as f:
  127. f.write(response.content)
  128. cls.logger().info("==========封面下载完成==========")
  129. except Exception as e:
  130. cls.logger().exception("封面下载失败:{}", e)
  131. # 读取 txt 内容,返回 f.readlines()
  132. @staticmethod
  133. def read_txt(t_name):
  134. """
  135. 读取 txt 文件
  136. :param t_name: 文件名
  137. :return: 文件内容
  138. """
  139. with open(r"./txt/" + t_name, "r", encoding="UTF-8") as f:
  140. return f.readlines()
  141. # 统计 txt 内容数量
  142. @classmethod
  143. def kuaishou_download_count(cls):
  144. videoid_path = r"./txt/kuaishou_videoid.txt"
  145. count = 0
  146. for count, line in enumerate(open(videoid_path, "rb").readlines()):
  147. count += 1
  148. cls.logger().info('累计下载视频数: {}\n', count)
# Manual smoke test: instantiating the helper class exercises the
# module-level date attributes only (no I/O happens at construction).
if __name__ == "__main__":
    common = Common()