get_url.py 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/9/1
  4. import json
  5. import os
  6. import sys
  7. import time
  8. sys.path.append(os.getcwd())
  9. from crawler_shipinhao.main.common import Common
  10. # from main.common import Common
  11. class GetUrl:
  12. @classmethod
  13. def get_url(cls, log_type):
  14. try:
  15. # charles 抓包文件保存目录
  16. # charles_file_dir = r"./crawler_kanyikan_recommend/chlsfiles/"
  17. charles_file_dir = r"./chlsfiles/"
  18. if int(len(os.listdir(charles_file_dir))) == 1:
  19. Common.logger(log_type).info("未找到chlsfile文件,等待60s")
  20. time.sleep(60)
  21. else:
  22. # 目标文件夹下所有文件
  23. all_file = sorted(os.listdir(charles_file_dir))
  24. # 获取到目标文件
  25. old_file = all_file[-1]
  26. # 分离文件名与扩展名
  27. new_file = os.path.splitext(old_file)
  28. # 重命名文件后缀
  29. os.rename(os.path.join(charles_file_dir, old_file),
  30. os.path.join(charles_file_dir, new_file[0] + ".txt"))
  31. with open(charles_file_dir + new_file[0] + ".txt", encoding='utf-8-sig', errors='ignore') as f:
  32. contents = json.load(f, strict=False)
  33. video_url_list = []
  34. cover_url_list = []
  35. if "finder.video.qq.com" in [text['host'] for text in contents]:
  36. for text in contents:
  37. if text["host"] == "finder.video.qq.com" and text["path"] == "/251/20302/stodownload":
  38. video_url_list.append(text)
  39. elif text["host"] == "finder.video.qq.com" and text["path"] == "/251/20350/stodownload":
  40. cover_url_list.append(text)
  41. video_url = video_url_list[0]['host']+video_url_list[0]['path']+'?'+video_url_list[0]['query']
  42. cover_url = cover_url_list[0]['host']+cover_url_list[0]['path']+'?'+cover_url_list[0]['query']
  43. return video_url, cover_url
  44. else:
  45. Common.logger(log_type).info("未找到 url,10s后重新获取")
  46. time.sleep(10)
  47. cls.get_url(log_type)
  48. except Exception as e:
  49. Common.logger(log_type).exception("get_url异常:{}", e)
  50. return None
  51. if __name__ == '__main__':
  52. GetUrl.get_url('recommend')