12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364 |
# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/9/1
- import json
- import os
- import sys
- import time
- sys.path.append(os.getcwd())
- from crawler_shipinhao.main.common import Common
class GetUrl:
    """Extract video/cover URLs from Charles (chlsfile) capture exports.

    Scans the capture directory for the newest exported capture, renames it
    to ``.txt``, parses it as JSON, and picks out the first request pair to
    ``finder.video.qq.com`` matching the known video/cover download paths.
    """

    @classmethod
    def get_url(cls, log_type):
        """Return ``(video_url, cover_url)`` scraped from the newest capture.

        :param log_type: logger channel name passed to ``Common.logger``.
        :returns: ``(video_url, cover_url)`` tuple of strings, or ``None``
                  when no capture file is available yet or an error occurs.
        """
        try:
            # Directory where Charles capture (chlsfile) exports are saved.
            charles_file_dir = r"./crawler_kanyikan_recommend/chlsfiles/"
            if len(os.listdir(charles_file_dir)) == 1:
                # Only a single (placeholder) entry: no capture yet — wait.
                Common.logger(log_type).info("未找到chlsfile文件,等待60s")
                time.sleep(60)
                # Explicit None: original fell through implicitly.
                return None
            # All files in the capture directory; lexicographic sort puts
            # the newest export last (timestamped filenames assumed —
            # TODO confirm naming scheme of exported captures).
            all_file = sorted(os.listdir(charles_file_dir))
            old_file = all_file[-1]
            # Rename the capture to ".txt" so it can be read as plain text.
            stem = os.path.splitext(old_file)[0]
            txt_path = os.path.join(charles_file_dir, stem + ".txt")
            os.rename(os.path.join(charles_file_dir, old_file), txt_path)
            # utf-8-sig strips a BOM if present; strict=False tolerates
            # control characters inside JSON strings.
            with open(txt_path, encoding='utf-8-sig', errors='ignore') as f:
                contents = json.load(f, strict=False)
            video_url_list = []
            cover_url_list = []
            for text in contents:
                if text["host"] == "finder.video.qq.com" and text["path"] == "/251/20302/stodownload":
                    video_url_list.append(text)
                elif text["host"] == "finder.video.qq.com" and text["path"] == "/251/20350/stodownload":
                    cover_url_list.append(text)
            # BUG FIX: the original only checked that the host appeared at all,
            # then indexed [0] on both lists — IndexError when only one of the
            # two paths was captured. Require both lists to be non-empty.
            if video_url_list and cover_url_list:
                video_url = video_url_list[0]['host'] + video_url_list[0]['path'] + '?' + video_url_list[0]['query']
                cover_url = cover_url_list[0]['host'] + cover_url_list[0]['path'] + '?' + cover_url_list[0]['query']
                return video_url, cover_url
            Common.logger(log_type).info("未找到 url,10s后重新获取")
            time.sleep(10)
            # BUG FIX: the original discarded the retry's result and returned
            # None even when the retry found the URLs — propagate it.
            return cls.get_url(log_type)
        except Exception as e:
            Common.logger(log_type).exception("get_url异常:{}", e)
            return None
if __name__ == '__main__':
    # Manual run: fetch one (video_url, cover_url) pair for the recommend feed.
    GetUrl.get_url(log_type='recommend')
|