search_spider.py 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
"""
Fetch the hot-topic calendar.
"""
  4. import schedule
  5. import requests
  6. import json
  7. import datetime
  8. class PQuanCalendar(object):
  9. """
  10. 热点日历
  11. """
  12. def __init__(self):
  13. self.url = "https://www.adguider.com/sv1/calendar/getCalendarAjax"
  14. self.c_dict = {}
  15. def get_calendar(self):
  16. """
  17. 请求日历
  18. :return:
  19. """
  20. today = datetime.datetime.today()
  21. tomorrow = today + datetime.timedelta(days=1)
  22. tomorrow = tomorrow.strftime("%Y-%m-%d")
  23. payload = json.dumps({
  24. "startTime": tomorrow,
  25. "endTime": tomorrow,
  26. "fdIdList": [
  27. 13,
  28. 4,
  29. 3,
  30. 10,
  31. 5,
  32. 8,
  33. ]
  34. })
  35. headers = {
  36. 'Accept': 'application/json, text/javascript, */*; q=0.01',
  37. 'Accept-Language': 'en,zh;q=0.9,zh-CN;q=0.8',
  38. 'Connection': 'keep-alive',
  39. 'Content-Type': 'application/json',
  40. 'Origin': 'https://www.adguider.com',
  41. 'Referer': 'https://www.adguider.com/sv1/calendar/getCalendar?mode=1&startDate=2024/03/13',
  42. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
  43. }
  44. response = requests.post(self.url, headers=headers, data=payload)
  45. result = response.json()
  46. # print(json.dumps(result, ensure_ascii=False, indent=4))
  47. return result
  48. def process_response(self, response_json):
  49. """
  50. 对获取的数据进行处理, 处理结构为
  51. date_info: [festival_obj1, festival_obj2, festival_obj3, festival_obj4 ......]
  52. :param response_json:
  53. :return: calender_dict
  54. """
  55. if response_json['data']:
  56. for cat_obj in response_json['data']:
  57. if cat_obj.get("adFestivalFixedVos"):
  58. festival_list = cat_obj["adFestivalFixedVos"]
  59. category = cat_obj["ftName"]
  60. self.c_dict[category] = [item['ffName'] for item in festival_list]
  61. print(json.dumps(self.c_dict, ensure_ascii=False, indent=4))
  62. return self.c_dict
  63. class SearchSpider(object):
  64. """
  65. 定时从日历中获取明天的节日,通过节日去搜索视频,并且把视频发送至 ETL 下载
  66. """
  67. def __init__(self, festival_dict):
  68. self.key_list = festival_dict
  69. if __name__ == '__main__':
  70. P = PQuanCalendar()
  71. res = P.get_calendar()
  72. P.process_response(res)