# detail_recommend.py
"""
@author: luojunhui
"""
from __future__ import annotations

import json
import logging

import requests
from tenacity import retry

from applications.utils import proxy, request_retry
from .use_js import call_js_function
  10. retry_desc = request_retry(retry_times=3, min_retry_delay=2, max_retry_delay=30)
  11. @retry(**retry_desc)
  12. def get_associated_recommendation(article_id: str, cookie: str):
  13. """
  14. toutiao related recommendation
  15. """
  16. ms_token = "-aYwLj97uyCi3oghPfhz2nXaekLoFR5YnYUBA5SuyQZae_NLllO4zC30-CeVLth0A6Hmm7MuGr4_IN9MjHUn8wkq-UQKXJxoGmIAokpUsPsOLjdQKffe-cGWCiZ6xqgh7XE%3D"
  17. query_params = [
  18. 0,
  19. 1,
  20. 14,
  21. "min_behot_time=0&channel_id=91558184576&category=pc_profile_channel&disable_raw_data=true&client_extra_params=%7B%22playparam%22%3A%22codec_type%3A0%2Cenable_dash%3A1%2Cunwatermark%3A1%22%2C%22group_id%22%3A%22{}%22%7D&aid=24&app_name=toutiao_web&msToken={}".format(
  22. article_id, ms_token, ms_token
  23. ),
  24. "",
  25. "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
  26. ]
  27. a_bogus = call_js_function(query_params)
  28. url = f"https://www.toutiao.com/api/pc/list/feed?min_behot_time=0&channel_id=91558184576&category=pc_profile_channel&disable_raw_data=true&client_extra_params=%7B%22playparam%22%3A%22codec_type%3A0%2Cenable_dash%3A1%2Cunwatermark%3A1%22%2C%22group_id%22%3A%22{article_id}%22%7D&aid=24&app_name=toutiao_web&msToken={ms_token}&a_bogus={a_bogus}"
  29. headers = {
  30. "accept": "application/json, text/plain, */*",
  31. "accept-language": "zh",
  32. "referer": "https://www.toutiao.com/video/{}/".format(article_id),
  33. "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
  34. "Cookie": cookie,
  35. }
  36. try:
  37. response = requests.get(url, headers=headers, proxies=proxy())
  38. response.raise_for_status()
  39. return response.json()
  40. except requests.exceptions.RequestException as e:
  41. log(
  42. task="toutiao account crawler",
  43. function="get_toutiao_account_video_list",
  44. message=f"API请求失败: {e}",
  45. data={"account_id": article_id},
  46. )
  47. except json.JSONDecodeError as e:
  48. log(
  49. task="toutiao account crawler",
  50. function="get_toutiao_account_video_list",
  51. message=f"响应解析失败: {e}",
  52. data={"account_id": article_id},
  53. )
  54. return None