get_recommedation.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. import requests
  2. import json
  3. from tenacity import retry
  4. from applications import log
  5. from applications.utils import proxy, request_retry
  6. from coldStartTasks.crawler.sohu.basic import generate_random_strings
  7. from coldStartTasks.crawler.sohu.basic import get_ms_timestamp
  8. retry_desc = request_retry(retry_times=3, min_retry_delay=2, max_retry_delay=30)
  9. @retry(**retry_desc)
  10. def get_recommendation_video_list(seed_url, author_id, article_id, page):
  11. url = "https://odin.sohu.com/odin/api/a/blockdata?origin=article"
  12. payload = json.dumps(
  13. {
  14. "url": "//odin.sohu.com/odin/api/a/blockdata?origin=article",
  15. "pageId": f"{get_ms_timestamp()}_{generate_random_strings(3)}",
  16. "pvId": f"{get_ms_timestamp()}_{generate_random_strings(7)}",
  17. "mainContent": {
  18. "productId": "",
  19. "productType": "",
  20. "secureScore": "100",
  21. "categoryId": "13",
  22. "authorId": author_id,
  23. "articleId": article_id,
  24. },
  25. "resourceList": [
  26. {
  27. "tplCompKey": "recommendVideoFeed",
  28. "content": {
  29. "page": page,
  30. "requestId": f"{get_ms_timestamp()}_{generate_random_strings(3)}",
  31. "size": 24,
  32. "productId": 1558,
  33. "productType": 13,
  34. "spm": "smpc.vd-land.end-rec",
  35. },
  36. "context": {
  37. "page_refer_url": "",
  38. "mkey": "channelId_13--mpid_{}".format(article_id),
  39. },
  40. "adInfo": {},
  41. "spmCCode": "end-rec",
  42. "resourceId": "000000000000000000",
  43. }
  44. ],
  45. }
  46. )
  47. headers = {
  48. "Accept": "application/json, text/plain, */*",
  49. "Accept-Language": "zh",
  50. "Connection": "keep-alive",
  51. "Content-Type": "application/json",
  52. "Origin": "https://www.sohu.com",
  53. "Referer": seed_url,
  54. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
  55. }
  56. try:
  57. response = requests.post(url, headers=headers, data=payload)
  58. response.raise_for_status()
  59. return response.json()
  60. except requests.exceptions.RequestException as e:
  61. log(
  62. task="sohu_recommendation",
  63. function="get_recommendation_video_list",
  64. message=f"API请求失败: {e}",
  65. data={"url": seed_url},
  66. )
  67. except json.JSONDecodeError as e:
  68. log(
  69. task="sohu_recommendation",
  70. function="get_recommendation_video_list",
  71. message=f"响应解析失败: {e}",
  72. data={"url": seed_url},
  73. )
  74. return None
  75. # usage example
  76. if __name__ == '__main__':
  77. res = get_recommendation_video_list(
  78. seed_url='https://www.sohu.com/a/877214751_121141867',
  79. author_id='121141867',
  80. article_id='877214751',
  81. page=2
  82. )
  83. print(json.dumps(res, indent=4, ensure_ascii=False))