get_user_homepage.py 3.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. import requests
  2. import json
  3. from tenacity import retry
  4. from applications import log
  5. from applications.utils import proxy, request_retry
  6. from coldStartTasks.crawler.sohu.basic import generate_random_strings
  7. from coldStartTasks.crawler.sohu.basic import generate_random_digits
  8. from coldStartTasks.crawler.sohu.basic import get_ms_timestamp
  9. retry_desc = request_retry(retry_times=3, min_retry_delay=2, max_retry_delay=30)
  10. @retry(**retry_desc)
  11. def get_user_homepage_videos(author_id, page):
  12. url = "https://odin.sohu.com/odin/api/blockdata"
  13. payload = {
  14. "pvId": f"{get_ms_timestamp()}_{generate_random_strings(7)}",
  15. "pageId": f"{get_ms_timestamp()}_{generate_random_digits(13)}_{get_ms_timestamp()}",
  16. "mainContent": {
  17. "productType": "13",
  18. "productId": "324",
  19. "secureScore": "5",
  20. "categoryId": "47",
  21. "adTags": "11111111",
  22. "authorId": 121135924,
  23. },
  24. "resourceList": [
  25. {
  26. "tplCompKey": "FeedSlideloadAuthor_2_0_pc_1655965929143_data2",
  27. "isServerRender": False,
  28. "isSingleAd": False,
  29. "configSource": "mp",
  30. "content": {
  31. "productId": "325",
  32. "productType": "13",
  33. "size": 20,
  34. "pro": "0,1,3,4,5",
  35. "feedType": "XTOPIC_SYNTHETICAL",
  36. "view": "operateFeedMode",
  37. "innerTag": "work",
  38. "spm": "smpc.channel_248.block3_308_hHsK47_2_fd",
  39. "page": page,
  40. "requestId": f"{get_ms_timestamp()}{generate_random_strings(7)}_324",
  41. },
  42. "adInfo": {},
  43. "context": {"mkey": author_id},
  44. }
  45. ],
  46. }
  47. headers = {
  48. "Accept": "application/json, text/javascript, */*; q=0.01",
  49. "Accept-Language": "zh",
  50. "Connection": "keep-alive",
  51. "Content-Type": "application/json;charset=UTF-8",
  52. "Origin": "https://mp.sohu.com",
  53. "Referer": "https://mp.sohu.com",
  54. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
  55. }
  56. try:
  57. response = requests.post(
  58. url, headers=headers, data=json.dumps(payload), proxies=proxy()
  59. )
  60. response.raise_for_status()
  61. return response.json()
  62. except requests.exceptions.RequestException as e:
  63. log(
  64. task="sohu_author_homepage",
  65. function="get_homepage_video_list",
  66. message=f"API请求失败: {e}",
  67. data={"author_id": author_id},
  68. )
  69. except json.JSONDecodeError as e:
  70. log(
  71. task="sohu_author_homepage",
  72. function="get_homepage_video_list",
  73. message=f"响应解析失败: {e}",
  74. data={"author_id": author_id},
  75. )
  76. return None
  77. # # usage example
  78. # if __name__ == '__main__':
  79. # response_ = get_user_homepage_videos(
  80. # author_id="121141867",
  81. # page=2
  82. # )
  83. # print(json.dumps(response_, indent=4, ensure_ascii=False))