1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889 |
- import requests
- import json
- from tenacity import retry
- from applications import log
- from applications.utils import proxy, request_retry
- from coldStartTasks.crawler.sohu.basic import generate_random_strings
- from coldStartTasks.crawler.sohu.basic import generate_random_digits
- from coldStartTasks.crawler.sohu.basic import get_ms_timestamp
# Keyword arguments that are unpacked into tenacity's ``@retry`` decorator
# below; they are produced by the project helper ``request_retry``.
# NOTE(review): the delay bounds are presumably in seconds -- confirm against
# applications.utils.request_retry.
retry_desc = request_retry(
    retry_times=3,
    min_retry_delay=2,
    max_retry_delay=30,
)
@retry(**retry_desc)
def get_user_homepage_videos(author_id, page, timeout=30):
    """Fetch one page of an author's homepage feed from Sohu.

    Sends a JSON POST to the Sohu ``odin`` block-data endpoint through the
    project proxy and returns the decoded response body.

    Args:
        author_id: Author identifier; sent as ``context.mkey`` of the feed
            resource. Also attached to failure logs.
        page: Page number of the feed to request.
        timeout: Value passed to ``requests.post`` as ``timeout`` so that a
            stalled proxy or server cannot block the caller indefinitely.

    Returns:
        The parsed JSON response, or ``None`` when the HTTP request fails or
        the body is not valid JSON (the failure is logged in both cases).
    """
    url = "https://odin.sohu.com/odin/api/blockdata"
    payload = {
        "pvId": f"{get_ms_timestamp()}_{generate_random_strings(7)}",
        "pageId": f"{get_ms_timestamp()}_{generate_random_digits(13)}_{get_ms_timestamp()}",
        "mainContent": {
            "productType": "13",
            "productId": "324",
            "secureScore": "5",
            "categoryId": "47",
            "adTags": "11111111",
            # NOTE(review): this authorId is hard-coded and does not follow
            # the ``author_id`` argument -- confirm that this is intentional.
            "authorId": 121135924,
        },
        "resourceList": [
            {
                "tplCompKey": "FeedSlideloadAuthor_2_0_pc_1655965929143_data2",
                "isServerRender": False,
                "isSingleAd": False,
                "configSource": "mp",
                "content": {
                    "productId": "325",
                    "productType": "13",
                    "size": 20,
                    "pro": "0,1,3,4,5",
                    "feedType": "XTOPIC_SYNTHETICAL",
                    "view": "operateFeedMode",
                    "innerTag": "work",
                    "spm": "smpc.channel_248.block3_308_hHsK47_2_fd",
                    "page": page,
                    "requestId": f"{get_ms_timestamp()}{generate_random_strings(7)}_324",
                },
                "adInfo": {},
                "context": {"mkey": author_id},
            }
        ],
    }
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Language": "zh",
        "Connection": "keep-alive",
        "Content-Type": "application/json;charset=UTF-8",
        "Origin": "https://mp.sohu.com",
        "Referer": "https://mp.sohu.com",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
    }
    # NOTE(review): both failure paths below swallow the exception and return
    # None, so ``@retry`` only re-runs this call if ``retry_desc`` is set up
    # to retry on a None result (or on other exception types) -- confirm.
    # NOTE(review): the logged ``function`` value differs from this
    # function's name; left unchanged in case log consumers filter on it.
    try:
        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(payload),
            proxies=proxy(),
            timeout=timeout,
        )
        response.raise_for_status()
        return response.json()
    except json.JSONDecodeError as e:
        # Must be handled before RequestException: recent versions of
        # requests raise a JSONDecodeError that is also a RequestException,
        # which would otherwise be reported as a request failure.
        log(
            task="sohu_author_homepage",
            function="get_homepage_video_list",
            message=f"响应解析失败: {e}",
            data={"author_id": author_id},
        )
    except requests.exceptions.RequestException as e:
        log(
            task="sohu_author_homepage",
            function="get_homepage_video_list",
            message=f"API请求失败: {e}",
            data={"author_id": author_id},
        )
    return None
- # # usage example
- # if __name__ == '__main__':
- # response_ = get_user_homepage_videos(
- # author_id="121141867",
- # page=2
- # )
- # print(json.dumps(response_, indent=4, ensure_ascii=False))
|