import json

import requests
from tenacity import retry

from applications import log
from applications.utils import proxy, request_retry
from coldStartTasks.crawler.sohu.basic import generate_random_strings
from coldStartTasks.crawler.sohu.basic import get_ms_timestamp

# Keyword arguments for tenacity's ``retry`` decorator, built by the project
# helper from the retry count and the min/max delay bounds given here.
retry_desc = request_retry(retry_times=3, min_retry_delay=2, max_retry_delay=30)


@retry(**retry_desc)
def get_recommendation_video_list(seed_url, author_id, article_id, page, *, timeout=30):
    """Fetch one page of the "recommendVideoFeed" block for a Sohu article.

    Sends a JSON POST to the ``odin.sohu.com`` blockdata endpoint and returns
    the decoded JSON response.

    Args:
        seed_url: URL of the article page; sent as the ``Referer`` header and
            attached to log records on failure.
        author_id: Value placed in ``mainContent.authorId``.
        article_id: Value placed in ``mainContent.articleId`` and embedded in
            the ``mkey`` context field.
        page: Page number placed in the resource's ``content.page``; each
            request asks for 24 items (``size``).
        timeout: Timeout passed to ``requests.post`` (seconds, or a
            ``(connect, read)`` tuple). Without it a stalled connection would
            block forever; a timeout surfaces as a ``RequestException`` and is
            handled like any other request failure.

    Returns:
        The parsed JSON body, or ``None`` if the request failed or the
        response could not be decoded (the failure is logged).
    """
    # NOTE(review): both handlers below swallow the exception and return None,
    # so the retry decorator presumably only fires if ``request_retry`` also
    # configures a result-based retry condition -- confirm against the helper.
    url = "https://odin.sohu.com/odin/api/a/blockdata?origin=article"
    payload = json.dumps(
        {
            "url": "//odin.sohu.com/odin/api/a/blockdata?origin=article",
            # pageId / pvId / requestId are "<timestamp>_<random string>"
            # tokens generated fresh for every call.
            "pageId": f"{get_ms_timestamp()}_{generate_random_strings(3)}",
            "pvId": f"{get_ms_timestamp()}_{generate_random_strings(7)}",
            "mainContent": {
                "productId": "",
                "productType": "",
                "secureScore": "100",
                "categoryId": "13",
                "authorId": author_id,
                "articleId": article_id,
            },
            "resourceList": [
                {
                    "tplCompKey": "recommendVideoFeed",
                    "content": {
                        "page": page,
                        "requestId": f"{get_ms_timestamp()}_{generate_random_strings(3)}",
                        "size": 24,
                        "productId": 1558,
                        "productType": 13,
                        "spm": "smpc.vd-land.end-rec",
                    },
                    "context": {
                        "page_refer_url": "",
                        "mkey": f"channelId_13--mpid_{article_id}",
                    },
                    "adInfo": {},
                    "spmCCode": "end-rec",
                    "resourceId": "000000000000000000",
                }
            ],
        }
    )
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh",
        "Connection": "keep-alive",
        "Content-Type": "application/json",
        "Origin": "https://www.sohu.com",
        "Referer": seed_url,
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
    }
    try:
        response = requests.post(url, headers=headers, data=payload, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and non-2xx statuses raised by
        # raise_for_status().
        log(
            task="sohu_recommendation",
            function="get_recommendation_video_list",
            message=f"API请求失败: {e}",
            data={"url": seed_url},
        )
    except json.JSONDecodeError as e:
        log(
            task="sohu_recommendation",
            function="get_recommendation_video_list",
            message=f"响应解析失败: {e}",
            data={"url": seed_url},
        )
    return None


# usage example
if __name__ == "__main__":
    res = get_recommendation_video_list(
        seed_url="https://www.sohu.com/a/877214751_121141867",
        author_id="121141867",
        article_id="877214751",
        page=2,
    )
    print(json.dumps(res, indent=4, ensure_ascii=False))