123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990 |
- import requests
- import json
- from tenacity import retry
- from applications import log
- from applications.utils import proxy, request_retry
- from coldStartTasks.crawler.sohu.basic import generate_random_strings
- from coldStartTasks.crawler.sohu.basic import get_ms_timestamp
# Keyword arguments for tenacity's ``retry`` decorator, produced by the
# project's ``request_retry`` helper.
retry_desc = request_retry(retry_times=3, min_retry_delay=2, max_retry_delay=30)


@retry(**retry_desc)
def get_recommendation_video_list(seed_url, author_id, article_id, page, timeout=30):
    """Fetch one page of the "recommendVideoFeed" block for a Sohu article.

    Sends a JSON POST to the odin ``blockdata`` endpoint and returns the
    decoded JSON response.

    Args:
        seed_url: URL of the article page; sent as the ``Referer`` header and
            attached to log records on failure.
        author_id: Value placed in ``mainContent.authorId``
            (e.g. ``'121141867'``).
        article_id: Value placed in ``mainContent.articleId`` and embedded in
            the ``mkey`` context field (e.g. ``'877214751'``).
        page: Page number placed in the resource's ``content.page`` field.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``.
            Optional; without a timeout a stalled connection would block
            indefinitely.

    Returns:
        The decoded JSON body on success, or ``None`` if the request failed,
        returned an error status, or the body was not valid JSON (each failure
        is logged via ``log``).

    NOTE(review): request and parse errors are caught and logged here, so no
    exception reaches the ``@retry`` decorator. Unless ``request_retry``
    configures a result-based retry condition, the retry policy will not
    trigger -- confirm whether that is intended.
    """
    url = "https://odin.sohu.com/odin/api/a/blockdata?origin=article"
    payload = json.dumps(
        {
            "url": "//odin.sohu.com/odin/api/a/blockdata?origin=article",
            "pageId": f"{get_ms_timestamp()}_{generate_random_strings(3)}",
            "pvId": f"{get_ms_timestamp()}_{generate_random_strings(7)}",
            "mainContent": {
                "productId": "",
                "productType": "",
                "secureScore": "100",
                "categoryId": "13",
                "authorId": author_id,
                "articleId": article_id,
            },
            "resourceList": [
                {
                    "tplCompKey": "recommendVideoFeed",
                    "content": {
                        "page": page,
                        "requestId": f"{get_ms_timestamp()}_{generate_random_strings(3)}",
                        "size": 24,
                        "productId": 1558,
                        "productType": 13,
                        "spm": "smpc.vd-land.end-rec",
                    },
                    "context": {
                        "page_refer_url": "",
                        "mkey": f"channelId_13--mpid_{article_id}",
                    },
                    "adInfo": {},
                    "spmCCode": "end-rec",
                    "resourceId": "000000000000000000",
                }
            ],
        }
    )
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh",
        "Connection": "keep-alive",
        "Content-Type": "application/json",
        "Origin": "https://www.sohu.com",
        "Referer": seed_url,
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
    }

    # Phase 1: transport / HTTP-status errors.
    try:
        response = requests.post(url, headers=headers, data=payload, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        log(
            task="sohu_recommendation",
            function="get_recommendation_video_list",
            message=f"API请求失败: {e}",
            data={"url": seed_url},
        )
        return None

    # Phase 2: body decoding. Handled separately because on recent versions of
    # requests the JSON decode error also subclasses RequestException, so a
    # shared try block would report it as a request failure. ValueError is the
    # common base of the stdlib, simplejson and requests decode errors.
    try:
        return response.json()
    except ValueError as e:
        log(
            task="sohu_recommendation",
            function="get_recommendation_video_list",
            message=f"响应解析失败: {e}",
            data={"url": seed_url},
        )
        return None
# Manual smoke test: fetch page 2 of the recommendations for one sample
# article and pretty-print the decoded response.
if __name__ == '__main__':
    sample_request = {
        "seed_url": 'https://www.sohu.com/a/877214751_121141867',
        "author_id": '121141867',
        "article_id": '877214751',
        "page": 2,
    }
    res = get_recommendation_video_list(**sample_request)
    # ensure_ascii=False keeps non-ASCII text readable in the console output.
    rendered = json.dumps(res, indent=4, ensure_ascii=False)
    print(rendered)
|