|
@@ -1,74 +1,90 @@
|
|
import requests
|
|
import requests
|
|
import json
|
|
import json
|
|
|
|
+from tenacity import retry
|
|
|
|
|
|
|
|
+from applications import log
|
|
|
|
+from applications.utils import proxy, request_retry
|
|
|
|
+from coldStartTasks.crawler.sohu.basic import generate_random_strings
|
|
|
|
+from coldStartTasks.crawler.sohu.basic import get_ms_timestamp
|
|
|
|
|
|
# Keyword arguments for tenacity's `retry`, produced by the project helper
# `request_retry` (3 attempts; delay bounds 2 and 30 — presumably seconds,
# confirm against the helper).
retry_desc = request_retry(retry_times=3, min_retry_delay=2, max_retry_delay=30)


@retry(**retry_desc)
def get_recommendation_video_list(
    seed_url: str, author_id: str, article_id: str, page: int, timeout=30
):
    """Fetch one page of the Sohu "recommendVideoFeed" block for an article.

    Sends a POST request to the odin.sohu.com ``blockdata`` endpoint, asking
    for 24 recommended videos per page, with the article page passed as the
    ``Referer`` header.

    Args:
        seed_url: URL of the article page; sent as ``Referer`` and attached
            to log records on failure.
        author_id: Sohu author id placed in ``mainContent.authorId``.
        article_id: Sohu article id placed in ``mainContent.articleId`` and
            in the ``mkey`` context field.
        page: Page number of the recommendation feed to request.
        timeout: Value forwarded to ``requests.post(timeout=...)`` so that a
            stalled connection cannot block the caller forever.

    Returns:
        The decoded JSON body of the response, or ``None`` when the request
        fails (network error, timeout, non-2xx status) or the body is not
        valid JSON. Both failure kinds are logged through ``log``.

    NOTE(review): every failure path is handled inside this function, so the
    ``@retry`` wrapper only re-runs it for exceptions that escape these
    handlers (or if ``request_retry`` configures a result-based predicate).
    Confirm that this is the intended retry behaviour.
    """
    url = "https://odin.sohu.com/odin/api/a/blockdata?origin=article"

    # pageId / pvId / requestId are regenerated on every call from the
    # current millisecond timestamp plus a random suffix.
    payload = json.dumps(
        {
            "url": "//odin.sohu.com/odin/api/a/blockdata?origin=article",
            "pageId": f"{get_ms_timestamp()}_{generate_random_strings(3)}",
            "pvId": f"{get_ms_timestamp()}_{generate_random_strings(7)}",
            "mainContent": {
                "productId": "",
                "productType": "",
                "secureScore": "100",
                "categoryId": "13",
                "authorId": author_id,
                "articleId": article_id,
            },
            "resourceList": [
                {
                    "tplCompKey": "recommendVideoFeed",
                    "content": {
                        "page": page,
                        "requestId": f"{get_ms_timestamp()}_{generate_random_strings(3)}",
                        "size": 24,
                        "productId": 1558,
                        "productType": 13,
                        "spm": "smpc.vd-land.end-rec",
                    },
                    "context": {
                        "page_refer_url": "",
                        "mkey": f"channelId_13--mpid_{article_id}",
                    },
                    "adInfo": {},
                    "spmCCode": "end-rec",
                    "resourceId": "000000000000000000",
                }
            ],
        }
    )
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh",
        "Connection": "keep-alive",
        "Content-Type": "application/json",
        "Origin": "https://www.sohu.com",
        "Referer": seed_url,
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
    }

    # Transport-level failures (connection errors, timeouts, HTTP error
    # statuses) are handled separately from body-parsing failures so that each
    # one is logged under the right message.
    try:
        response = requests.post(url, headers=headers, data=payload, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        log(
            task="sohu_recommendation",
            function="get_recommendation_video_list",
            message=f"API请求失败: {e}",
            data={"url": seed_url},
        )
        return None

    # Response.json() raises a ValueError subclass on an invalid body; which
    # concrete class depends on the installed requests / simplejson versions,
    # so catch the common base.
    try:
        return response.json()
    except ValueError as e:
        log(
            task="sohu_recommendation",
            function="get_recommendation_video_list",
            message=f"响应解析失败: {e}",
            data={"url": seed_url},
        )
        return None
|
|
|
|
|
|
# usage example
if __name__ == "__main__":
    # Request page 2 of the recommendation feed for a sample article and
    # pretty-print whatever the call returns (``null`` when it returned None).
    sample_request = {
        "seed_url": "https://www.sohu.com/a/877214751_121141867",
        "author_id": "121141867",
        "article_id": "877214751",
        "page": 2,
    }
    sample_response = get_recommendation_video_list(**sample_request)
    print(json.dumps(sample_response, indent=4, ensure_ascii=False))
|