|
@@ -48,68 +48,78 @@ class YLGXXSPScheduling:
|
|
|
"Referer": "https://servicewechat.com/wx38382a240eab7214/4/page-frame.html",
|
|
|
"Accept-Language": "en-US,en;q=0.9",
|
|
|
}
|
|
|
- data = {
|
|
|
- "channelId": "1033",
|
|
|
- "needHybrid": "1",
|
|
|
- "pageNo": str(page_id),
|
|
|
- "pageSize": "10",
|
|
|
+ channel_id_dict = {
|
|
|
+ '1059': "搞笑",
|
|
|
+ "1058": "音乐",
|
|
|
+ "1061": "娱乐",
|
|
|
+ "1063": "社会",
|
|
|
+ "1066": "生活",
|
|
|
+ "1064": "猎奇"
|
|
|
}
|
|
|
- response = requests.post(
|
|
|
- "https://cpu.baidu.com/1033/a16a67fe", headers=headers, data=data
|
|
|
- )
|
|
|
- result = response.json()
|
|
|
- if "data" not in result or response.status_code != 200:
|
|
|
- AliyunLogger.logging(
|
|
|
- code="2000",
|
|
|
- platform=self.platform,
|
|
|
- mode=self.mode,
|
|
|
- env=self.env,
|
|
|
- data={},
|
|
|
- message="抓取第{}页失败,无数据".format(page_id),
|
|
|
- )
|
|
|
- return
|
|
|
- elif len(result["data"]["result"]) == 0:
|
|
|
- AliyunLogger.logging(
|
|
|
- code="2001",
|
|
|
- platform=self.platform,
|
|
|
- mode=self.mode,
|
|
|
- env=self.env,
|
|
|
- data={},
|
|
|
- message="抓取d到第{}页, 没有更多数据了".format(page_id),
|
|
|
- )
|
|
|
- return
|
|
|
- else:
|
|
|
- data_list = result["data"]["result"]
|
|
|
- for index, video_obj in enumerate(data_list):
|
|
|
- try:
|
|
|
- AliyunLogger.logging(
|
|
|
- code="1001",
|
|
|
- platform=self.platform,
|
|
|
- mode=self.mode,
|
|
|
- env=self.env,
|
|
|
- data={},
|
|
|
- message="成功扫描到一条视频, 该视频位于第{}页{}条".format(page_id, index + 1),
|
|
|
- )
|
|
|
- self.process_video_obj(video_obj)
|
|
|
- except Exception as e:
|
|
|
- AliyunLogger.logging(
|
|
|
- code="3000",
|
|
|
- platform=self.platform,
|
|
|
- mode=self.mode,
|
|
|
- env=self.env,
|
|
|
- data=video_obj,
|
|
|
- message="抓取单条视频异常, 报错原因是: {}, 该视频位于第{}页{}条".format(
|
|
|
- e, page_id, index + 1
|
|
|
- ),
|
|
|
- )
|
|
|
- AliyunLogger.logging(
|
|
|
- code="1000",
|
|
|
- platform=self.platform,
|
|
|
- mode=self.mode,
|
|
|
- env=self.env,
|
|
|
- data={},
|
|
|
- message="完成抓取第{}页".format(page_id),
|
|
|
+ for channel_id in channel_id_dict:
|
|
|
+ data = {
|
|
|
+ "channelId": channel_id,
|
|
|
+ "needHybrid": "1",
|
|
|
+ "pageNo": str(page_id),
|
|
|
+ "pageSize": "10",
|
|
|
+ }
|
|
|
+ response = requests.post(
|
|
|
+ "https://cpu.baidu.com/1033/a16a67fe", headers=headers, data=data
|
|
|
)
|
|
|
+ result = response.json()
|
|
|
+ channel_name = channel_id_dict[channel_id]
|
|
|
+ if "data" not in result or response.status_code != 200:
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="2000",
|
|
|
+ platform=self.platform,
|
|
|
+ mode=self.mode,
|
|
|
+ env=self.env,
|
|
|
+ data={},
|
|
|
+ message="{}抓取第{}页失败,无数据".format(channel_name, page_id),
|
|
|
+ )
|
|
|
+ return
|
|
|
+ elif len(result["data"]["result"]) == 0:
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="2001",
|
|
|
+ platform=self.platform,
|
|
|
+ mode=self.mode,
|
|
|
+ env=self.env,
|
|
|
+ data={},
|
|
|
+ message="{}抓取d到第{}页, 没有更多数据了".format(channel_name, page_id),
|
|
|
+ )
|
|
|
+ return
|
|
|
+ else:
|
|
|
+ data_list = result["data"]["result"]
|
|
|
+ for index, video_obj in enumerate(data_list):
|
|
|
+ try:
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="1001",
|
|
|
+ platform=self.platform,
|
|
|
+ mode=self.mode,
|
|
|
+ env=self.env,
|
|
|
+ data={},
|
|
|
+ message="{}成功扫描到一条视频, 该视频位于第{}页{}条".format(channel_name, page_id, index + 1),
|
|
|
+ )
|
|
|
+ self.process_video_obj(video_obj)
|
|
|
+ except Exception as e:
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="3000",
|
|
|
+ platform=self.platform,
|
|
|
+ mode=self.mode,
|
|
|
+ env=self.env,
|
|
|
+ data=video_obj,
|
|
|
+ message="{}抓取单条视频异常, 报错原因是: {}, 该视频位于第{}页{}条".format(
|
|
|
+ channel_name, e, page_id, index + 1
|
|
|
+ ),
|
|
|
+ )
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="1000",
|
|
|
+ platform=self.platform,
|
|
|
+ mode=self.mode,
|
|
|
+ env=self.env,
|
|
|
+ data={},
|
|
|
+ message="{}完成抓取第{}页".format(channel_name, page_id),
|
|
|
+ )
|
|
|
|
|
|
def process_video_obj(self, video_obj):
|
|
|
video_id = video_obj.get("data", {}).get("id", 0)
|