123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149 |
- """
- Created on Mon Feb 19 2024
- """
- import os
- import sys
- import json
- import random
- import time
- import requests
- import datetime
- sys.path.append(os.getcwd())
- from application.common.proxies import tunnel_proxies
- from application.common.log import AliyunLogger
- from application.common.feishu import FeishuInsert
class PiaoQuanVlog(object):
    """
    Monitor the PiaoQuan vlog recommendation feed.

    Each page of the category feed is requested from the vlog API; every
    returned video is logged through ``AliyunLogger`` and appended to a
    Feishu sheet.

    Request parameters noted by the original author as influencing the
    recommendation algorithm: app_type, mid, timestamp, machine_Info,
    abtest, extParam, pageSource, IP.
    """

    # Seconds to wait on the HTTP request before giving up; without a
    # timeout a stalled connection would block the monitor forever.
    REQUEST_TIMEOUT = 30
    # Pause (seconds) before each video is logged and written to the sheet.
    VIDEO_INTERVAL = 12
    # Pages requested per pass; at 4 videos per page this is ~110 videos.
    PAGES_PER_HOUR = 28
    # No crawling happens during the first QUIET_MINUTES of every hour.
    QUIET_MINUTES = 15
    # Inclusive bounds (seconds) of the random pause between two pages.
    PAGE_PAUSE_RANGE = (4 * 10, 4 * 14)

    def __init__(self):
        self.url = "https://vlogapi.piaoquantv.com/longvideoapi/video/distribute/category/videoList/v2"
        self.headers = {
            'Host': 'vlogapi.piaoquantv.com',
            'xweb_xhr': '1',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.6(0x13080610) XWEB/1156',
            'content-type': 'application/x-www-form-urlencoded',
            'accept': '*/*',
            'sec-fetch-site': 'cross-site',
            'sec-fetch-mode': 'cors',
            'sec-fetch-dest': 'empty',
            'referer': 'https://servicewechat.com/wx89e7eb06478361d7/735/page-frame.html',
            'accept-language': 'en-US,en;q=0.9'
        }
        self.aliyun_log = AliyunLogger(platform="piaoquanVlog", mode="recommend")
        self.feishu = FeishuInsert(document_token="PN9usqN4ehDIFxtdsuqcH546nwe")
        self.sheet_id = "cc9ace"

    @staticmethod
    def _build_payload(page_num):
        """Return the form payload for requesting page ``page_num`` of the feed."""
        # NOTE(review): token, loginUid, machineCode and session ids are
        # hard-coded credentials; consider loading them from configuration.
        return {
            "categoryJson": '{"categoryId":55}',
            "pageNo": page_num,
            "pageSize": 4,
            "sortField": 0,
            "pageSource": "vlog-pages/category",
            "sharePageVideoId": None,
            "shareId": "",
            "careModelStatus": 1,
            "token": "eacc679e9719307517de3d945479a545add3d48a",
            "loginUid": 64168088,
            "platform": "mac",
            "versionCode": 760,
            "machineCode": "weixin_openid_o0w175U16THwQmviRDIdGFmFgc6U",
            "appType": 0,
            "realAppType": 0,
            "system": "Mac OS X 14.2.1",
            "pageCategoryId": 55,
            "rootPageSource": "",
            "shareDepth": "",
            "rootPageCategoryId": "",
            "appId": "wx89e7eb06478361d7",
            # Millisecond timestamp taken at request-build time.
            "clientTimestamp": int(time.time() * 1000),
            "machineInfo": '{"sdkVersion":"3.3.4","brand":"apple","language":"zh_CN","model":"Mac14,2","platform":"mac","system":"Mac OS X 14.2.1","weChatVersion":"3.8.6","screenHeight":736,"screenWidth":414,"pixelRatio":2,"windowHeight":736,"windowWidth":414,"softVersion":"4.1.760"}',
            "networkType": "wifi",
            "network": "wifi",
            "sessionId": "1708320545204-7cfd21f2-2762-a2a8-3331-7fa04e844ab0",
            "subSessionId": "1708320545204-7cfd21f2-2762-a2a8-3331-7fa04e844ab0",
            "returnId": "",
            "jumpHomeVideoId": "",
            "senceType": 1089,
            "hotSenceType": 1089,
            "abExpInfo": '{"ab_test004":[{"abExpCode":"126","configValue":""},{"abExpCode":"211","configValue":""}],"ab_test005":[],"ab_test006":[{"abExpCode":"310","configValue":""},{"abExpCode":"321","configValue":""},{"abExpCode":"331","configValue":""},{"abExpCode":"356","configValue":"{\\"playProgress\\": 10, \\"delayHide\\": 8}"},{"abExpCode":"371","configValue":"{\\"playIcon\\": \\"http://weapppiccdn.yishihui.com/wxicon/common/icon_play_btn_font.png?v=2\\", \\"width\\": 125}"}],"ab_test001":[{"abExpCode":"223","configValue":""},{"abExpCode":"201","configValue":""},{"abExpCode":"410","configValue":"{\\"layerStyle\\": 1, \\"oneDayShowCount\\": 3, \\"everyRecommendVideo\\": 5, \\"playProgress\\": 90, \\"closePosition\\": \\"top\\", \\"guideDialogText\\": [\\"您可以点击‘关注票圈公众号’\\", \\"每日最新资讯不错过\\"], \\"guideButtonText\\": \\"关注票圈公众号\\", \\"topImage\\": \\"https://weapppiccdn.yishihui.com/wxicon/common/img_cgi_image3.png\\", \\"jumpUrl\\": \\"https://mp.weixin.qq.com/s?__biz=MzIxMjg2MzE2Mg==&mid=2247483675&idx=1&sn=0338228015ba7a5b0a1937b14e610efc&chksm=97bed0cea0c959d81d90a5d9ce82502ca24fa418df70d6e619a88d4e007a8b14b2b3b3e62386#rd\\", \\"gzhId\\": 105}"}]}',
            "extParams": '{"eventIds":"22040202,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100","eventInfos":{"ab_test001":"ab100","ab_test002":"ab100","ab_test003":"ab100","ab_test004":"ab100","ab_test005":"ab100","ab_test006":"ab100","ab_test007":"ab100","ab_test008":"ab100","ab_test009":"ab100","ab_test010":"ab100"}}'
        }

    @staticmethod
    def _extract_video_item(video_obj):
        """Pick the monitored fields out of one raw video dict from the API."""
        return {
            "id": video_obj['id'],
            "status": video_obj['status'],
            "uid": video_obj['uid'],
            "playCount": video_obj['playCount'],
            "title": video_obj['title'],
            "titleId": video_obj['titleId'],
            "shareCount": video_obj['shareCountFriend'],
            "favorCount": video_obj['favoriteds'],
            "publish_date": video_obj['gmtCreateDescr']
        }

    @staticmethod
    def _to_sheet_row(video_item, observed_at):
        """Lay out ``video_item`` as one sheet row, stamped with ``observed_at``."""
        return [
            video_item["id"],
            video_item["status"],
            video_item["uid"],
            video_item["title"],
            video_item["titleId"],
            video_item["playCount"],
            video_item["shareCount"],
            video_item["favorCount"],
            str(observed_at),
            video_item["publish_date"],
        ]

    @classmethod
    def _seconds_until_window_opens(cls, now):
        """Return the seconds left from ``now`` until minute QUIET_MINUTES of its hour."""
        return (cls.QUIET_MINUTES - now.minute) * 60 - now.second

    def send_request(self, page_num):
        """
        Request one page of the category feed and process the returned videos.

        :param page_num: page number to request
        :return: None
        :raises KeyError: if the JSON response has no ``data`` key
        :raises requests.RequestException: on network failure or timeout
        """
        response = requests.request(
            "POST",
            self.url,
            headers=self.headers,
            data=self._build_payload(page_num),
            timeout=self.REQUEST_TIMEOUT,
        )
        basic_response = response.json()
        self.process_video_list(basic_response['data'])

    def process_video_list(self, video_list):
        """
        Log every video of a response page and append it to the Feishu sheet.

        :param video_list: iterable of raw video dicts taken from the response
        :return: None
        :raises KeyError: if a video dict lacks one of the monitored fields
        """
        for video_obj in video_list:
            video_item = self._extract_video_item(video_obj)
            # Throttle: spread the videos of one page out over time.
            time.sleep(self.VIDEO_INTERVAL)
            self.aliyun_log.logging(
                code="7001",
                message="监控到一条视频",
                data=video_item
            )
            line = self._to_sheet_row(video_item, datetime.datetime.now())
            # NOTE(review): the row holds 10 values while the range spans
            # A..K (11 columns) - confirm against FeishuInsert.insert_value.
            self.feishu.insert_value(
                sheet_id=self.sheet_id,
                values=[line],
                ranges="A2:K2"
            )

    def run(self):
        """
        Crawl forever, skipping the first QUIET_MINUTES minutes of every hour.

        Outside the quiet window a pass requests PAGES_PER_HOUR pages
        (28 pages * 4 videos =~ 110 videos per hour, about 2640 per day).
        A successful page costs 4 * VIDEO_INTERVAL seconds of per-video
        throttling plus a random pause within PAGE_PAUSE_RANGE, i.e. about
        96 seconds on average.

        A failing page is logged and skipped. The pause between pages is
        taken whether or not the page succeeded, so a persistent error
        cannot turn the loop into a tight retry storm.

        :return: None (never returns)
        """
        while True:
            current_time = datetime.datetime.now()
            if 0 <= current_time.minute < self.QUIET_MINUTES:
                # Sleep until the quiet window of this hour has passed.
                time.sleep(self._seconds_until_window_opens(current_time))
                continue
            for index in range(1, self.PAGES_PER_HOUR + 1):
                try:
                    self.send_request(index)
                except Exception as e:
                    # Best effort: record the failure and move on to the next page.
                    self.aliyun_log.logging(
                        code="3000",
                        message="扫描第{}页失败, 原因是{}".format(index, e)
                    )
                # Random 40-56 second pause between pages, also after a failure.
                time.sleep(random.randint(*self.PAGE_PAUSE_RANGE))
# Script entry point: build the monitor and crawl until the process is stopped.
if __name__ == '__main__':
    PiaoQuanVlog().run()
|