""" Created on Mon Feb 19 2024 """ import os import sys import json import random import time import requests import datetime sys.path.append(os.getcwd()) from application.common.proxies import tunnel_proxies from application.common.log import AliyunLogger from application.common.feishu import FeishuInsert class PiaoQuanVlog(object): """ 票圈 vlog 推荐监测 会影响推荐的参数 算法: app_type, mid, timestamp, machine_Info, abtest, extParam, pageSource, IP """ def __init__(self): self.url = "https://vlogapi.piaoquantv.com/longvideoapi/video/distribute/category/videoList/v2" self.headers = { 'Host': 'vlogapi.piaoquantv.com', 'xweb_xhr': '1', 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.6(0x13080610) XWEB/1156', 'content-type': 'application/x-www-form-urlencoded', 'accept': '*/*', 'sec-fetch-site': 'cross-site', 'sec-fetch-mode': 'cors', 'sec-fetch-dest': 'empty', 'referer': 'https://servicewechat.com/wx89e7eb06478361d7/735/page-frame.html', 'accept-language': 'en-US,en;q=0.9' } self.aliyun_log = AliyunLogger(platform="piaoquanVlog", mode="recommend") self.feishu = FeishuInsert(document_token="PN9usqN4ehDIFxtdsuqcH546nwe") self.sheet_id = "cc9ace" def send_request(self, page_num): """ :param page_num: 第几页 """ payload = { "categoryJson": '{"categoryId":55}', "pageNo": page_num, "pageSize": 4, "sortField": 0, "pageSource": "vlog-pages/category", "sharePageVideoId": None, "shareId": "", "careModelStatus": 1, "token": "eacc679e9719307517de3d945479a545add3d48a", "loginUid": 64168088, "platform": "mac", "versionCode": 760, "machineCode": "weixin_openid_o0w175U16THwQmviRDIdGFmFgc6U", "appType": 0, "realAppType": 0, "system": "Mac OS X 14.2.1", "pageCategoryId": 55, "rootPageSource": "", "shareDepth": "", "rootPageCategoryId": "", "appId": "wx89e7eb06478361d7", "clientTimestamp": int(time.time() * 1000), "machineInfo": '{"sdkVersion":"3.3.4","brand":"apple","language":"zh_CN","model":"Mac14,2","platform":"mac","system":"Mac OS X 14.2.1","weChatVersion":"3.8.6","screenHeight":736,"screenWidth":414,"pixelRatio":2,"windowHeight":736,"windowWidth":414,"softVersion":"4.1.760"}', "networkType": "wifi", "network": "wifi", "sessionId": "1708320545204-7cfd21f2-2762-a2a8-3331-7fa04e844ab0", "subSessionId": "1708320545204-7cfd21f2-2762-a2a8-3331-7fa04e844ab0", "returnId": "", "jumpHomeVideoId": "", "senceType": 1089, "hotSenceType": 1089, "abExpInfo": '{"ab_test004":[{"abExpCode":"126","configValue":""},{"abExpCode":"211","configValue":""}],"ab_test005":[],"ab_test006":[{"abExpCode":"310","configValue":""},{"abExpCode":"321","configValue":""},{"abExpCode":"331","configValue":""},{"abExpCode":"356","configValue":"{\\"playProgress\\": 10, \\"delayHide\\": 8}"},{"abExpCode":"371","configValue":"{\\"playIcon\\": \\"http://weapppiccdn.yishihui.com/wxicon/common/icon_play_btn_font.png?v=2\\", \\"width\\": 125}"}],"ab_test001":[{"abExpCode":"223","configValue":""},{"abExpCode":"201","configValue":""},{"abExpCode":"410","configValue":"{\\"layerStyle\\": 1, \\"oneDayShowCount\\": 3, \\"everyRecommendVideo\\": 5, \\"playProgress\\": 90, \\"closePosition\\": \\"top\\", \\"guideDialogText\\": [\\"您可以点击‘关注票圈公众号’\\", \\"每日最新资讯不错过\\"], \\"guideButtonText\\": \\"关注票圈公众号\\", \\"topImage\\": \\"https://weapppiccdn.yishihui.com/wxicon/common/img_cgi_image3.png\\", \\"jumpUrl\\": \\"https://mp.weixin.qq.com/s?__biz=MzIxMjg2MzE2Mg==&mid=2247483675&idx=1&sn=0338228015ba7a5b0a1937b14e610efc&chksm=97bed0cea0c959d81d90a5d9ce82502ca24fa418df70d6e619a88d4e007a8b14b2b3b3e62386#rd\\", \\"gzhId\\": 105}"}]}', "extParams": '{"eventIds":"22040202,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100","eventInfos":{"ab_test001":"ab100","ab_test002":"ab100","ab_test003":"ab100","ab_test004":"ab100","ab_test005":"ab100","ab_test006":"ab100","ab_test007":"ab100","ab_test008":"ab100","ab_test009":"ab100","ab_test010":"ab100"}}' } basic_response = requests.request("POST", self.url, headers=self.headers, data=payload, ).json() self.process_video_list(basic_response['data']) def process_video_list(self, video_list): """ 处理返回的视频列表 :param video_list: 请求到的 video_list :return: None """ for video_obj in video_list: # print(json.dumps(video_obj, ensure_ascii=False, indent=4)) video_item = { "id": video_obj['id'], "status": video_obj['status'], "uid": video_obj['uid'], "playCount": video_obj['playCount'], "title": video_obj['title'], "titleId": video_obj['titleId'], "shareCount": video_obj['shareCountFriend'], "favorCount": video_obj['favoriteds'], "publish_date": video_obj['gmtCreateDescr'] } time.sleep(12) self.aliyun_log.logging( code="7001", message="监控到一条视频", data=video_item ) line = [video_item["id"], video_item["status"], video_item["uid"], video_item["title"], video_item["titleId"],video_item['playCount'], video_item["shareCount"], video_item["favorCount"], datetime.datetime.now().__str__(), video_item['publish_date']] self.feishu.insert_value( sheet_id=self.sheet_id, values=[line], ranges="A2:K2" ) def run(self): """ 一天抓取 24h, 每个小时的 0-15min 不抓取,每一个小时抓取条数为 110,每天抓取条数为 24 * 110 = 2640 条 110 / 4 =~ 28, 每一小时大抓取 28 页 :return: None """ while True: # 每一小时执行一次 current_time = datetime.datetime.now() if 0 <= current_time.minute < 15: # 计算需要等待的秒数,直到15分钟过去 wait_time = (15 - current_time.minute) * 60 - current_time.second time.sleep(wait_time) else: # 平均 96 秒抓一页,即 96秒抓 4 条,每条视频之间等待时间是 24s # 一共抓取 28 页 for index in range(1, 29): try: self.send_request(index) # 随机休息 1 - 50 秒 time.sleep(random.randint(4 * 10, 4 * 14)) except Exception as e: self.aliyun_log.logging( code="3000", message="扫描第{}页失败, 原因是{}".format(index, e) ) if __name__ == '__main__': P = PiaoQuanVlog() P.run()