piaoquan_vlog.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. """
  2. Created on Mon Feb 19 2024
  3. """
  4. import os
  5. import sys
  6. import json
  7. import random
  8. import time
  9. import requests
  10. import datetime
  11. sys.path.append(os.getcwd())
  12. from application.common.proxies import tunnel_proxies
  13. from application.common.log import AliyunLogger
  14. from application.common.feishu import FeishuInsert
  15. class PiaoQuanVlog(object):
  16. """
  17. 票圈 vlog 推荐监测
  18. 会影响推荐的参数
  19. 算法: app_type, mid, timestamp, machine_Info, abtest, extParam, pageSource, IP
  20. """
  21. def __init__(self):
  22. self.url = "https://vlogapi.piaoquantv.com/longvideoapi/video/distribute/category/videoList/v2"
  23. self.headers = {
  24. 'Host': 'vlogapi.piaoquantv.com',
  25. 'xweb_xhr': '1',
  26. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.6(0x13080610) XWEB/1156',
  27. 'content-type': 'application/x-www-form-urlencoded',
  28. 'accept': '*/*',
  29. 'sec-fetch-site': 'cross-site',
  30. 'sec-fetch-mode': 'cors',
  31. 'sec-fetch-dest': 'empty',
  32. 'referer': 'https://servicewechat.com/wx89e7eb06478361d7/735/page-frame.html',
  33. 'accept-language': 'en-US,en;q=0.9'
  34. }
  35. self.aliyun_log = AliyunLogger(platform="piaoquanVlog", mode="recommend")
  36. self.feishu = FeishuInsert(document_token="PN9usqN4ehDIFxtdsuqcH546nwe")
  37. self.sheet_id = "cc9ace"
  38. def send_request(self, page_num):
  39. """
  40. :param page_num: 第几页
  41. """
  42. payload = {
  43. "categoryJson": '{"categoryId":55}',
  44. "pageNo": page_num,
  45. "pageSize": 4,
  46. "sortField": 0,
  47. "pageSource": "vlog-pages/category",
  48. "sharePageVideoId": None,
  49. "shareId": "",
  50. "careModelStatus": 1,
  51. "token": "eacc679e9719307517de3d945479a545add3d48a",
  52. "loginUid": 64168088,
  53. "platform": "mac",
  54. "versionCode": 760,
  55. "machineCode": "weixin_openid_o0w175U16THwQmviRDIdGFmFgc6U",
  56. "appType": 0,
  57. "realAppType": 0,
  58. "system": "Mac OS X 14.2.1",
  59. "pageCategoryId": 55,
  60. "rootPageSource": "",
  61. "shareDepth": "",
  62. "rootPageCategoryId": "",
  63. "appId": "wx89e7eb06478361d7",
  64. "clientTimestamp": int(time.time() * 1000),
  65. "machineInfo": '{"sdkVersion":"3.3.4","brand":"apple","language":"zh_CN","model":"Mac14,2","platform":"mac","system":"Mac OS X 14.2.1","weChatVersion":"3.8.6","screenHeight":736,"screenWidth":414,"pixelRatio":2,"windowHeight":736,"windowWidth":414,"softVersion":"4.1.760"}',
  66. "networkType": "wifi",
  67. "network": "wifi",
  68. "sessionId": "1708320545204-7cfd21f2-2762-a2a8-3331-7fa04e844ab0",
  69. "subSessionId": "1708320545204-7cfd21f2-2762-a2a8-3331-7fa04e844ab0",
  70. "returnId": "",
  71. "jumpHomeVideoId": "",
  72. "senceType": 1089,
  73. "hotSenceType": 1089,
  74. "abExpInfo": '{"ab_test004":[{"abExpCode":"126","configValue":""},{"abExpCode":"211","configValue":""}],"ab_test005":[],"ab_test006":[{"abExpCode":"310","configValue":""},{"abExpCode":"321","configValue":""},{"abExpCode":"331","configValue":""},{"abExpCode":"356","configValue":"{\\"playProgress\\": 10, \\"delayHide\\": 8}"},{"abExpCode":"371","configValue":"{\\"playIcon\\": \\"http://weapppiccdn.yishihui.com/wxicon/common/icon_play_btn_font.png?v=2\\", \\"width\\": 125}"}],"ab_test001":[{"abExpCode":"223","configValue":""},{"abExpCode":"201","configValue":""},{"abExpCode":"410","configValue":"{\\"layerStyle\\": 1, \\"oneDayShowCount\\": 3, \\"everyRecommendVideo\\": 5, \\"playProgress\\": 90, \\"closePosition\\": \\"top\\", \\"guideDialogText\\": [\\"您可以点击‘关注票圈公众号’\\", \\"每日最新资讯不错过\\"], \\"guideButtonText\\": \\"关注票圈公众号\\", \\"topImage\\": \\"https://weapppiccdn.yishihui.com/wxicon/common/img_cgi_image3.png\\", \\"jumpUrl\\": \\"https://mp.weixin.qq.com/s?__biz=MzIxMjg2MzE2Mg==&mid=2247483675&idx=1&sn=0338228015ba7a5b0a1937b14e610efc&chksm=97bed0cea0c959d81d90a5d9ce82502ca24fa418df70d6e619a88d4e007a8b14b2b3b3e62386#rd\\", \\"gzhId\\": 105}"}]}',
  75. "extParams": '{"eventIds":"22040202,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100","eventInfos":{"ab_test001":"ab100","ab_test002":"ab100","ab_test003":"ab100","ab_test004":"ab100","ab_test005":"ab100","ab_test006":"ab100","ab_test007":"ab100","ab_test008":"ab100","ab_test009":"ab100","ab_test010":"ab100"}}'
  76. }
  77. basic_response = requests.request("POST", self.url, headers=self.headers, data=payload,
  78. ).json()
  79. self.process_video_list(basic_response['data'])
  80. def process_video_list(self, video_list):
  81. """
  82. 处理返回的视频列表
  83. :param video_list: 请求到的 video_list
  84. :return: None
  85. """
  86. for video_obj in video_list:
  87. # print(json.dumps(video_obj, ensure_ascii=False, indent=4))
  88. video_item = {
  89. "id": video_obj['id'],
  90. "status": video_obj['status'],
  91. "uid": video_obj['uid'],
  92. "playCount": video_obj['playCount'],
  93. "title": video_obj['title'],
  94. "titleId": video_obj['titleId'],
  95. "shareCount": video_obj['shareCountFriend'],
  96. "favorCount": video_obj['favoriteds'],
  97. "publish_date": video_obj['gmtCreateDescr']
  98. }
  99. time.sleep(12)
  100. self.aliyun_log.logging(
  101. code="7001",
  102. message="监控到一条视频",
  103. data=video_item
  104. )
  105. line = [video_item["id"], video_item["status"], video_item["uid"], video_item["title"], video_item["titleId"],video_item['playCount'], video_item["shareCount"], video_item["favorCount"], datetime.datetime.now().__str__(), video_item['publish_date']]
  106. self.feishu.insert_value(
  107. sheet_id=self.sheet_id,
  108. values=[line],
  109. ranges="A2:K2"
  110. )
  111. def run(self):
  112. """
  113. 一天抓取 24h, 每个小时的 0-15min 不抓取,每一个小时抓取条数为 110,每天抓取条数为 24 * 110 = 2640 条
  114. 110 / 4 =~ 28, 每一小时大抓取 28 页
  115. :return: None
  116. """
  117. while True:
  118. # 每一小时执行一次
  119. current_time = datetime.datetime.now()
  120. if 0 <= current_time.minute < 15:
  121. # 计算需要等待的秒数,直到15分钟过去
  122. wait_time = (15 - current_time.minute) * 60 - current_time.second
  123. time.sleep(wait_time)
  124. else:
  125. # 平均 96 秒抓一页,即 96秒抓 4 条,每条视频之间等待时间是 24s
  126. # 一共抓取 28 页
  127. for index in range(1, 29):
  128. try:
  129. self.send_request(index)
  130. # 随机休息 1 - 50 秒
  131. time.sleep(random.randint(4 * 10, 4 * 14))
  132. except Exception as e:
  133. self.aliyun_log.logging(
  134. code="3000",
  135. message="扫描第{}页失败, 原因是{}".format(index, e)
  136. )
  137. if __name__ == '__main__':
  138. P = PiaoQuanVlog()
  139. P.run()