piaoquan_vlog.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. """
  2. Created on Mon Feb 19 2024
  3. """
  4. import os
  5. import sys
  6. import json
  7. import random
  8. import time
  9. import requests
  10. import datetime
  11. sys.path.append(os.getcwd())
  12. from application.common.proxies import tunnel_proxies
  13. from application.common.log import AliyunLogger
  14. class PiaoQuanVlog(object):
  15. """
  16. 票圈 vlog 推荐监测
  17. 会影响推荐的参数
  18. 算法: app_type, mid, timestamp, machine_Info, abtest, extParam, pageSource, IP
  19. """
  20. def __init__(self):
  21. self.url = "https://vlogapi.piaoquantv.com/longvideoapi/video/distribute/category/videoList/v2"
  22. self.headers = {
  23. 'Host': 'vlogapi.piaoquantv.com',
  24. 'xweb_xhr': '1',
  25. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.6(0x13080610) XWEB/1156',
  26. 'content-type': 'application/x-www-form-urlencoded',
  27. 'accept': '*/*',
  28. 'sec-fetch-site': 'cross-site',
  29. 'sec-fetch-mode': 'cors',
  30. 'sec-fetch-dest': 'empty',
  31. 'referer': 'https://servicewechat.com/wx89e7eb06478361d7/735/page-frame.html',
  32. 'accept-language': 'en-US,en;q=0.9'
  33. }
  34. self.aliyun_log = AliyunLogger(platform="piaoquanVlog", mode="recommend")
  35. def send_request(self, page_num):
  36. """
  37. :param page_num: 第几页
  38. """
  39. payload = {
  40. "categoryJson": '{"categoryId":55}',
  41. "pageNo": page_num,
  42. "pageSize": 4,
  43. "sortField": 0,
  44. "pageSource": "vlog-pages/category",
  45. "sharePageVideoId": None,
  46. "shareId": "",
  47. "careModelStatus": 1,
  48. "token": "eacc679e9719307517de3d945479a545add3d48a",
  49. "loginUid": 64168088,
  50. "platform": "mac",
  51. "versionCode": 760,
  52. "machineCode": "weixin_openid_o0w175U16THwQmviRDIdGFmFgc6U",
  53. "appType": 0,
  54. "realAppType": 0,
  55. "system": "Mac OS X 14.2.1",
  56. "pageCategoryId": 55,
  57. "rootPageSource": "",
  58. "shareDepth": "",
  59. "rootPageCategoryId": "",
  60. "appId": "wx89e7eb06478361d7",
  61. "clientTimestamp": int(time.time() * 1000),
  62. "machineInfo": '{"sdkVersion":"3.3.4","brand":"apple","language":"zh_CN","model":"Mac14,2","platform":"mac","system":"Mac OS X 14.2.1","weChatVersion":"3.8.6","screenHeight":736,"screenWidth":414,"pixelRatio":2,"windowHeight":736,"windowWidth":414,"softVersion":"4.1.760"}',
  63. "networkType": "wifi",
  64. "network": "wifi",
  65. "sessionId": "1708320545204-7cfd21f2-2762-a2a8-3331-7fa04e844ab0",
  66. "subSessionId": "1708320545204-7cfd21f2-2762-a2a8-3331-7fa04e844ab0",
  67. "returnId": "",
  68. "jumpHomeVideoId": "",
  69. "senceType": 1089,
  70. "hotSenceType": 1089,
  71. "abExpInfo": '{"ab_test004":[{"abExpCode":"126","configValue":""},{"abExpCode":"211","configValue":""}],"ab_test005":[],"ab_test006":[{"abExpCode":"310","configValue":""},{"abExpCode":"321","configValue":""},{"abExpCode":"331","configValue":""},{"abExpCode":"356","configValue":"{\\"playProgress\\": 10, \\"delayHide\\": 8}"},{"abExpCode":"371","configValue":"{\\"playIcon\\": \\"http://weapppiccdn.yishihui.com/wxicon/common/icon_play_btn_font.png?v=2\\", \\"width\\": 125}"}],"ab_test001":[{"abExpCode":"223","configValue":""},{"abExpCode":"201","configValue":""},{"abExpCode":"410","configValue":"{\\"layerStyle\\": 1, \\"oneDayShowCount\\": 3, \\"everyRecommendVideo\\": 5, \\"playProgress\\": 90, \\"closePosition\\": \\"top\\", \\"guideDialogText\\": [\\"您可以点击‘关注票圈公众号’\\", \\"每日最新资讯不错过\\"], \\"guideButtonText\\": \\"关注票圈公众号\\", \\"topImage\\": \\"https://weapppiccdn.yishihui.com/wxicon/common/img_cgi_image3.png\\", \\"jumpUrl\\": \\"https://mp.weixin.qq.com/s?__biz=MzIxMjg2MzE2Mg==&mid=2247483675&idx=1&sn=0338228015ba7a5b0a1937b14e610efc&chksm=97bed0cea0c959d81d90a5d9ce82502ca24fa418df70d6e619a88d4e007a8b14b2b3b3e62386#rd\\", \\"gzhId\\": 105}"}]}',
  72. "extParams": '{"eventIds":"22040202,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100","eventInfos":{"ab_test001":"ab100","ab_test002":"ab100","ab_test003":"ab100","ab_test004":"ab100","ab_test005":"ab100","ab_test006":"ab100","ab_test007":"ab100","ab_test008":"ab100","ab_test009":"ab100","ab_test010":"ab100"}}'
  73. }
  74. basic_response = requests.request("POST", self.url, headers=self.headers, data=payload,
  75. proxies=tunnel_proxies()).json()
  76. self.process_video_list(basic_response['data'])
  77. def process_video_list(self, video_list):
  78. """
  79. 处理返回的视频列表
  80. :param video_list: 请求到的 video_list
  81. :return: None
  82. """
  83. for video_obj in video_list:
  84. print(json.dumps(video_obj, ensure_ascii=False, indent=4))
  85. video_item = {
  86. "id": video_obj['id'],
  87. "status": video_obj['status'],
  88. "uid": video_obj['uid'],
  89. "playCount": video_obj['playCount'],
  90. "title": video_obj['title'],
  91. "titleId": video_obj['titleId'],
  92. "shareCount": video_obj['shareCountFriend'],
  93. "favorCount": video_obj['favoriteds'],
  94. "publish_date": video_obj['gmtCreateDescr']
  95. }
  96. time.sleep(12)
  97. # print(json.dumps(video_item, ensure_ascii=False, indent=4))
  98. self.aliyun_log.logging(
  99. code="7001",
  100. message="监控到一条视频",
  101. data=video_item
  102. )
  103. def run(self):
  104. """
  105. 一天抓取 24h, 每个小时的 0-15min 不抓取,每一个小时抓取条数为 110,每天抓取条数为 24 * 110 = 2640 条
  106. 110 / 4 =~ 28, 每一小时大抓取 28 页
  107. :return: None
  108. """
  109. while True:
  110. # 每一小时执行一次
  111. current_time = datetime.datetime.now()
  112. if 0 <= current_time.minute < 15:
  113. # 计算需要等待的秒数,直到15分钟过去
  114. wait_time = (15 - current_time.minute) * 60 - current_time.second
  115. time.sleep(wait_time)
  116. else:
  117. # 平均 96 秒抓一页,即 96秒抓 4 条,每条视频之间等待时间是 24s
  118. # 一共抓取 28 页
  119. for index in range(1, 29):
  120. try:
  121. self.send_request(index)
  122. # 随机休息 1 - 50 秒
  123. time.sleep(random.randint(4 * 10, 4 * 14))
  124. except Exception as e:
  125. self.aliyun_log.logging(
  126. code="3000",
  127. message="扫描第{}页失败, 原因是{}".format(index, e)
  128. )
  129. if __name__ == '__main__':
  130. P = PiaoQuanVlog()
  131. P.run()