|
@@ -7,6 +7,7 @@ import json
|
|
import random
|
|
import random
|
|
import time
|
|
import time
|
|
import requests
|
|
import requests
|
|
|
|
+import datetime
|
|
|
|
|
|
sys.path.append(os.getcwd())
|
|
sys.path.append(os.getcwd())
|
|
|
|
|
|
@@ -76,7 +77,8 @@ class PiaoQuanVlog(object):
|
|
"abExpInfo": '{"ab_test004":[{"abExpCode":"126","configValue":""},{"abExpCode":"211","configValue":""}],"ab_test005":[],"ab_test006":[{"abExpCode":"310","configValue":""},{"abExpCode":"321","configValue":""},{"abExpCode":"331","configValue":""},{"abExpCode":"356","configValue":"{\\"playProgress\\": 10, \\"delayHide\\": 8}"},{"abExpCode":"371","configValue":"{\\"playIcon\\": \\"http://weapppiccdn.yishihui.com/wxicon/common/icon_play_btn_font.png?v=2\\", \\"width\\": 125}"}],"ab_test001":[{"abExpCode":"223","configValue":""},{"abExpCode":"201","configValue":""},{"abExpCode":"410","configValue":"{\\"layerStyle\\": 1, \\"oneDayShowCount\\": 3, \\"everyRecommendVideo\\": 5, \\"playProgress\\": 90, \\"closePosition\\": \\"top\\", \\"guideDialogText\\": [\\"您可以点击‘关注票圈公众号’\\", \\"每日最新资讯不错过\\"], \\"guideButtonText\\": \\"关注票圈公众号\\", \\"topImage\\": \\"https://weapppiccdn.yishihui.com/wxicon/common/img_cgi_image3.png\\", \\"jumpUrl\\": \\"https://mp.weixin.qq.com/s?__biz=MzIxMjg2MzE2Mg==&mid=2247483675&idx=1&sn=0338228015ba7a5b0a1937b14e610efc&chksm=97bed0cea0c959d81d90a5d9ce82502ca24fa418df70d6e619a88d4e007a8b14b2b3b3e62386#rd\\", \\"gzhId\\": 105}"}]}',
|
|
"abExpInfo": '{"ab_test004":[{"abExpCode":"126","configValue":""},{"abExpCode":"211","configValue":""}],"ab_test005":[],"ab_test006":[{"abExpCode":"310","configValue":""},{"abExpCode":"321","configValue":""},{"abExpCode":"331","configValue":""},{"abExpCode":"356","configValue":"{\\"playProgress\\": 10, \\"delayHide\\": 8}"},{"abExpCode":"371","configValue":"{\\"playIcon\\": \\"http://weapppiccdn.yishihui.com/wxicon/common/icon_play_btn_font.png?v=2\\", \\"width\\": 125}"}],"ab_test001":[{"abExpCode":"223","configValue":""},{"abExpCode":"201","configValue":""},{"abExpCode":"410","configValue":"{\\"layerStyle\\": 1, \\"oneDayShowCount\\": 3, \\"everyRecommendVideo\\": 5, \\"playProgress\\": 90, \\"closePosition\\": \\"top\\", \\"guideDialogText\\": [\\"您可以点击‘关注票圈公众号’\\", \\"每日最新资讯不错过\\"], \\"guideButtonText\\": \\"关注票圈公众号\\", \\"topImage\\": \\"https://weapppiccdn.yishihui.com/wxicon/common/img_cgi_image3.png\\", \\"jumpUrl\\": \\"https://mp.weixin.qq.com/s?__biz=MzIxMjg2MzE2Mg==&mid=2247483675&idx=1&sn=0338228015ba7a5b0a1937b14e610efc&chksm=97bed0cea0c959d81d90a5d9ce82502ca24fa418df70d6e619a88d4e007a8b14b2b3b3e62386#rd\\", \\"gzhId\\": 105}"}]}',
|
|
"extParams": '{"eventIds":"22040202,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100","eventInfos":{"ab_test001":"ab100","ab_test002":"ab100","ab_test003":"ab100","ab_test004":"ab100","ab_test005":"ab100","ab_test006":"ab100","ab_test007":"ab100","ab_test008":"ab100","ab_test009":"ab100","ab_test010":"ab100"}}'
|
|
"extParams": '{"eventIds":"22040202,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100,ab100","eventInfos":{"ab_test001":"ab100","ab_test002":"ab100","ab_test003":"ab100","ab_test004":"ab100","ab_test005":"ab100","ab_test006":"ab100","ab_test007":"ab100","ab_test008":"ab100","ab_test009":"ab100","ab_test010":"ab100"}}'
|
|
}
|
|
}
|
|
- basic_response = requests.request("POST", self.url, headers=self.headers, data=payload, proxies=tunnel_proxies()).json()
|
|
|
|
|
|
+ basic_response = requests.request("POST", self.url, headers=self.headers, data=payload,
|
|
|
|
+ proxies=tunnel_proxies()).json()
|
|
self.process_video_list(basic_response['data'])
|
|
self.process_video_list(basic_response['data'])
|
|
|
|
|
|
def process_video_list(self, video_list):
|
|
def process_video_list(self, video_list):
|
|
@@ -96,6 +98,7 @@ class PiaoQuanVlog(object):
|
|
"shareCount": video_obj['shareCountFriend'],
|
|
"shareCount": video_obj['shareCountFriend'],
|
|
"favorCount": video_obj['favoriteds']
|
|
"favorCount": video_obj['favoriteds']
|
|
}
|
|
}
|
|
|
|
+ time.sleep(12)
|
|
# print(json.dumps(video_item, ensure_ascii=False, indent=4))
|
|
# print(json.dumps(video_item, ensure_ascii=False, indent=4))
|
|
self.aliyun_log.logging(
|
|
self.aliyun_log.logging(
|
|
code="7001",
|
|
code="7001",
|
|
@@ -105,22 +108,30 @@ class PiaoQuanVlog(object):
|
|
|
|
|
|
def run(self):
|
|
def run(self):
|
|
"""
|
|
"""
|
|
- 执行函数
|
|
|
|
|
|
+ Crawl around the clock, except during minutes 0-15 of every hour; the target is 110 videos per hour, i.e. 24 * 110 = 2640 videos per day.
|
|
|
|
+ 110 / 4 ≈ 28, so each crawl round fetches 28 pages (assuming 4 videos per page).
|
|
:return: None
|
|
:return: None
|
|
"""
|
|
"""
|
|
while True:
|
|
while True:
|
|
- for index in range(1, 51):
|
|
|
|
- try:
|
|
|
|
- self.send_request(index)
|
|
|
|
- # 随机休息 1 - 50 秒
|
|
|
|
- time.sleep(random.randint(1, 50))
|
|
|
|
- except Exception as e:
|
|
|
|
- self.aliyun_log.logging(
|
|
|
|
- code="3000",
|
|
|
|
- message="扫描第{}页失败, 原因是{}".format(index, e)
|
|
|
|
- )
|
|
|
|
- # 抓完 50 页后休息 10 分钟
|
|
|
|
- time.sleep(60 * 10)
|
|
|
|
|
|
+ # 每一小时执行一次
|
|
|
|
+ current_time = datetime.datetime.now()
|
|
|
|
+ if 0 <= current_time.minute < 15:
|
|
|
|
+ # 计算需要等待的秒数,直到15分钟过去
|
|
|
|
+ wait_time = (15 - current_time.minute) * 60 - current_time.second
|
|
|
|
+ time.sleep(wait_time)
|
|
|
|
+ else:
|
|
|
|
+ # 平均 96 秒抓一页,即 96秒抓 4 条,每条视频之间等待时间是 24s
|
|
|
|
+ # 一共抓取 28 页
|
|
|
|
+ for index in range(1, 29):
|
|
|
|
+ try:
|
|
|
|
+ self.send_request(index)
|
|
|
|
+ # Sleep a random 40-56 seconds between pages (skipped when send_request raises)
|
|
|
|
+ time.sleep(random.randint(4 * 10, 4 * 14))
|
|
|
|
+ except Exception as e:
|
|
|
|
+ self.aliyun_log.logging(
|
|
|
|
+ code="3000",
|
|
|
|
+ message="扫描第{}页失败, 原因是{}".format(index, e)
|
|
|
|
+ )
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
if __name__ == '__main__':
|