|
@@ -26,6 +26,8 @@ from common.scheduling_db import MysqlHelper
|
|
|
|
|
|
class HTZFRecommend:
|
|
|
platform = "海豚祝福"
|
|
|
+ i = 0
|
|
|
+ element_list = []
|
|
|
|
|
|
@classmethod
|
|
|
def today_download_cnt(cls, log_type, crawler, env):
|
|
@@ -144,10 +146,12 @@ class HTZFRecommend:
|
|
|
Common.logger(log_type, crawler).info('点击"推荐"列表成功\n')
|
|
|
|
|
|
# while True:
|
|
|
- for page in range(200):
|
|
|
+ for page in range(500):
|
|
|
Common.logger(log_type, crawler).info(f"正在抓取第{page+1}页")
|
|
|
if cls.search_elements(driver, '//*[@class="list"]') is None:
|
|
|
Common.logger(log_type, crawler).info("列表页窗口已销毁\n")
|
|
|
+ cls.element_list = []
|
|
|
+ cls.i = 0
|
|
|
return
|
|
|
for i in range(1):
|
|
|
cls.swipe_up(driver)
|
|
@@ -158,15 +162,29 @@ class HTZFRecommend:
|
|
|
soup.prettify()
|
|
|
|
|
|
video_list_elements = soup.findAll("wx-view", class_="img_bf")
|
|
|
+ # Keep only the elements that are in video_list_elements but not yet in cls.element_list
|
|
|
+ video_list_elements = list(set(video_list_elements).difference(set(cls.element_list)))
|
|
|
+ # Update cls.element_list to the union of video_list_elements and cls.element_list
|
|
|
+ cls.element_list = list(set(video_list_elements) | set(cls.element_list))
|
|
|
Common.logger(log_type, crawler).info(f"第{page+1}页共:{len(video_list_elements)}条视频\n")
|
|
|
|
|
|
+ if len(video_list_elements) == 0:
|
|
|
+ for i in range(10):
|
|
|
+ Common.logger(log_type, crawler).info(f"向上滑动第{i + 1}次")
|
|
|
+ cls.swipe_up(driver)
|
|
|
+ time.sleep(0.5)
|
|
|
+ continue
|
|
|
+
|
|
|
for i, video_element in enumerate(video_list_elements):
|
|
|
try:
|
|
|
today_download = cls.today_download_cnt(log_type, crawler, env)
|
|
|
if today_download >= videos_cnt:
|
|
|
Common.logger(log_type, crawler).info(f"今日已下载视频数:{today_download}")
|
|
|
+ cls.element_list = []
|
|
|
+ cls.i = 0
|
|
|
return
|
|
|
- Common.logger(log_type, crawler).info(f"第{i+1}条视频")
|
|
|
+ cls.i += 1
|
|
|
+ Common.logger(log_type, crawler).info(f"第{cls.i}条视频")
|
|
|
video_title = video_element.find("wx-view", class_="title").text
|
|
|
play_str = video_element.find("wx-view", class_="wan").text
|
|
|
play_cnt = int(re.sub(r"\D", "", play_str)) * 10000 if "万" in play_str else play_str
|