@@ -297,23 +297,23 @@ class ShipinhaoSearch:
         index = 0
         while True:
-            try:
-                if cls.search_elements(driver, '//*[@class="double-rich double-rich_vertical"]') is None:
-                    Common.logger(log_type, crawler).info('窗口已销毁\n')
-                    return
-
-                Common.logger(log_type, crawler).info('获取视频列表\n')
-                video_elements = cls.search_elements(driver, '//div[@class="vc active__mask"]')
-                if video_elements is None:
-                    Common.logger(log_type, crawler).warning(f'video_elements:{video_elements}')
-                    return
-
-                video_element_temp = video_elements[index:]
-                if len(video_element_temp) == 0:
-                    Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
-                    return
-
-                for i, video_element in enumerate(video_element_temp):
+            if cls.search_elements(driver, '//*[@class="double-rich double-rich_vertical"]') is None:
+                Common.logger(log_type, crawler).info('窗口已销毁\n')
+                return
+
+            Common.logger(log_type, crawler).info('获取视频列表\n')
+            video_elements = cls.search_elements(driver, '//div[@class="vc active__mask"]')
+            if video_elements is None:
+                Common.logger(log_type, crawler).warning(f'video_elements:{video_elements}')
+                return
+
+            video_element_temp = video_elements[index:]
+            if len(video_element_temp) == 0:
+                Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
+                return
+
+            for i, video_element in enumerate(video_element_temp):
+                try:
                     Common.logger(log_type, crawler).info(f"download_cnt:{cls.download_cnt}")
                     if cls.download_cnt >= cls.videos_cnt(log_type, crawler):
                         Common.logger(log_type, crawler).info(f'搜索词:"{word}",已抓取视频数:{cls.download_cnt}')
@@ -386,13 +386,12 @@ class ShipinhaoSearch:
                                          video_dict=video_dict,
                                          our_uid=our_uid,
                                          env=env)
+                except Exception as e:
+                    Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")

-                Common.logger(log_type, crawler).info('已抓取完一组视频,休眠1秒\n')
-                time.sleep(1)
-                index = index + len(video_element_temp)
-            except Exception as e:
-                Common.logger(log_type, crawler).info(f"get_videoList:{e}\n")
-                cls.i = 0
+            Common.logger(log_type, crawler).info('已抓取完一组视频,休眠1秒\n')
+            time.sleep(1)
+            index = index + len(video_element_temp)

     @classmethod
     def download_publish(cls, log_type, crawler, word, video_dict, our_uid, env):
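Taken together, the two hunks above narrow the error-handling scope in `get_videoList`: the `try`/`except` that used to wrap the entire `while` body now wraps only the handling of a single video element, so one bad element is logged and skipped instead of aborting the page scan and resetting `cls.i`. A minimal sketch of the resulting control flow, with hypothetical `fetch_batch()`/`process()` helpers standing in for `search_elements()` and the download/publish logic:

```python
import time


def fetch_batch(index):
    # Hypothetical stand-in for cls.search_elements(); None simulates an
    # element that fails during processing.
    batch = ["video_1", None, "video_3"]
    return batch[index:]


def process(element):
    # Hypothetical stand-in for the download/publish step.
    if element is None:
        raise ValueError("element could not be parsed")
    print(f"processed {element}")


def crawl():
    index = 0
    while True:
        batch = fetch_batch(index)
        if not batch:
            print("no more elements")
            return
        for element in batch:
            try:
                process(element)
            except Exception as e:
                # Only this element is skipped; the for loop and the
                # outer while loop keep running.
                print(f"skip element: {e}")
        time.sleep(1)
        index += len(batch)


if __name__ == "__main__":
    crawl()
```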
@@ -660,19 +659,20 @@ class ShipinhaoSearch:
     def get_search_videos(cls, log_type, crawler, env):
         user_list = cls.get_users(log_type, crawler, "wNgi6Z", env)
         for user in user_list:
-            cls.i = 0
-            cls.download_cnt = 0
-            search_word = user["search_word"]
-            our_uid = user["our_uid"]
-            Common.logger(log_type, crawler).info(f"开始抓取搜索词:{search_word}")
             try:
+                cls.i = 0
+                cls.download_cnt = 0
+                search_word = user["search_word"]
+                our_uid = user["our_uid"]
+                Common.logger(log_type, crawler).info(f"开始抓取搜索词:{search_word}")
+
                 cls.start_wechat(log_type=log_type,
                                  crawler=crawler,
                                  word=search_word,
                                  our_uid=our_uid,
                                  env=env)
             except Exception as e:
-                Common.logger(log_type, crawler).error(f"search_video:{e}\n")
+                Common.logger(log_type, crawler).error(f"抓取{user['search_word']}时异常:{e}\n")


 if __name__ == '__main__':
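The hunk above applies the same idea to `get_search_videos`: reading `search_word` and `our_uid` out of the user record now happens inside the `try`, so a malformed entry only fails its own keyword, and the error message names the keyword that failed. A rough sketch of the pattern, assuming user records are plain dicts and using a hypothetical `start_search()` in place of `cls.start_wechat()`:

```python
def start_search(word, uid):
    # Hypothetical stand-in for cls.start_wechat().
    print(f"searching {word!r} for uid {uid}")


def run(user_list):
    for user in user_list:
        try:
            # Reading the record inside the try means a malformed entry
            # only fails this keyword, not the whole run.
            search_word = user["search_word"]
            our_uid = user["our_uid"]
            start_search(search_word, our_uid)
        except Exception as e:
            print(f"failed on {user.get('search_word')}: {e}")


if __name__ == "__main__":
    run([
        {"search_word": "keyword_a", "our_uid": 1001},
        {"our_uid": 1002},  # missing "search_word" -> caught and logged
    ])
```

One caveat with the new log line: the handler itself reads `user['search_word']`, so a record missing that key would raise again inside the `except`; the sketch uses `user.get('search_word')` to keep the handler safe.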
@@ -681,12 +681,12 @@ if __name__ == '__main__':
     # rule_dict='[{"videos_cnt":{"min":10,"max":0}},{"duration":{"min":30,"max":600}},{"share_cnt":{"min":3000,"max":0}},{"favorite_cnt":{"min":1000,"max":0}},{"publish_time":{"min":1672502400000,"max":0}}]',
     # oss_endpoint="out",
     # env="dev")
-    # print(ShipinhaoSearchScheduling.get_users("search", "shipinhao", "wNgi6Z", "dev"))
+    print(ShipinhaoSearch.get_users("search", "shipinhao", "wNgi6Z", "prod"))
     # print((date.today() + timedelta(days=0)).strftime("%Y-%m-%d"))
     # print(ShipinhaoSearchScheduling.repeat_out_video_id(log_type="search",
     #                                                     crawler="shipinhao",
     #                                                     out_video_id="123",
     #                                                     env="dev"))
     # ShipinhaoSearch.download_rule(log_type="search", crawler="shipinhao", video_dict={})
-    print(ShipinhaoSearch.rule_dict(log_type="search", crawler="shipinhao"))
+    # print(ShipinhaoSearch.rule_dict(log_type="search", crawler="shipinhao"))
     pass