|
@@ -17,6 +17,19 @@ class Recommend:
|
|
|
# Videos downloaded today, kept as a list; its length is used as the download count
|
|
|
download_cnt = []
|
|
|
|
|
|
# Download/upload a bounded number of videos.
@classmethod
def run_recommend(cls, log_type, env):
    """Start a crawl pass unless the download quota has been reached.

    While ``cls.download_cnt`` holds fewer than 100 entries, control is
    handed to ``cls.start_wechat(log_type, env)``. Once it holds 100 or
    more, the total is logged, ``cls.download_cnt`` is reset to an empty
    list and nothing else is started.

    Args:
        log_type: Logger channel passed to ``Common.logger`` for the error
            path and forwarded to ``start_wechat``.
        env: Forwarded unchanged to ``start_wechat``.

    Returns:
        None. Any exception raised along the way is logged at error level
        and swallowed rather than propagated.
    """
    try:
        downloaded = len(cls.download_cnt)

        # Quota not reached yet: keep crawling.
        if downloaded < 100:
            cls.start_wechat(log_type, env)
            return

        # Quota reached: report the total and start counting from zero again.
        # NOTE(review): this message goes to the fixed 'recommend' channel
        # while the error path below uses log_type -- confirm that is intended.
        Common.logger('recommend').info('已下载{}条视频\n', downloaded)
        cls.download_cnt = []
    except Exception as e:
        Common.logger(log_type).error('run_recommend异常:{}\n', e)
|
|
|
+
|
|
|
# 启动微信,并打开视频号
|
|
|
@classmethod
|
|
|
def start_wechat(cls, log_type, env):
|
|
@@ -87,149 +100,143 @@ class Recommend:
|
|
|
@classmethod
|
|
|
def get_feeds(cls, log_type, driver: WebDriver, env):
|
|
|
try:
|
|
|
- while True:
|
|
|
- for i in range(5):
|
|
|
- if len(cls.download_cnt) >= 100:
|
|
|
- Common.logger('recommend').info('已下载{}条视频\n', len(cls.download_cnt))
|
|
|
- cls.download_cnt = []
|
|
|
- return
|
|
|
- else:
|
|
|
- driver.implicitly_wait(10)
|
|
|
-
|
|
|
- # 视频标题
|
|
|
- try:
|
|
|
- title_id = driver.find_element(By.ID, 'com.tencent.mm:id/ki5')
|
|
|
- video_title = title_id.get_attribute('name').split('\n')[0].replace('#', '').strip()
|
|
|
- except NoSuchElementException:
|
|
|
- video_title = ''
|
|
|
- driver.swipe(10, 1600, 10, 300, 200)
|
|
|
+ for i in range(5):
|
|
|
+ driver.implicitly_wait(10)
|
|
|
+
|
|
|
+ # 视频标题
|
|
|
+ try:
|
|
|
+ title_id = driver.find_element(By.ID, 'com.tencent.mm:id/ki5')
|
|
|
+ video_title = title_id.get_attribute('name').split('\n')[0].replace('#', '').strip()
|
|
|
+ except NoSuchElementException:
|
|
|
+ video_title = ''
|
|
|
+ driver.swipe(10, 1600, 10, 300, 200)
|
|
|
+
|
|
|
+ # 点击播放器,获取视频时长
|
|
|
+ # Common.logger(log_type).info('暂停播放')
|
|
|
+ pause_btn = driver.find_element(By.ID, 'com.tencent.mm:id/eh4')
|
|
|
+ pause_btn.click()
|
|
|
+ start_time = driver.find_element(By.ID, 'com.tencent.mm:id/l59').get_attribute('name')
|
|
|
+ start_time = int(start_time.split(':')[0])*60 + int(start_time.split(':')[-1])
|
|
|
+ try:
|
|
|
+ end_time = driver.find_element(By.ID, 'com.tencent.mm:id/l7i').get_attribute('name')
|
|
|
+ except NoSuchElementException:
|
|
|
+ end_time = driver.find_element(By.ID, 'com.tencent.mm:id/g73').get_attribute('name')
|
|
|
+ end_time = int(end_time.split(':')[0]) * 60 + int(end_time.split(':')[-1])
|
|
|
+ duration = start_time + end_time
|
|
|
+
|
|
|
+ # 点赞
|
|
|
+ like_id = driver.find_element(By.ID, 'com.tencent.mm:id/k04')
|
|
|
+ like_cnt = like_id.get_attribute('name')
|
|
|
+ if like_cnt == "" or like_cnt == "喜欢":
|
|
|
+ like_cnt = 0
|
|
|
+ elif '万' in like_cnt:
|
|
|
+ like_cnt = float(like_cnt.split('万')[0]) * 10000
|
|
|
+ elif '万+' in like_cnt:
|
|
|
+ like_cnt = float(like_cnt.split('万+')[0]) * 10000
|
|
|
+ else:
|
|
|
+ like_cnt = float(like_cnt)
|
|
|
+
|
|
|
+ # 分享
|
|
|
+ share_id = driver.find_element(By.ID, 'com.tencent.mm:id/jhv')
|
|
|
+ share_cnt = share_id.get_attribute('name')
|
|
|
+ if share_cnt == "" or share_cnt == "转发":
|
|
|
+ share_cnt = 0
|
|
|
+ elif '万' in share_cnt:
|
|
|
+ share_cnt = float(share_cnt.split('万')[0]) * 10000
|
|
|
+ elif '万+' in share_cnt:
|
|
|
+ share_cnt = float(share_cnt.split('万+')[0]) * 10000
|
|
|
+ else:
|
|
|
+ share_cnt = float(share_cnt)
|
|
|
+
|
|
|
+ # 收藏
|
|
|
+ favorite_id = driver.find_element(By.ID, 'com.tencent.mm:id/fnp')
|
|
|
+ favorite_cnt = favorite_id.get_attribute('name')
|
|
|
+ if favorite_cnt == "" or favorite_cnt == "收藏":
|
|
|
+ favorite_cnt = 0
|
|
|
+ elif '万' in favorite_cnt:
|
|
|
+ favorite_cnt = float(favorite_cnt.split('万')[0]) * 10000
|
|
|
+ elif '万+' in favorite_cnt:
|
|
|
+ favorite_cnt = float(favorite_cnt.split('万+')[0]) * 10000
|
|
|
+ else:
|
|
|
+ favorite_cnt = float(favorite_cnt)
|
|
|
+
|
|
|
+ # 评论
|
|
|
+ comment_id = driver.find_element(By.ID, 'com.tencent.mm:id/bje')
|
|
|
+ comment_cnt = comment_id.get_attribute('name')
|
|
|
+ if comment_cnt == "" or comment_cnt == "评论":
|
|
|
+ comment_cnt = 0
|
|
|
+ elif '万' in comment_cnt:
|
|
|
+ comment_cnt = float(comment_cnt.split('万')[0]) * 10000
|
|
|
+ elif '万+' in comment_cnt:
|
|
|
+ comment_cnt = float(comment_cnt.split('万+')[0]) * 10000
|
|
|
+ else:
|
|
|
+ comment_cnt = float(comment_cnt)
|
|
|
+
|
|
|
+ # 用户名
|
|
|
+ username_id = driver.find_element(By.ID, 'com.tencent.mm:id/hft')
|
|
|
+ user_name = username_id.get_attribute('name')
|
|
|
+
|
|
|
+ Common.logger(log_type).info('video_title:{}', video_title)
|
|
|
+ Common.logger(log_type).info('duration:{}', duration)
|
|
|
+ Common.logger(log_type).info('like_cnt:{}', like_cnt)
|
|
|
+ Common.logger(log_type).info('share_cnt:{}', share_cnt)
|
|
|
+ Common.logger(log_type).info('favorite_cnt:{}', favorite_cnt)
|
|
|
+ Common.logger(log_type).info('comment_cnt:{}', comment_cnt)
|
|
|
+ Common.logger(log_type).info('user_name:{}', user_name)
|
|
|
+
|
|
|
+ # 判断无效视频
|
|
|
+ if video_title == '' or user_name == '':
|
|
|
+ Common.logger(log_type).info('无效视频,滑动到下一个视频\n')
|
|
|
+ driver.swipe(10, 1600, 10, 300, 200)
|
|
|
+
|
|
|
+ # 判断下载规则
|
|
|
+ elif cls.download_rule(duration, like_cnt, share_cnt, favorite_cnt, comment_cnt) is False:
|
|
|
+ Common.logger(log_type).info('不满足抓取规则,滑动到下一个视频\n')
|
|
|
+ driver.swipe(10, 1600, 10, 300, 200)
|
|
|
+
|
|
|
+ # 已下载表去重
|
|
|
+ elif str(video_title) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'c77cf9') for x in y]:
|
|
|
+ Common.logger(log_type).info('视频已下载,滑动到下一个视频\n')
|
|
|
+ driver.swipe(10, 1600, 10, 300, 200)
|
|
|
+
|
|
|
+ # feeds 表去重
|
|
|
+ elif str(video_title) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy') for x in y]:
|
|
|
+ Common.logger(log_type).info('视频已存在,滑动到下一个视频\n')
|
|
|
+ driver.swipe(10, 1600, 10, 300, 200)
|
|
|
+
|
|
|
+ # 分享给 windows 爬虫机
|
|
|
+ else:
|
|
|
+ share_id.click()
|
|
|
+ driver.find_element(By.XPATH, '//*[@text="转发给朋友"]').click()
|
|
|
+ driver.find_element(By.XPATH, '//*[@text="爬虫群"]').click()
|
|
|
+ driver.find_element(By.ID, 'com.tencent.mm:id/guw').click()
|
|
|
+
|
|
|
+ # 把视频信息写入飞书feeds文档
|
|
|
+ Feishu.insert_columns(log_type, 'shipinhao', 'FSDlBy', 'ROWS', 1, 2)
|
|
|
+ get_feeds_time = int(time.time())
|
|
|
+ values = [[time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(get_feeds_time)),
|
|
|
+ '推荐榜',
|
|
|
+ str(video_title),
|
|
|
+ duration,
|
|
|
+ like_cnt,
|
|
|
+ share_cnt,
|
|
|
+ favorite_cnt,
|
|
|
+ comment_cnt,
|
|
|
+ str(user_name)]]
|
|
|
+ time.sleep(1)
|
|
|
+ Feishu.update_values(log_type, 'shipinhao', 'FSDlBy', 'A2:Z2', values)
|
|
|
+ Common.logger(log_type).info('视频信息写入飞书文档成功\n')
|
|
|
|
|
|
- # 点击播放器,获取视频时长
|
|
|
- # Common.logger(log_type).info('暂停播放')
|
|
|
- pause_btn = driver.find_element(By.ID, 'com.tencent.mm:id/eh4')
|
|
|
- pause_btn.click()
|
|
|
- start_time = driver.find_element(By.ID, 'com.tencent.mm:id/l59').get_attribute('name')
|
|
|
- start_time = int(start_time.split(':')[0])*60 + int(start_time.split(':')[-1])
|
|
|
- try:
|
|
|
- end_time = driver.find_element(By.ID, 'com.tencent.mm:id/l7i').get_attribute('name')
|
|
|
- except NoSuchElementException:
|
|
|
- end_time = driver.find_element(By.ID, 'com.tencent.mm:id/g73').get_attribute('name')
|
|
|
- end_time = int(end_time.split(':')[0]) * 60 + int(end_time.split(':')[-1])
|
|
|
- duration = start_time + end_time
|
|
|
-
|
|
|
- # 点赞
|
|
|
- like_id = driver.find_element(By.ID, 'com.tencent.mm:id/k04')
|
|
|
- like_cnt = like_id.get_attribute('name')
|
|
|
- if like_cnt == "" or like_cnt == "喜欢":
|
|
|
- like_cnt = 0
|
|
|
- elif '万' in like_cnt:
|
|
|
- like_cnt = float(like_cnt.split('万')[0]) * 10000
|
|
|
- elif '万+' in like_cnt:
|
|
|
- like_cnt = float(like_cnt.split('万+')[0]) * 10000
|
|
|
- else:
|
|
|
- like_cnt = float(like_cnt)
|
|
|
-
|
|
|
- # 分享
|
|
|
- share_id = driver.find_element(By.ID, 'com.tencent.mm:id/jhv')
|
|
|
- share_cnt = share_id.get_attribute('name')
|
|
|
- if share_cnt == "" or share_cnt == "转发":
|
|
|
- share_cnt = 0
|
|
|
- elif '万' in share_cnt:
|
|
|
- share_cnt = float(share_cnt.split('万')[0]) * 10000
|
|
|
- elif '万+' in share_cnt:
|
|
|
- share_cnt = float(share_cnt.split('万+')[0]) * 10000
|
|
|
- else:
|
|
|
- share_cnt = float(share_cnt)
|
|
|
-
|
|
|
- # 收藏
|
|
|
- favorite_id = driver.find_element(By.ID, 'com.tencent.mm:id/fnp')
|
|
|
- favorite_cnt = favorite_id.get_attribute('name')
|
|
|
- if favorite_cnt == "" or favorite_cnt == "收藏":
|
|
|
- favorite_cnt = 0
|
|
|
- elif '万' in favorite_cnt:
|
|
|
- favorite_cnt = float(favorite_cnt.split('万')[0]) * 10000
|
|
|
- elif '万+' in favorite_cnt:
|
|
|
- favorite_cnt = float(favorite_cnt.split('万+')[0]) * 10000
|
|
|
+ while True:
|
|
|
+ if Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')[1][11] is None:
|
|
|
+ Common.logger(log_type).info('等待更新 URL 信息')
|
|
|
+ time.sleep(10)
|
|
|
else:
|
|
|
- favorite_cnt = float(favorite_cnt)
|
|
|
-
|
|
|
- # 评论
|
|
|
- comment_id = driver.find_element(By.ID, 'com.tencent.mm:id/bje')
|
|
|
- comment_cnt = comment_id.get_attribute('name')
|
|
|
- if comment_cnt == "" or comment_cnt == "评论":
|
|
|
- comment_cnt = 0
|
|
|
- elif '万' in comment_cnt:
|
|
|
- comment_cnt = float(comment_cnt.split('万')[0]) * 10000
|
|
|
- elif '万+' in comment_cnt:
|
|
|
- comment_cnt = float(comment_cnt.split('万+')[0]) * 10000
|
|
|
- else:
|
|
|
- comment_cnt = float(comment_cnt)
|
|
|
-
|
|
|
- # 用户名
|
|
|
- username_id = driver.find_element(By.ID, 'com.tencent.mm:id/hft')
|
|
|
- user_name = username_id.get_attribute('name')
|
|
|
-
|
|
|
- Common.logger(log_type).info('video_title:{}', video_title)
|
|
|
- Common.logger(log_type).info('duration:{}', duration)
|
|
|
- Common.logger(log_type).info('like_cnt:{}', like_cnt)
|
|
|
- Common.logger(log_type).info('share_cnt:{}', share_cnt)
|
|
|
- Common.logger(log_type).info('favorite_cnt:{}', favorite_cnt)
|
|
|
- Common.logger(log_type).info('comment_cnt:{}', comment_cnt)
|
|
|
- Common.logger(log_type).info('user_name:{}', user_name)
|
|
|
-
|
|
|
- # 判断无效视频
|
|
|
- if video_title == '' or user_name == '':
|
|
|
- Common.logger(log_type).info('无效视频,滑动到下一个视频\n')
|
|
|
- driver.swipe(10, 1600, 10, 300, 200)
|
|
|
-
|
|
|
- # 判断下载规则
|
|
|
- elif cls.download_rule(duration, like_cnt, share_cnt, favorite_cnt, comment_cnt) is False:
|
|
|
- Common.logger(log_type).info('不满足抓取规则,滑动到下一个视频\n')
|
|
|
+ Common.logger(log_type).info('URL 信息已更新,滑动到下一个视频\n')
|
|
|
driver.swipe(10, 1600, 10, 300, 200)
|
|
|
+ break
|
|
|
|
|
|
- # 已下载表去重
|
|
|
- elif str(video_title) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'c77cf9') for x in y]:
|
|
|
- Common.logger(log_type).info('视频已下载,滑动到下一个视频\n')
|
|
|
- driver.swipe(10, 1600, 10, 300, 200)
|
|
|
-
|
|
|
- # feeds 表去重
|
|
|
- elif str(video_title) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy') for x in y]:
|
|
|
- Common.logger(log_type).info('视频已存在,滑动到下一个视频\n')
|
|
|
- driver.swipe(10, 1600, 10, 300, 200)
|
|
|
-
|
|
|
- # 分享给 windows 爬虫机
|
|
|
- else:
|
|
|
- share_id.click()
|
|
|
- driver.find_element(By.XPATH, '//*[@text="转发给朋友"]').click()
|
|
|
- driver.find_element(By.XPATH, '//*[@text="爬虫群"]').click()
|
|
|
- driver.find_element(By.ID, 'com.tencent.mm:id/guw').click()
|
|
|
-
|
|
|
- # 把视频信息写入飞书feeds文档
|
|
|
- Feishu.insert_columns(log_type, 'shipinhao', 'FSDlBy', 'ROWS', 1, 2)
|
|
|
- get_feeds_time = int(time.time())
|
|
|
- values = [[time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(get_feeds_time)),
|
|
|
- '推荐榜',
|
|
|
- str(video_title),
|
|
|
- duration,
|
|
|
- like_cnt,
|
|
|
- share_cnt,
|
|
|
- favorite_cnt,
|
|
|
- comment_cnt,
|
|
|
- str(user_name)]]
|
|
|
- time.sleep(1)
|
|
|
- Feishu.update_values(log_type, 'shipinhao', 'FSDlBy', 'A2:Z2', values)
|
|
|
- Common.logger(log_type).info('视频信息写入飞书文档成功\n')
|
|
|
-
|
|
|
- while True:
|
|
|
- if Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')[1][11] is None:
|
|
|
- Common.logger(log_type).info('等待更新 URL 信息')
|
|
|
- time.sleep(10)
|
|
|
- else:
|
|
|
- Common.logger(log_type).info('URL 信息已更新,滑动到下一个视频\n')
|
|
|
- driver.swipe(10, 1600, 10, 300, 200)
|
|
|
- break
|
|
|
-
|
|
|
- cls.download_publish(log_type, env)
|
|
|
+ cls.download_publish(log_type, env)
|
|
|
|
|
|
except Exception as e:
|
|
|
Common.logger(log_type).error('get_feeds异常,滑动到下一个视频\n', e)
|