|
@@ -1,6 +1,7 @@
|
|
|
# -*- coding: utf-8 -*-
|
|
|
# @Author: wangkun
|
|
|
# @Time: 2022/12/14
|
|
|
+import difflib
|
|
|
import os
|
|
|
import sys
|
|
|
import time
|
|
@@ -262,6 +263,8 @@ class Follow:
|
|
|
# 视频号定向_已下载表
|
|
|
elif video_title in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'KsVtLe') for x in y]:
|
|
|
Common.logger(log_type).info('视频已下载\n')
|
|
|
+ elif cls.title_like(log_type, video_title) is True:
|
|
|
+ Common.logger(log_type).info('标题相似度>=90%')
|
|
|
# feeds 表去重
|
|
|
elif video_title in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy') for x in y]:
|
|
|
Common.logger(log_type).info('视频已存在\n')
|
|
@@ -285,6 +288,18 @@ class Follow:
|
|
|
except Exception as e:
|
|
|
Common.logger(log_type).error(f'get_video_info异常:{e}\n')
|
|
|
|
|
|
@classmethod
def title_like(cls, log_type, title):
    """Tell whether *title* is at least 90% similar to an already-downloaded title.

    Fetches sheet 'KsVtLe' of the 'shipinhao' workbook through
    ``Feishu.get_values_batch`` and compares *title* with the value found at
    column index 7 of every row except the first one (presumably a header
    row -- confirm against the sheet layout).

    :param log_type: forwarded unchanged to ``Feishu.get_values_batch``.
    :param title: candidate video title.
    :return: ``True`` as soon as one stored title reaches the similarity
        threshold, ``False`` otherwise.
    """
    threshold = 0.9
    title_column = 7

    # A non-string title cannot be compared character by character.
    if not isinstance(title, str):
        return False

    # NOTE(review): guards against the fetch yielding nothing; confirm what
    # Feishu.get_values_batch returns on failure.
    sheet = Feishu.get_values_batch(log_type, 'shipinhao', 'KsVtLe') or []

    for row in sheet[1:]:
        # Short rows have no title cell at all.
        if not row or len(row) <= title_column:
            continue
        video_title = row[title_column]
        # Empty cells come back as None; other non-string cells are skipped
        # as well because they cannot be matched against a string title.
        if not isinstance(video_title, str):
            continue

        matcher = difflib.SequenceMatcher(None, title, video_title)
        # real_quick_ratio() and quick_ratio() are only upper bounds on
        # ratio(): they ignore character order, so reordered titles would
        # score 1.0.  Use them as cheap pre-filters and let ratio() decide.
        if (matcher.real_quick_ratio() >= threshold
                and matcher.quick_ratio() >= threshold
                and matcher.ratio() >= threshold):
            return True

    return False
|
|
|
+
|
|
|
@classmethod
|
|
|
def share_to_windows(cls, log_type, driver: WebDriver, video_dict, env):
|
|
|
Common.logger(log_type).info('分享给 windows 爬虫机器')
|
|
@@ -325,13 +340,7 @@ class Follow:
|
|
|
try:
|
|
|
follow_feeds_sheet = Feishu.get_values_batch(log_type, 'shipinhao', 'qzDljJ')
|
|
|
for i in range(1, len(follow_feeds_sheet)):
|
|
|
- download_title = follow_feeds_sheet[i][2].strip().replace('"', '') \
|
|
|
- .replace('“', '').replace('“', '…').replace("\n", "") \
|
|
|
- .replace("/", "").replace("\r", "") \
|
|
|
- .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
|
|
|
- .replace(":", "").replace("*", "").replace("?", "") \
|
|
|
- .replace("?", "").replace('"', "").replace("<", "") \
|
|
|
- .replace(">", "").replace("|", "").replace(" ", "")
|
|
|
+ download_title = follow_feeds_sheet[i][2]
|
|
|
download_duration = follow_feeds_sheet[i][3]
|
|
|
download_like_cnt = follow_feeds_sheet[i][4]
|
|
|
download_share_cnt = follow_feeds_sheet[i][5]
|