|
@@ -90,6 +90,38 @@ class DyRecommend(object):
|
|
except Exception as e:
|
|
except Exception as e:
|
|
Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
|
|
Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
|
|
|
|
|
|
|
|
@classmethod
def video_title(cls, log_type, crawler, title):
    """Return a cleaned-up video title derived from a raw description.

    The text after the first " #" and after the first "@" is dropped
    (when the text *starts* with the separator, the last segment is kept
    instead). A fixed set of characters and the substrings "抖音" and
    "&NBSP" are then removed, "." becomes "。", and the result is cut to
    at most 40 characters.

    Args:
        log_type: Passed through unchanged to ``cls.random_title``.
        crawler: Passed through unchanged to ``cls.random_title``.
        title: Raw title/description text. May be empty or ``None``.

    Returns:
        The cleaned title, or ``cls.random_title(log_type, crawler)`` when
        nothing usable remains (empty result, or only "。。。").
    """
    # An empty or missing title can never produce a usable result.
    if not title:
        return cls.random_title(log_type, crawler)

    def _keep_head(text, separator):
        # Keep what precedes the first separator; if the text starts with
        # the separator (empty head), fall back to the last segment.
        segments = text.split(separator)
        return segments[0] if segments[0] != "" else segments[-1]

    cleaned = _keep_head(_keep_head(title, " #"), "@").strip()

    # The order of the steps below is significant: "\n" and "/" must be
    # removed before "抖音" is matched, and spaces before "&NBSP" is matched,
    # so that the multi-character patterns see the same text as before.
    cleaned = cleaned.translate(str.maketrans("", "", "\n/"))
    cleaned = cleaned.replace("抖音", "").replace(" ", "").replace("&NBSP", "")
    cleaned = cleaned.translate(str.maketrans(".", "。", '\r#\\:*?"<>|@'))
    cleaned = cleaned[:40]

    # Spaces were already removed and "." was mapped to "。", so an empty
    # string and "。。。" are the only placeholder results that can occur.
    if cleaned in ("", "。。。"):
        return cls.random_title(log_type, crawler)
    return cleaned
|
|
|
|
+
|
|
@classmethod
|
|
@classmethod
|
|
def random_title(cls, log_type, crawler):
|
|
def random_title(cls, log_type, crawler):
|
|
try:
|
|
try:
|
|
@@ -142,7 +174,7 @@ class DyRecommend(object):
|
|
if not info['desc']:
|
|
if not info['desc']:
|
|
video_title = cls.random_title(log_type, crawler)
|
|
video_title = cls.random_title(log_type, crawler)
|
|
else:
|
|
else:
|
|
- video_title = info['desc']
|
|
|
|
|
|
+ video_title = cls.video_title(log_type, crawler, video_title)
|
|
|
|
|
|
video_dict = {'video_title': video_title,
|
|
video_dict = {'video_title': video_title,
|
|
'video_id': info['aweme_id'],
|
|
'video_id': info['aweme_id'],
|