lierqiang 2 gadi atpakaļ
vecāks
revīzija
02bc2b53bd
1 mainītis faili ar 33 papildinājumiem un 1 dzēšanām
  1. 33 1
      douyin/douyin_recommend/recommend_dy.py

+ 33 - 1
douyin/douyin_recommend/recommend_dy.py

@@ -90,6 +90,38 @@ class DyRecommend(object):
         except Exception as e:
         except Exception as e:
             Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
             Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
 
 
+    @classmethod
+    def video_title(cls, log_type, crawler, title):
+        """Build a cleaned-up, length-limited title from a raw description.
+
+        The text before the first " #" is kept (or, when the description
+        starts with " #", the text after the last one); the same
+        first-or-last rule is then applied around "@". The result is
+        stripped, a fixed set of characters and markers is removed, "." is
+        turned into "。", and the string is cut to 40 characters.
+
+        :param log_type: only forwarded to ``cls.random_title`` on fallback
+        :param crawler: only forwarded to ``cls.random_title`` on fallback
+        :param title: raw description string
+        :return: the cleaned title, or ``cls.random_title(log_type, crawler)``
+            when the cleaned title is empty or is exactly "。。。"
+        """
+
+        def head_or_tail(text, separator):
+            # First piece unless it is empty, otherwise the last piece.
+            pieces = text.split(separator)
+            return pieces[0] if pieces[0] != "" else pieces[-1]
+
+        # A single " #" pass is enough: a piece produced by split(" #") can
+        # never contain " #" again, so repeating the split changes nothing.
+        text = head_or_tail(head_or_tail(title, " #"), "@").strip()
+
+        # Applied strictly in this order: the multi-character markers must be
+        # removed at the same point relative to the single-character removals
+        # (e.g. "抖/音" collapses to "抖音" first and is then removed).
+        replacements = (
+            ("\n", ""), ("/", ""), ("抖音", ""), (" ", ""),
+            ("&NBSP", ""), ("\r", ""), ("#", ""), (".", "。"),
+            ("\\", ""), (":", ""), ("*", ""), ("?", ""),
+            ('"', ""), ("<", ""), (">", ""), ("|", ""), ("@", ""),
+        )
+        for old, new in replacements:
+            text = text.replace(old, new)
+        cleaned = text[:40]
+
+        # Spaces are already gone and every "." is already "。", so these are
+        # the only fallback cases that can actually occur.
+        if cleaned == "" or cleaned == "。。。":
+            return cls.random_title(log_type, crawler)
+        return cleaned
+
     @classmethod
     @classmethod
     def random_title(cls, log_type, crawler):
     def random_title(cls, log_type, crawler):
         try:
         try:
@@ -142,7 +174,7 @@ class DyRecommend(object):
                     if not info['desc']:
                     if not info['desc']:
                         video_title = cls.random_title(log_type, crawler)
                         video_title = cls.random_title(log_type, crawler)
                     else:
                     else:
-                        video_title = info['desc']
+                        video_title = cls.video_title(log_type, crawler, video_title)
 
 
                     video_dict = {'video_title': video_title,
                     video_dict = {'video_title': video_title,
                                   'video_id': info['aweme_id'],
                                   'video_id': info['aweme_id'],