
update: add fetching of config info from MySQL

lierqiang · 2 years ago
commit 30a9bb9e11
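
The crawlers below drop their per-class Feishu-sheet lookups (filter words and the "万能标题" fallback titles) in favor of two shared helpers, get_config_from_mysql and random_title, imported from common/public.py. Those helpers are not part of this diff; what follows is a minimal sketch of what they presumably look like, inferred from the call sites below. The crawler_config table and the layout of its config column are assumptions (kuaishou/kuaishou_follow/test.py, added in this commit, reads that table and eval()s a dict-literal string from the config column):

# Hypothetical sketch, not the committed implementation.
import ast
import random

from common.db import MysqlHelper


def get_config_from_mysql(log_type, crawler, env, text, action=''):
    # Read the per-crawler config row and return one configured list,
    # e.g. text='filter' -> filter words, text='title' -> fallback titles.
    # Passing `action` through to MysqlHelper.get_values is an assumption.
    sql = f""" select config from crawler_config where source="{crawler}" """
    rows = MysqlHelper.get_values(log_type, crawler, sql, env, action)
    values = []
    for row in rows:
        # test.py suggests the column holds a Python dict-literal string;
        # ast.literal_eval parses it without executing arbitrary code.
        config = ast.literal_eval(row['config'])
        if text in config:
            values.extend(config[text].split(','))
    return values


def random_title(log_type, crawler, env, text):
    # Pick a random fallback title from the MySQL-backed config.
    return random.choice(get_config_from_mysql(log_type, crawler, env, text))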

+ 0 - 0
douyin/douyin_follow/__init__.py


+ 16 - 29
douyin/douyin_follow/follow_dy.py

@@ -10,15 +10,16 @@ import time
 import requests
 from hashlib import md5
 
-from common.public import get_user_from_mysql
 from douyin.douyin_recommend import get_xb
 
 sys.path.append(os.getcwd())
+from common.common import Common
 from common.db import MysqlHelper
 from common.feishu import Feishu
 from common.publish import Publish
+from common.public import random_title
 from common.userAgent import get_random_user_agent
-from common.common import Common
+from common.public import get_user_from_mysql, get_config_from_mysql
 
 
 class DyFollow(object):
@@ -89,7 +90,7 @@ class DyFollow(object):
             Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
 
     @classmethod
-    def video_title(cls, log_type, crawler, title):
+    def video_title(cls, log_type, env, crawler, title):
         title_split1 = title.split(" #")
         if title_split1[0] != "":
             title1 = title_split1[0]
@@ -116,29 +117,10 @@ class DyFollow(object):
                           .replace("?", "").replace('"', "").replace("<", "") \
                           .replace(">", "").replace("|", "").replace("@", "").replace('"', '').replace("'", '')[:40]
         if video_title.replace(" ", "") == "" or video_title == "。。。" or video_title == "...":
-            return cls.random_title(log_type, crawler)
+            return random_title(log_type, crawler, env, text='title')
         else:
             return video_title
 
-    @classmethod
-    def random_title(cls, log_type, crawler):
-        try:
-            while True:
-                random_title_sheet = Feishu.get_values_batch(log_type, crawler, 'sPK2oY')
-                if random_title_sheet is None:
-                    Common.logger(log_type, crawler).warning(f"filter_words_sheet:{random_title_sheet} 10秒钟后重试")
-                    continue
-                random_title_list = []
-                for x in random_title_sheet:
-                    for y in x:
-                        if y is None:
-                            pass
-                        else:
-                            random_title_list.append(y)
-                return random.choice(random_title_list)
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'random_title:{e}\n')
-
     @classmethod
     def get_videoList(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine, rule_dict):
 
@@ -175,13 +157,15 @@ class DyFollow(object):
         for info in aweme_list:
             if info.get('is_ads'):
                 continue
-            publish_time = info['create_time']
+            publish_time = info.get('create_time')
+            if not publish_time:
+                continue
             publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time))
             publish_day = int((int(time.time()) - publish_time) / (3600 * 24))
 
-            video_title = cls.video_title(log_type, crawler, info['desc'])
+            video_title = cls.video_title(log_type, env, crawler, info['desc'])
             if not video_title:
-                video_title = cls.random_title(log_type, crawler)
+                video_title = random_title(log_type, crawler, env, text='title')
 
             video_dict = {'video_title': video_title,
                           'video_id': info['aweme_id'],
@@ -226,11 +210,13 @@ class DyFollow(object):
     @classmethod
     def download_publish(cls, log_type, crawler, strategy, video_dict, rule_dict, our_uid, oss_endpoint, env, machine):
         try:
+            filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
+            for filter_word in filter_words:
+                if filter_word in video_dict['video_title']:
+                    Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
+                    return
             if cls.download_rule(video_dict, rule_dict) is False:
                 Common.logger(log_type, crawler).info('不满足抓取规则\n')
-            elif any(word if word in video_dict['video_title'] else False for word in
-                     cls.filter_words(log_type, crawler)) is True:
-                Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
             elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
                 Common.logger(log_type, crawler).info('视频已下载\n')
             else:
@@ -349,6 +335,7 @@ class DyFollow(object):
     def get_follow_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine):
         user_list = get_user_from_mysql(log_type, crawler, crawler, env)
         rule_dict = cls.get_rule(log_type, crawler)
+
         for user in user_list:
             spider_link = user["spider_link"]
             out_uid = spider_link

+ 17 - 26
douyin/douyin_recommend/recommend_dy.py

@@ -9,14 +9,17 @@ import sys
 import time
 import requests
 from hashlib import md5
+
 from douyin.douyin_recommend import get_xb
 
 sys.path.append(os.getcwd())
-from common.db import MysqlHelper
+from common.common import Common
 from common.feishu import Feishu
+from common.db import MysqlHelper
 from common.publish import Publish
+from common.public import get_config_from_mysql
+from common.public import random_title
 from common.userAgent import get_random_user_agent
-from common.common import Common
 
 
 class DyRecommend(object):
@@ -92,7 +95,7 @@ class DyRecommend(object):
             Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
 
     @classmethod
-    def video_title(cls, log_type, crawler, title):
+    def video_title(cls, log_type, crawler, env, title):
         title_split1 = title.split(" #")
         if title_split1[0] != "":
             title1 = title_split1[0]
@@ -119,29 +122,10 @@ class DyRecommend(object):
                           .replace("?", "").replace('"', "").replace("<", "") \
                           .replace(">", "").replace("|", "").replace("@", "").replace('"', '').replace("'", '')[:40]
         if video_title.replace(" ", "") == "" or video_title == "。。。" or video_title == "...":
-            return cls.random_title(log_type, crawler)
+            return random_title(log_type, crawler, env, text='title')
         else:
             return video_title
 
-    @classmethod
-    def random_title(cls, log_type, crawler):
-        try:
-            while True:
-                random_title_sheet = Feishu.get_values_batch(log_type, crawler, 'sPK2oY')
-                if random_title_sheet is None:
-                    Common.logger(log_type, crawler).warning(f"filter_words_sheet:{random_title_sheet} 10秒钟后重试")
-                    continue
-                random_title_list = []
-                for x in random_title_sheet:
-                    for y in x:
-                        if y is None:
-                            pass
-                        else:
-                            random_title_list.append(y)
-                return random.choice(random_title_list)
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'random_title:{e}\n')
-
     @classmethod
     def get_videolist(cls, log_type, crawler, strategy, our_id, oss_endpoint, env, machine):
         rule_dict = cls.get_rule(log_type, crawler)
@@ -171,13 +155,15 @@ class DyRecommend(object):
             for info in aweme_list:
                 if info.get('is_ads'):
                     continue
-                publish_time = info['create_time']
+                publish_time = info.get('create_time')
+                if not publish_time:
+                    continue
                 publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time))
                 publish_day = int((int(time.time()) - publish_time) / (3600 * 24))
                 if not info['desc']:
-                    video_title = cls.random_title(log_type, crawler)
+                    video_title = random_title(log_type, crawler, env, text='title')
                 else:
-                    video_title = cls.video_title(log_type, crawler, info['desc'])
+                    video_title = cls.video_title(log_type, crawler, env, info['desc'])
 
                 video_dict = {'video_title': video_title,
                               'video_id': info['aweme_id'],
@@ -222,6 +208,11 @@ class DyRecommend(object):
     @classmethod
     def download_publish(cls, log_type, crawler, strategy, video_dict, rule_dict, our_uid, oss_endpoint, env, machine):
         try:
+            filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
+            for filter_word in filter_words:
+                if filter_word in video_dict['video_title']:
+                    Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
+                    return
             if cls.download_rule(video_dict, rule_dict) is False:
                 Common.logger(log_type, crawler).info('不满足抓取规则\n')
             elif any(word if word in video_dict['video_title'] else False for word in

+ 19 - 37
kuaishou/kuaishou_follow/kuaishou_follow.py

@@ -20,6 +20,7 @@ from common.feishu import Feishu
 from common.getuser import getUser
 from common.db import MysqlHelper
 from common.publish import Publish
+from common.public import random_title, get_config_from_mysql
 from common.public import get_user_from_mysql
 from common.userAgent import get_random_user_agent
 
@@ -93,26 +94,6 @@ class KuaiShouFollow:
         except Exception as e:
             Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
 
-    # 万能标题
-    @classmethod
-    def random_title(cls, log_type, crawler):
-        try:
-            while True:
-                random_title_sheet = Feishu.get_values_batch(log_type, crawler, '0DiyXe')
-                if random_title_sheet is None:
-                    Common.logger(log_type, crawler).warning(f"filter_words_sheet:{random_title_sheet} 10秒钟后重试")
-                    continue
-                random_title_list = []
-                for x in random_title_sheet:
-                    for y in x:
-                        if y is None:
-                            pass
-                        else:
-                            random_title_list.append(y)
-                return random.choice(random_title_list)
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'random_title:{e}\n')
-
     # 获取站外用户信息
     @classmethod
     def get_out_user_info(cls, log_type, crawler, out_uid):
@@ -261,7 +242,7 @@ class KuaiShouFollow:
 
     # 处理视频标题
     @classmethod
-    def video_title(cls, log_type, crawler, title):
+    def video_title(cls, log_type, crawler, env, title):
         title_split1 = title.split(" #")
         if title_split1[0] != "":
             title1 = title_split1[0]
@@ -288,7 +269,7 @@ class KuaiShouFollow:
                           .replace("?", "").replace('"', "").replace("<", "") \
                           .replace(">", "").replace("|", "").replace("@", "").replace('"', '').replace("'", '')[:40]
         if video_title.replace(" ", "") == "" or video_title == "。。。" or video_title == "...":
-            return cls.random_title(log_type, crawler)
+            return random_title(log_type, crawler, env, text='title')
         else:
             return video_title
 
@@ -359,11 +340,11 @@ class KuaiShouFollow:
 
                 # video_title
                 if 'caption' not in feeds[i]['photo']:
-                    video_title = cls.random_title(log_type, crawler)
+                    video_title = random_title(log_type, crawler, env, text='title')
                 elif feeds[i]['photo']['caption'].strip() == "":
-                    video_title = cls.random_title(log_type, crawler)
+                    video_title = random_title(log_type, crawler, env, text='title')
                 else:
-                    video_title = cls.video_title(log_type, crawler, feeds[i]['photo']['caption'])
+                    video_title = cls.video_title(log_type, crawler, env, feeds[i]['photo']['caption'])
 
                 if 'videoResource' not in feeds[i]['photo'] \
                         and 'manifest' not in feeds[i]['photo'] \
@@ -546,15 +527,15 @@ class KuaiShouFollow:
     @classmethod
     def download_publish(cls, log_type, crawler, strategy, video_dict, rule_dict, our_uid, oss_endpoint, env, machine):
         try:
+            filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
+            for filter_word in filter_words:
+                if filter_word in video_dict['video_title']:
+                    Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
+                    return
             download_finished = False
             if cls.repeat_video(log_type, crawler, video_dict['video_id'], video_dict['video_title'],
                                 video_dict['publish_time_str'], env, machine) != 0:
                 Common.logger(log_type, crawler).info('视频已下载\n')
-            # elif video_dict['video_id'] in [x for y in Feishu.get_values_batch(log_type, crawler, "3cd128") for x in y]:
-            #     Common.logger(log_type, crawler).info('视频已下载\n')
-            elif any(word if word in video_dict['video_title'] else False for word in
-                     cls.filter_words(log_type, crawler)) is True:
-                Common.logger(log_type, crawler).info('标题已中过滤词\n')
             else:
                 # 下载视频
                 Common.download_method(log_type=log_type, crawler=crawler, text='video',
@@ -674,13 +655,14 @@ class KuaiShouFollow:
             our_uid = user["media_id"]
             Common.logger(log_type, crawler).info(f"开始抓取 {user_name} 用户主页视频\n")
             cls.get_videoList(log_type=log_type,
-                                  crawler=crawler,
-                                  strategy=strategy,
-                                  our_uid=our_uid,
-                                  out_uid=out_uid,
-                                  oss_endpoint=oss_endpoint,
-                                  env=env,
-                                  machine=machine)
+                              crawler=crawler,
+                              strategy=strategy,
+                              our_uid=our_uid,
+                              out_uid=out_uid,
+                              oss_endpoint=oss_endpoint,
+                              env=env,
+                              machine=machine)
+
 
 if __name__ == "__main__":
     KuaiShouFollow.get_videoList(log_type="follow",

+ 28 - 0
kuaishou/kuaishou_follow/test.py

@@ -0,0 +1,28 @@
+import pymysql
+connection = pymysql.connect(
+                host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",# 数据库IP地址,内网地址
+                # host="rm-bp1k5853td1r25g3ndo.mysql.rds.aliyuncs.com",  # 数据库IP地址,外网地址
+                port=3306,  # 端口号
+                user="crawler",  # mysql用户名
+                passwd="crawler123456@",  # mysql用户登录密码
+                db="piaoquan-crawler",  # 数据库名
+                # 如果数据库里面的文本是utf8编码的,charset指定是utf8
+                charset="utf8mb4")
+
+m_con = connection.cursor(cursor=pymysql.cursors.DictCursor)
+
+sql = 'select * from crawler_config where id =6'
+
+a  = m_con.execute(sql)
+data = m_con.fetchall()
+
+# 关闭数据库连接
+connection.close()
+n_data = data[0]
+emo = n_data['config']
+a = '😝'
+em = eval(emo)['emoji']
+if a in em:
+    print(11111)
+else:
+    print(222222)
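
As an aside, the smoke test above eval()s a string fetched from the database, which would execute arbitrary code if the config column were ever tampered with. A safer variant of the same check (a sketch only; same host, credentials, and row id as above) uses ast.literal_eval, which parses Python literals without executing code:

import ast
import pymysql

connection = pymysql.connect(
    host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # intranet address
    port=3306,
    user="crawler",
    passwd="crawler123456@",
    db="piaoquan-crawler",
    charset="utf8mb4")

# DictCursor returns rows as dicts; the with-block closes the cursor.
with connection.cursor(cursor=pymysql.cursors.DictCursor) as cursor:
    cursor.execute('select config from crawler_config where id = 6')
    row = cursor.fetchone()
connection.close()

emoji_filter = ast.literal_eval(row['config'])['emoji']
print('😝' in emoji_filter)  # True if the emoji is in the configured filter list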

+ 400 - 0
kuaishou/kuaishou_follow/videoid

@@ -0,0 +1,400 @@
+13938824
+13938809
+13938803
+13938798
+13938791
+13938726
+13938721
+13938713
+13938711
+13938708
+13938701
+13938697
+13938691
+13938686
+13938681
+13938664
+13938659
+13938657
+13938632
+13938628
+13938621
+13938607
+13938605
+13938602
+13938599
+13938597
+13938590
+13938551
+13938521
+13938514
+13938502
+13938497
+13938492
+13938411
+13938407
+13938403
+13938393
+13938382
+13938378
+13938091
+13938085
+13938076
+13938069
+13937967
+13937931
+13934789
+13934788
+13934787
+13934781
+13934778
+13934777
+13934774
+13934769
+13934768
+13934765
+13934764
+13934756
+13934753
+13934732
+13934729
+13934724
+13934719
+13934709
+13934705
+13934703
+13934701
+13934687
+13934682
+13934634
+13934618
+13934615
+13934606
+13934597
+13934593
+13934590
+13934560
+13934552
+13934551
+13934549
+13934544
+13934542
+13934539
+13934538
+13934536
+13934534
+13934532
+13934524
+13934503
+13934502
+13934500
+13934499
+13934498
+13934492
+13934491
+13934488
+13934484
+13934482
+13934481
+13934479
+13934476
+13934473
+13934472
+13934471
+13934467
+13934464
+13934462
+13934461
+13934457
+13934453
+13934451
+13934448
+13934446
+13934445
+13934441
+13934434
+13934431
+13934429
+13934426
+13934423
+13934421
+13934419
+13934417
+13934416
+13934415
+13934413
+13934411
+13934409
+13934405
+13934404
+13934399
+13934397
+13934394
+13934392
+13934379
+13934378
+13934364
+13934361
+13934359
+13934358
+13934355
+13934351
+13934350
+13934347
+13934343
+13934339
+13934338
+13934325
+13934321
+13934318
+13934315
+13934313
+13934309
+13934305
+13934303
+13934299
+13934296
+13934293
+13934290
+13934288
+13934284
+13934282
+13934278
+13934274
+13934270
+13934266
+13934259
+13934257
+13934245
+13934238
+13934232
+13934228
+13934224
+13934221
+13934217
+13934214
+13934213
+13934206
+13934204
+13934194
+13934189
+13934184
+13934183
+13934181
+13934178
+13934177
+13934153
+13934152
+13934151
+13934149
+13934146
+13934122
+13934120
+13934113
+13934112
+13934108
+13934103
+13934100
+13934093
+13934088
+13934086
+13934083
+13934080
+13934079
+13934077
+13934072
+13934071
+13934069
+13934065
+13934061
+13934060
+13934058
+13934052
+13934047
+13934046
+13934045
+13934044
+13934041
+13934039
+13934037
+13934029
+13934025
+13934024
+13934022
+13934019
+13934016
+13934014
+13934011
+13934010
+13934007
+13934005
+13934001
+13933995
+13933992
+13933991
+13933990
+13933984
+13933982
+13933962
+13933959
+13933958
+13933955
+13933951
+13933948
+13933943
+13933940
+13933937
+13933936
+13933933
+13933932
+13933931
+13933927
+13933924
+13933921
+13933919
+13933917
+13933915
+13933910
+13933903
+13933901
+13933900
+13933896
+13933895
+13933894
+13933890
+13933889
+13933884
+13933882
+13933876
+13933874
+13933871
+13933856
+13933852
+13933837
+13933836
+13933835
+13933833
+13933832
+13933827
+13933822
+13933821
+13933818
+13933816
+13933813
+13933812
+13933809
+13933806
+13933798
+13933796
+13933795
+13933794
+13933785
+13933779
+13933776
+13933774
+13933769
+13933767
+13933765
+13933762
+13933761
+13933758
+13933756
+13933755
+13933751
+13933750
+13933740
+13933739
+13933737
+13933734
+13933730
+13933727
+13933725
+13933723
+13933719
+13933712
+13933708
+13933706
+13933704
+13933700
+13933685
+13933684
+13933681
+13933678
+13933674
+13933668
+13933666
+13933664
+13933653
+13933649
+13933639
+13933620
+13933618
+13933612
+13933608
+13933604
+13933601
+13933600
+13933590
+13933583
+13933571
+13933564
+13933562
+13933559
+13933554
+13933553
+13933549
+13933546
+13933543
+13933539
+13933535
+13933533
+13933529
+13933519
+13933517
+13933515
+13933512
+13933509
+13933502
+13933498
+13933494
+13933487
+13933484
+13933482
+13933481
+13933477
+13933469
+13933464
+13933453
+13933448
+13933444
+13933426
+13933421
+13933418
+13933413
+13933410
+13933404
+13933403
+13933401
+13933399
+13933396
+13933392
+13933390
+13933386
+13933382
+13933381
+13933377
+13933371
+13933369
+13933366
+13933365
+13933363
+13933348
+13933345
+13933342
+13933341
+13933339
+13933337
+13933334
+13933332
+13933328
+13922716

+ 0 - 0
kuaishou/kuaishou_recommend/__init__.py


BIN
kuaishou/kuaishou_recommend/kuaishou/videos/e20c6fbcda1169932c94bf0bd3683f70/image.jpg


BIN
kuaishou/kuaishou_recommend/kuaishou/videos/e20c6fbcda1169932c94bf0bd3683f70/video.mp4


BIN
kuaishou/kuaishou_recommend/kuaishou/videos/fe80d0368da2d1c26909d1a875d6e09b/image.jpg


+ 14 - 0
kuaishou/kuaishou_recommend/kuaishou/videos/fe80d0368da2d1c26909d1a875d6e09b/info.txt

@@ -0,0 +1,14 @@
+e4d3640ed1cdc03e
+不是很漂亮主打就是自信
+26
+128164
+0
+5539
+0
+720*1280
+1680277502
+不是圆
+https://p1-pro.a.yximgs.com/uhead/AB/2023/03/21/22/BMjAyMzAzMjEyMjQ1MjRfMzExNTcwODIzNV8xX2hkMTQyXzE=_s.jpg
+https://v2.kwaicdn.com/upic/2023/03/31/23/BMjAyMzAzMzEyMzQ0NDFfMzExNTcwODIzNV85OTQ2Mjg4NTg1NV8xXzM=_b_Babf4d3be147b2b708814052850880ffd.mp4?pkey=AAWWo2TmOyovU3wpcw-qxZEfCzeNLzDtHBE7tLd15z28cmpRxMuIRZjWmjStQwVKc3qjb0_vB8h1BHRng7r1rArjbSzb7ZVX4aio9ACty6MyhQMEMcUsiNRmvlQLVhTywBg&tag=1-1681130915-unknown-0-ml3z9lxblm-de2b236a3eab1bc2&clientCacheKey=3xuzaf2zdga4is9_b.mp4&di=73e2f18f&bp=14944&tt=b&ss=vp
+https://p1.a.yximgs.com/upic/2023/03/31/23/BMjAyMzAzMzEyMzQ0NDFfMzExNTcwODIzNV85OTQ2Mjg4NTg1NV8xXzM=_Bfe4e513ea9e56dd55ff3258b92b7945f.jpg?tag=1-1681130915-unknown-0-zgsrlzqiab-8448deb29683667c&clientCacheKey=3xuzaf2zdga4is9.jpg&di=73e2f18f&bp=14944
+kuaishou1681180829

+ 7 - 0
kuaishou/kuaishou_recommend/kuaishou/videos/fe80d0368da2d1c26909d1a875d6e09b/video.mp4

@@ -0,0 +1,7 @@
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
+<html>
+<head><title>403 Forbidden</title></head>
+<body>
+<h1>403 Forbidden</h1>
+<p>You don't have permission to access the URL on this server.<hr/>Powered by Tengine</body>
+</html>

+ 12 - 30
kuaishou/kuaishou_recommend/recommend_kuaishou.py

@@ -21,7 +21,7 @@ from common.feishu import Feishu
 from common.getuser import getUser
 from common.db import MysqlHelper
 from common.publish import Publish
-from common.public import get_user_from_mysql
+from common.public import get_user_from_mysql, random_title, get_config_from_mysql
 from common.userAgent import get_random_user_agent
 
 
@@ -77,26 +77,6 @@ class KuaiShouRecommend:
         except Exception as e:
             Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
 
-    # 万能标题
-    @classmethod
-    def random_title(cls, log_type, crawler):
-        try:
-            while True:
-                random_title_sheet = Feishu.get_values_batch(log_type, crawler, '0DiyXe')
-                if random_title_sheet is None:
-                    Common.logger(log_type, crawler).warning(f"filter_words_sheet:{random_title_sheet} 10秒钟后重试")
-                    continue
-                random_title_list = []
-                for x in random_title_sheet:
-                    for y in x:
-                        if y is None:
-                            pass
-                        else:
-                            random_title_list.append(y)
-                return random.choice(random_title_list)
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'random_title:{e}\n')
-
     # 获取用户信息列表
     @classmethod
     def get_user_list(cls, log_type, crawler, sheetid, env, machine):
@@ -155,7 +135,7 @@ class KuaiShouRecommend:
 
     # 处理视频标题
     @classmethod
-    def video_title(cls, log_type, crawler, title):
+    def video_title(cls, log_type, crawler, env, title):
         title_split1 = title.split(" #")
         if title_split1[0] != "":
             title1 = title_split1[0]
@@ -182,7 +162,7 @@ class KuaiShouRecommend:
                           .replace("?", "").replace('"', "").replace("<", "") \
                           .replace(">", "").replace("|", "").replace("@", "").replace('"', '').replace("'", '')[:40]
         if video_title.replace(" ", "") == "" or video_title == "。。。" or video_title == "...":
-            return cls.random_title(log_type, crawler)
+            return random_title(log_type, crawler, env, text='title')
         else:
             return video_title
 
@@ -255,7 +235,6 @@ class KuaiShouRecommend:
                 continue
             else:
                 feeds = response.json()['data']['visionNewRecoFeed']['feeds']
-                # pcursor = response.json()['data']['visionNewRecoFeed']['pcursor']
                 for i in range(len(feeds)):
                     if 'photo' not in feeds[i]:
                         Common.logger(log_type, crawler).warning(f"get_videoList:{feeds[i]}\n")
@@ -263,11 +242,12 @@ class KuaiShouRecommend:
 
                     # video_title
                     if 'caption' not in feeds[i]['photo']:
-                        video_title = cls.random_title(log_type, crawler)
+                        video_title = random_title(log_type, crawler, env, text='title')
+
                     elif feeds[i]['photo']['caption'].strip() == "":
-                        video_title = cls.random_title(log_type, crawler)
+                        video_title = random_title(log_type, crawler, env, text='title')
                     else:
-                        video_title = cls.video_title(log_type, crawler, feeds[i]['photo']['caption'])
+                        video_title = cls.video_title(log_type, crawler, env, feeds[i]['photo']['caption'])
 
                     if 'videoResource' not in feeds[i]['photo'] \
                             and 'manifest' not in feeds[i]['photo'] \
@@ -407,13 +387,15 @@ class KuaiShouRecommend:
     @classmethod
     def download_publish(cls, log_type, crawler, strategy, video_dict, rule_dict, our_uid, oss_endpoint, env, machine):
         try:
+            filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
+            for filter_word in filter_words:
+                if filter_word in video_dict['video_title']:
+                    Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
+                    return
             download_finished = False
             if cls.repeat_video(log_type, crawler, video_dict['video_id'], video_dict['video_title'],
                                 video_dict['publish_time_str'], env, machine) != 0:
                 Common.logger(log_type, crawler).info('视频已下载\n')
-            elif any(word if word in video_dict['video_title'] else False for word in
-                     cls.filter_words(log_type, crawler)) is True:
-                Common.logger(log_type, crawler).info('标题已中过滤词\n')
             else:
                 # 下载视频
                 Common.download_method(log_type=log_type, crawler=crawler, text='video',

+ 14 - 18
xigua/xigua_follow/xigua_follow.py

@@ -27,7 +27,7 @@ from common.getuser import getUser
 from common.common import Common
 from common.feishu import Feishu
 from common.publish import Publish
-from common.public import get_user_from_mysql
+from common.public import get_user_from_mysql, random_title, get_config_from_mysql
 
 
 class Follow:
@@ -413,11 +413,11 @@ class Follow:
                         video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list']) != 0:
 
                         video_url = \
-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
-                            'backup_url_1']
+                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
+                                'backup_url_1']
                         audio_url = \
-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list'][-1][
-                            'backup_url_1']
+                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list'][-1][
+                                'backup_url_1']
                         if len(video_url) % 3 == 1:
                             video_url += '=='
                         elif len(video_url) % 3 == 2:
@@ -429,9 +429,11 @@ class Follow:
                         video_url = base64.b64decode(video_url).decode('utf8')
                         audio_url = base64.b64decode(audio_url).decode('utf8')
                         video_width = \
-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1]['vwidth']
+                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
+                                'vwidth']
                         video_height = \
-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1]['vheight']
+                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
+                                'vheight']
                         video_url_dict["video_url"] = video_url
                         video_url_dict["audio_url"] = audio_url
                         video_url_dict["video_width"] = video_width
@@ -893,21 +895,15 @@ class Follow:
     @classmethod
     def download_publish(cls, log_type, crawler, strategy, video_dict, rule_dict, our_uid, oss_endpoint, env, machine):
         try:
+            filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
+            for filter_word in filter_words:
+                if filter_word in video_dict['video_title']:
+                    Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
+                    return
             if cls.download_rule(video_dict, rule_dict) is False:
                 Common.logger(log_type, crawler).info('不满足抓取规则\n')
-            elif any(word if word in video_dict['video_title'] else False for word in
-                     cls.filter_words(log_type, crawler)) is True:
-                Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
             elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
                 Common.logger(log_type, crawler).info('视频已下载\n')
-            # elif str(video_dict['video_id']) in [x for y in Feishu.get_values_batch(log_type, 'xigua', 'e075e9') for x in y]:
-            #     Common.logger(log_type, crawler).info('视频已下载\n')
-            # elif str(video_dict['video_id']) in [x for y in Feishu.get_values_batch(log_type, 'xigua', '3Ul6wZ') for x in y]:
-            #     Common.logger(log_type, crawler).info('视频已下载\n')
-            # elif str(video_dict['video_id']) in [x for y in Feishu.get_values_batch(log_type, 'xigua', 'QOWqMo') for x in y]:
-            #     Common.logger(log_type, crawler).info('视频已下载\n')
-            # elif str(video_dict['video_id']) in [x for y in Feishu.get_values_batch(log_type, 'xigua', 'wjhpDs') for x in y]:
-            #     Common.logger(log_type, crawler).info('视频已存在\n')
             else:
                 # 下载视频
                 Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',

+ 14 - 5
xigua/xigua_search/xigua_search.py

@@ -16,12 +16,14 @@ import urllib3
 from urllib.parse import quote
 from requests.adapters import HTTPAdapter
 
+
 sys.path.append(os.getcwd())
 from common.db import MysqlHelper
 from common.getuser import getUser
 from common.common import Common
 from common.feishu import Feishu
 from common.publish import Publish
+from common.public import get_config_from_mysql
 from common.userAgent import get_random_user_agent, get_random_header
 
 
@@ -602,7 +604,7 @@ class XiguaSearch:
         item_counter = data['h5_extra']['itemCell']['itemCounter']
         user_info = data['user_info']
         detail_info = data['video_detail_info']
-        video_dict = {'video_title': data['title'].replace('"' ,'').replace("'", ''),
+        video_dict = {'video_title': data['title'].replace('"', '').replace("'", ''),
                       'video_id': detail_info['video_id'],
                       'gid': data['group_id'],
                       'play_cnt': item_counter['videoWatchCount'],
@@ -687,10 +689,17 @@ class XiguaSearch:
                         video_dict['video_url'] = video_url_dict["video_url"]
                         video_dict['session'] = signature
                     except Exception as e:
-                        Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},获取详情失败,原因:{e}')
+                        Common.logger(log_type, crawler).error(
+                            f'关键词:{search_word},视频:{item_id},获取详情失败,原因:{e}')
                         continue
+                    filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
+                    for filter_word in filter_words:
+                        if filter_word in video_dict['video_title']:
+                            Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
+                            continue
                     if cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
-                        Common.logger(log_type, crawler).info(f'关键词:{search_word},gid:{video_dict["gid"]},视频已下载,无需重复下载\n')
+                        Common.logger(log_type, crawler).info(
+                            f'关键词:{search_word},gid:{video_dict["gid"]},视频已下载,无需重复下载\n')
                         continue
                     for k, v in video_dict.items():
                         Common.logger(log_type, crawler).info(f"{k}:{v}")
@@ -715,7 +724,8 @@ class XiguaSearch:
                         Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},下载失败,原因:{e}')
                         continue
                     total_count += 1
-                    Common.logger(log_type, crawler).info(f'search_word:{search_word},title:{video_dict["video_title"]},gid:{video_dict["gid"]},offset:{offset}, total:{total_count}')
+                    Common.logger(log_type, crawler).info(
+                        f'search_word:{search_word},title:{video_dict["video_title"]},gid:{video_dict["gid"]},offset:{offset}, total:{total_count}')
                     if total_count >= 30:
                         return
                 # elif v_type == 'pseries':
@@ -790,7 +800,6 @@ class XiguaSearch:
     @classmethod
     def download_publish(cls, log_type, crawler, search_word, strategy, video_dict, rule_dict, our_uid, oss_endpoint,
                          env, machine):
-
         Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
                                title=video_dict['video_title'], url=video_dict['video_url'])
         # 下载音频
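
Note on the filter check added in the @@ -687,10 +689,17 hunk above: unlike the other crawlers, here the check sits inside a video loop rather than in download_publish, and the `continue` it uses only advances the inner for-filter_word loop instead of skipping the current video (the other files `return` instead). A small helper would make the intent explicit; a hypothetical sketch, not part of this commit:

def title_hit_filter(title, filter_words):
    # True if any configured filter word appears in the title.
    return any(word in title for word in filter_words)

# Call-site sketch inside the video loop:
#     if title_hit_filter(video_dict['video_title'], filter_words):
#         Common.logger(log_type, crawler).info(f'标题已中过滤词:{video_dict["video_title"]}\n')
#         continue  # now actually skips to the next video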

+ 7 - 61
youtube/youtube_follow/youtube_follow_api.py

@@ -21,7 +21,7 @@ from common.feishu import Feishu
 from common.getuser import getUser
 from common.publish import Publish
 from common.translate import Translate
-from common.public import get_user_from_mysql
+from common.public import get_user_from_mysql, get_config_from_mysql
 
 headers = {
     'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
@@ -52,63 +52,6 @@ class YoutubeFollow:
         'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
     }
 
-    # @classmethod
-    # def get_browse_id(cls, log_type, crawler, out_user_id, machine):
-    #     """
-    #     获取每个用户的 browse_id
-    #     :param log_type: 日志
-    #     :param crawler: 哪款爬虫
-    #     :param out_user_id: 站外用户 UID
-    #     :param machine: 部署机器,阿里云填写 aliyun / aliyun_hk,线下分别填写 macpro,macair,local
-    #     :return: browse_id
-    #     """
-    #     try:
-    #         # 打印请求配置
-    #         ca = DesiredCapabilities.CHROME
-    #         ca["goog:loggingPrefs"] = {"performance": "ALL"}
-    #
-    #         # 不打开浏览器运行
-    #         chrome_options = webdriver.ChromeOptions()
-    #         chrome_options.add_argument("--headless")
-    #         chrome_options.add_argument(
-    #             '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
-    #         chrome_options.add_argument("--no-sandbox")
-    #
-    #         # driver初始化
-    #         if machine == 'aliyun' or machine == 'aliyun_hk':
-    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
-    #         elif machine == 'macpro':
-    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
-    #                                       service=Service('/Users/lieyunye/Downloads/chromedriver_v86/chromedriver'))
-    #         elif machine == 'macair':
-    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
-    #                                       service=Service('/Users/piaoquan/Downloads/chromedriver'))
-    #         else:
-    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
-    #                 '/Users/wangkun/Downloads/chromedriver/chromedriver_v110/chromedriver'))
-    #
-    #         driver.implicitly_wait(10)
-    #         url = f'https://www.youtube.com/{out_user_id}/videos'
-    #         driver.get(url)
-    #         # driver.save_screenshot("./1.png")
-    #         # 向上滑动 1000 个像素
-    #         # driver.execute_script('window.scrollBy(0, 2000)')
-    #         # driver.save_screenshot("./2.png")
-    #         time.sleep(3)
-    #         accept_btns = driver.find_elements(By.XPATH, '//span[text()="全部接受"]')
-    #         accept_btns_eng = driver.find_elements(By.XPATH, '//span[text()="Accept all"]')
-    #         if len(accept_btns) != 0:
-    #             accept_btns[0].click()
-    #             time.sleep(2)
-    #         elif len(accept_btns_eng) != 0:
-    #             accept_btns_eng[0].click()
-    #             time.sleep(2)
-    #         browse_id = driver.find_element(By.XPATH, '//meta[@itemprop="channelId"]').get_attribute('content')
-    #         driver.quit()
-    #         return browse_id
-    #     except Exception as e:
-    #         Common.logger(log_type, crawler).error(f'get_browse_id异常:{e}\n')
-
     @classmethod
     def get_out_user_info(cls, log_type, crawler, browse_id, out_user_id):
         """
@@ -922,7 +865,7 @@ class YoutubeFollow:
                 if 'title' not in videoDetails:
                     video_title = ''
                 else:
-                    video_title = videoDetails['title'].replace('"' ,'').replace("'", '')
+                    video_title = videoDetails['title'].replace('"', '').replace("'", '')
                 video_title = cls.filter_emoji(video_title)
                 if not cls.is_contain_chinese(video_title):
                     video_title = Translate.google_translate(video_title, machine) \
@@ -1016,8 +959,11 @@ class YoutubeFollow:
     @classmethod
     def download_publish(cls, log_type, crawler, video_dict, strategy, our_uid, env, oss_endpoint, machine):
         try:
-            # sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_dict['video_id']}" """
-            # repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
+            filter_words = get_config_from_mysql(log_type, crawler, env, text='filter', action='get_author_map')
+            for filter_word in filter_words:
+                if filter_word in video_dict['video_title']:
+                    Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
+                    return
             if video_dict['video_title'] == '' or video_dict['video_url'] == '':
                 Common.logger(log_type, crawler).info('无效视频\n')
             elif video_dict['duration'] > 1200 or video_dict['duration'] < 60:
             elif video_dict['duration'] > 1200 or video_dict['duration'] < 60: