2 년 전 · 76e43985e2
--- a/common/public.py
+++ b/common/public.py
@@ -4,7 +4,6 @@
 
				 import os, sys
			
 
				 import time
			
 
				 import random
			
 
				-
			
 
				 sys.path.append(os.getcwd())
			
 
				 from common.common import Common
			
 
				 from common.scheduling_db import MysqlHelper
			
@@ -12,27 +11,6 @@ from common.scheduling_db import MysqlHelper
 
				 # from scheduling_db import MysqlHelper
			
 
				 
			
 
				 
			
 
				-# 过滤词库
			
 
				-def filter_word(log_type, crawler, source, env):
			
 
				-    """
			
 
				-    过滤词库
			
 
				-    :param log_type: 日志
			
 
				-    :param crawler: 哪款爬虫，如：xiaoniangao
			
 
				-    :param source: 哪款爬虫，如：小年糕
			
 
				-    :param env: 环境
			
 
				-    :return: word_list
			
 
				-    """
			
 
				-    select_sql = f""" select * from crawler_filter_word where source="{source}" """
			
 
				-    words = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
			
 
				-    word_list = []
			
 
				-    if len(words) == 0:
			
 
				-        return word_list
			
 
				-    for word in words:
			
 
				-        word_list.append(word['filter_word'])
			
 
				-
			
 
				-    return word_list
			
 
				-
			
 
				-
			
 
				 def get_user_from_mysql(log_type, crawler, source, env, action=''):
			
 
				     sql = f"select * from crawler_user_v3 where source='{source}' and mode='{log_type}'"
			
 
				     results = MysqlHelper.get_values(log_type, crawler, sql, env, action=action)
			
@@ -109,89 +87,38 @@ def download_rule(log_type, crawler, video_dict, rule_dict):
 
				     :param rule_dict: 规则信息，字典格式
			
 
				     :return: 满足规则，返回 True；反之，返回 False
			
 
				     """
			
 
				-    rule_playCnt_min = rule_dict.get('playCnt', {}).get('min', 0)
			
 
				-    rule_playCnt_max = rule_dict.get('playCnt', {}).get('max', 100000000)
			
 
				-    if rule_playCnt_max == 0:
			
 
				-        rule_playCnt_max = 100000000
			
 
				-
			
 
				-    rule_duration_min = rule_dict.get('duration', {}).get('min', 0)
			
 
				-    rule_duration_max = rule_dict.get('duration', {}).get('max', 100000000)
			
 
				-    if rule_duration_max == 0:
			
 
				-        rule_duration_max = 100000000
			
 
				-
			
 
				-    rule_period_min = rule_dict.get('period', {}).get('min', 0)
			
 
				-    # rule_period_max = rule_dict.get('period', {}).get('max', 100000000)
			
 
				-    # if rule_period_max == 0:
			
 
				-    #     rule_period_max = 100000000
			
 
				-    #
			
 
				-    # rule_fans_min = rule_dict.get('fans', {}).get('min', 0)
			
 
				-    # rule_fans_max = rule_dict.get('fans', {}).get('max', 100000000)
			
 
				-    # if rule_fans_max == 0:
			
 
				-    #     rule_fans_max = 100000000
			
 
				-    #
			
 
				-    # rule_videos_min = rule_dict.get('videos', {}).get('min', 0)
			
 
				-    # rule_videos_max = rule_dict.get('videos', {}).get('max', 100000000)
			
 
				-    # if rule_videos_max == 0:
			
 
				-    #     rule_videos_max = 100000000
			
 
				-
			
 
				-    rule_like_min = rule_dict.get('like', {}).get('min', 0)
			
 
				-    rule_like_max = rule_dict.get('like', {}).get('max', 100000000)
			
 
				-    if rule_like_max == 0:
			
 
				-        rule_like_max = 100000000
			
 
				-
			
 
				-    rule_videoWidth_min = rule_dict.get('videoWidth', {}).get('min', 0)
			
 
				-    rule_videoWidth_max = rule_dict.get('videoWidth', {}).get('max', 100000000)
			
 
				-    if rule_videoWidth_max == 0:
			
 
				-        rule_videoWidth_max = 100000000
			
 
				-
			
 
				-    rule_videoHeight_min = rule_dict.get('videoHeight', {}).get('min', 0)
			
 
				-    rule_videoHeight_max = rule_dict.get('videoHeight', {}).get('max', 100000000)
			
 
				-    if rule_videoHeight_max == 0:
			
 
				-        rule_videoHeight_max = 100000000
			
 
				-
			
 
				-    rule_shareCnt_min = rule_dict.get('shareCnt', {}).get('min', 0)
			
 
				-    rule_shareCnt_max = rule_dict.get('shareCnt', {}).get('max', 100000000)
			
 
				-    if rule_shareCnt_max == 0:
			
 
				-        rule_shareCnt_max = 100000000
			
 
				-
			
 
				-    rule_commentCnt_min = rule_dict.get('commentCnt', {}).get('min', 0)
			
 
				-    rule_commentCnt_max = rule_dict.get('commentCnt', {}).get('max', 100000000)
			
 
				-    if rule_commentCnt_max == 0:
			
 
				-        rule_commentCnt_max = 100000000
			
 
				-
			
 
				-    Common.logger(log_type, crawler).info(
			
 
				-        f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
			
 
				-    Common.logger(log_type, crawler).info(
			
 
				-        f'rule_playCnt_max:{int(rule_playCnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_playCnt_min:{int(rule_playCnt_min)}')
			
 
				-    Common.logger(log_type, crawler).info(
			
 
				-        f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
			
 
				-    Common.logger(log_type, crawler).info(
			
 
				-        f'rule_like_max:{int(rule_like_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_min:{int(rule_like_min)}')
			
 
				-    Common.logger(log_type, crawler).info(
			
 
				-        f'rule_commentCnt_max:{int(rule_commentCnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_commentCnt_min:{int(rule_commentCnt_min)}')
			
 
				-    Common.logger(log_type, crawler).info(
			
 
				-        f'rule_shareCnt_max:{int(rule_shareCnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_shareCnt_min:{int(rule_shareCnt_min)}')
			
 
				-    Common.logger(log_type, crawler).info(
			
 
				-        f'rule_videoWidth_max:{int(rule_videoWidth_max)} >= video_width:{int(video_dict["video_width"])} >= rule_videoWidth_min:{int(rule_videoWidth_min)}')
			
 
				-    Common.logger(log_type, crawler).info(
			
 
				-        f'rule_videoHeight_max:{int(rule_videoHeight_max)} >= video_height:{int(video_dict["video_height"])} >= rule_videoHeight_min:{int(rule_videoHeight_min)}')
			
 
				-
			
 
				-    if int(rule_duration_max) >= int(float(video_dict["duration"])) >= int(rule_duration_min) \
			
 
				-            and int(rule_playCnt_max) >= int(video_dict['play_cnt']) >= int(rule_playCnt_min) \
			
 
				-            and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min) \
			
 
				-            and int(rule_like_max) >= int(video_dict['like_cnt']) >= int(rule_like_min) \
			
 
				-            and int(rule_commentCnt_max) >= int(video_dict['comment_cnt']) >= int(rule_commentCnt_min) \
			
 
				-            and int(rule_shareCnt_max) >= int(video_dict['share_cnt']) >= int(rule_shareCnt_min) \
			
 
				-            and int(rule_videoWidth_max) >= int(video_dict['video_width']) >= int(rule_videoWidth_min) \
			
 
				-            and int(rule_videoHeight_max) >= int(video_dict['video_height']) >= int(rule_videoHeight_min):
			
 
				-        return True
			
 
				-    else:
			
 
				-        return False
			
 
				+    # 格式化 video_dict:publish_time_stamp
			
 
				+    if "publish_time_stamp" in video_dict.keys():
			
 
				+        video_dict["publish_time"] = video_dict["publish_time_stamp"] * 1000
			
 
				+    # 格式化 video_dict:period
			
 
				+    if "period" not in video_dict.keys() and "publish_time" in video_dict.keys():
			
 
				+        video_dict["period"] = int((int(time.time() * 1000) - video_dict["publish_time"]) / (3600 * 24 * 1000))
			
 
				+    # 格式化 rule_dict 最大值取值为 0 的问题
			
 
				+    for rule_value in rule_dict.values():
			
 
				+        if rule_value["max"] == 0:
			
 
				+            rule_value["max"] = 999999999999999
			
 
				+    # 格式化 rule_dict 有的 key，video_dict 中没有的问题
			
 
				+    for rule_key in rule_dict.keys():
			
 
				+        if rule_key not in video_dict.keys():
			
 
				+            video_dict[rule_key] = int(rule_dict[rule_key]["max"] / 2)
			
 
				+    # 比较结果，输出：True / False
			
 
				+    for video_key, video_value in video_dict.items():
			
 
				+        for rule_key, rule_value in rule_dict.items():
			
 
				+            if video_key == rule_key:
			
 
				+                result = rule_value["min"] <= video_value <= rule_value["max"]
			
 
				+                # print(f'{video_key}: {rule_value["min"]} <= {video_value} <= {rule_value["max"]}，{result}')
			
 
				+                Common.logger(log_type, crawler).info(f'{video_key}: {rule_value["min"]} <= {video_value} <= {rule_value["max"]}，{result}')
			
 
				+                if result is False:
			
 
				+                    return False
			
 
				+                else:
			
 
				+                    continue
			
 
				+    return True
			
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				     # print(filter_word('public', 'xiaoniangao', '小年糕', 'prod'))
			
 
				-    print(get_config_from_mysql('hour', 'xiaoniangao', 'prod', 'emoji'))
			
 
				+    # print(get_config_from_mysql('test', 'gongzhonghao', 'prod', 'filter'))
			
 
				+    # print(filter_word('test', 'gongzhonghao', '公众号', 'prod'))
			
 
				     # task_str = "[('task_id','11')," \
			
 
				     #            "('task_name','小年糕小时榜')," \
			
 
				     #            "('source','xiaoniangao')," \
			
--- a/gongzhonghao/gongzhonghao_follow/gongzhonghao_follow.py
+++ b/gongzhonghao/gongzhonghao_follow/gongzhonghao_follow.py
@@ -20,7 +20,7 @@ from selenium import webdriver
 
				 sys.path.append(os.getcwd())
			
 
				 from common.common import Common
			
 
				 from common.feishu import Feishu
			
 
				-from common.public import filter_word
			
 
				+from common.public import get_config_from_mysql
			
 
				 from common.publish import Publish
			
 
				 from common.scheduling_db import MysqlHelper
			
 
				 
			
@@ -372,8 +372,12 @@ class GongzhonghaoFollow:
 
				         if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
			
 
				             Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
			
 
				         # 标题敏感词过滤
			
 
				-        elif any(word if word in video_dict['video_title'] else False for word in
			
 
				-                 filter_word(log_type, crawler, "公众号", env)) is True:
			
 
				+        elif any(word if word in video_dict['video_title']
			
 
				+                 else False for word in get_config_from_mysql(log_type=log_type,
			
 
				+                                                              source=crawler,
			
 
				+                                                              env=env,
			
 
				+                                                              text="filter",
			
 
				+                                                              action="")) is True:
			
 
				             Common.logger(log_type, crawler).info("标题已中过滤词\n")
			
 
				         # 已下载判断
			
 
				         elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
			
@@ -534,11 +538,16 @@ class GongzhonghaoFollow:
 
				 
			
 
				 if __name__ == "__main__":
			
 
				     # GongzhonghaoFollow.get_token(log_type="follow", crawler="gongzhonghao")
			
 
				-    GongzhonghaoFollow.get_users()
			
 
				+    # GongzhonghaoFollow.get_users()
			
 
				     # GongzhonghaoFollow.get_videoList(log_type="follow",
			
 
				     #                                  crawler="gongzhonghao",
			
 
				     #                                  user="香音难忘",
			
 
				     #                                  index=1,
			
 
				     #                                  oss_endpoint="out",
			
 
				     #                                  env="dev")
			
 
				+    print(get_config_from_mysql(log_type="test",
			
 
				+                              source="gongzhonghao",
			
 
				+                              env="prod",
			
 
				+                              text="filter",
			
 
				+                              action=""))
			
 
				     pass
			
--- a/gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_2.py
+++ b/gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_2.py
@@ -21,7 +21,7 @@ from selenium import webdriver
 
				 sys.path.append(os.getcwd())
			
 
				 from common.common import Common
			
 
				 from common.feishu import Feishu
			
 
				-from common.public import filter_word
			
 
				+from common.public import get_config_from_mysql
			
 
				 from common.publish import Publish
			
 
				 from common.scheduling_db import MysqlHelper
			
 
				 
			
@@ -373,8 +373,12 @@ class GongzhonghaoFollow2:
 
				         if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
			
 
				             Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
			
 
				         # 标题敏感词过滤
			
 
				-        elif any(word if word in video_dict['video_title'] else False for word in
			
 
				-                 filter_word(log_type, crawler, "公众号", env)) is True:
			
 
				+        elif any(word if word in video_dict['video_title']
			
 
				+                 else False for word in get_config_from_mysql(log_type=log_type,
			
 
				+                                                              source=crawler,
			
 
				+                                                              env=env,
			
 
				+                                                              text="filter",
			
 
				+                                                              action="")) is True:
			
 
				             Common.logger(log_type, crawler).info("标题已中过滤词\n")
			
 
				         # 已下载判断
			
 
				         elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
			
--- a/gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_3.py
+++ b/gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_3.py
@@ -21,7 +21,7 @@ from selenium import webdriver
 
				 sys.path.append(os.getcwd())
			
 
				 from common.common import Common
			
 
				 from common.feishu import Feishu
			
 
				-from common.public import filter_word
			
 
				+from common.public import get_config_from_mysql
			
 
				 from common.publish import Publish
			
 
				 from common.scheduling_db import MysqlHelper
			
 
				 
			
@@ -373,8 +373,12 @@ class GongzhonghaoFollow3:
 
				         if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
			
 
				             Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
			
 
				         # 标题敏感词过滤
			
 
				-        elif any(word if word in video_dict['video_title'] else False for word in
			
 
				-                 filter_word(log_type, crawler, "公众号", env)) is True:
			
 
				+        elif any(word if word in video_dict['video_title']
			
 
				+                 else False for word in get_config_from_mysql(log_type=log_type,
			
 
				+                                                              source=crawler,
			
 
				+                                                              env=env,
			
 
				+                                                              text="filter",
			
 
				+                                                              action="")) is True:
			
 
				             Common.logger(log_type, crawler).info("标题已中过滤词\n")
			
 
				         # 已下载判断
			
 
				         elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
			
--- a/main/process.sh
+++ b/main/process.sh
@@ -68,35 +68,35 @@ else
 
				   echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕定向爬虫策略 进程状态正常" >> ${log_path}
			
 
				 fi
			
 
				 
			
 
				-## 小年糕小时榜爬虫策略
			
 
				-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 小年糕小时榜爬虫策略 进程状态" >> ${log_path}
			
 
				-#ps -ef | grep "run_xiaoniangao_hour.py" | grep -v "grep"
			
 
				-#if [ "$?" -eq 1 ];then
			
 
				-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
			
 
				-#  if [ ${env} = "dev" ];then
			
 
				-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py --log_type="hour" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-hour.log
			
 
				-#  else
			
 
				-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py --log_type="hour" --crawler="xiaoniangao" --env="prod" xiaoniangao/logs/nohup-hour.log
			
 
				-#  fi
			
 
				-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
			
 
				-#else
			
 
				-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕小时榜爬虫策略 进程状态正常" >> ${log_path}
			
 
				-#fi
			
 
				+# 小年糕小时榜爬虫策略
			
 
				+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 小年糕小时榜爬虫策略 进程状态" >> ${log_path}
			
 
				+ps -ef | grep "run_xiaoniangao_hour.py" | grep -v "grep"
			
 
				+if [ "$?" -eq 1 ];then
			
 
				+  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
			
 
				+  if [ ${env} = "dev" ];then
			
 
				+    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py --log_type="hour" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-hour.log
			
 
				+  else
			
 
				+    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py --log_type="hour" --crawler="xiaoniangao" --env="prod" xiaoniangao/logs/nohup-hour.log
			
 
				+  fi
			
 
				+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
			
 
				+else
			
 
				+  echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕小时榜爬虫策略 进程状态正常" >> ${log_path}
			
 
				+fi
			
 
				 
			
 
				-## 小年糕播放量榜爬虫策略
			
 
				-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 播放量榜爬虫策略 进程状态" >> ${log_path}
			
 
				-#ps -ef | grep "run_xiaoniangao_play.py" | grep -v "grep"
			
 
				-#if [ "$?" -eq 1 ];then
			
 
				-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
			
 
				-#  if [ ${env} = "dev" ];then
			
 
				-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py --log_type="play" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-play.log
			
 
				-#  else
			
 
				-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py --log_type="play" --crawler="xiaoniangao" --env="prod" xiaoniangao/logs/nohup-play.log
			
 
				-#  fi
			
 
				-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
			
 
				-#else
			
 
				-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 播放量榜爬虫策略 进程状态正常" >> ${log_path}
			
 
				-#fi
			
 
				+# 小年糕播放量榜爬虫策略
			
 
				+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 播放量榜爬虫策略 进程状态" >> ${log_path}
			
 
				+ps -ef | grep "run_xiaoniangao_play.py" | grep -v "grep"
			
 
				+if [ "$?" -eq 1 ];then
			
 
				+  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
			
 
				+  if [ ${env} = "dev" ];then
			
 
				+    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py --log_type="play" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-play.log
			
 
				+  else
			
 
				+    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py --log_type="play" --crawler="xiaoniangao" --env="prod" xiaoniangao/logs/nohup-play.log
			
 
				+  fi
			
 
				+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
			
 
				+else
			
 
				+  echo "$(date "+%Y-%m-%d %H:%M:%S") 播放量榜爬虫策略 进程状态正常" >> ${log_path}
			
 
				+fi
			
 
				 
			
 
				 
			
 
				 # 快手定向爬虫策略
			
--- a/scheduling/scheduling_v3/demo.py
+++ b/scheduling/scheduling_v3/demo.py
@@ -1,105 +0,0 @@
 
				-# -*- coding: utf-8 -*-
			
 
				-# @Author: wangkun
			
 
				-# @Time: 2023/4/19
			
 
				-import time
			
 
				-from datetime import date, timedelta
			
 
				-
			
 
				-# import ast
			
 
				-# task_str = "[('task_id','11')," \
			
 
				-#            "('task_name','小年糕小时榜')," \
			
 
				-#            "('source','xiaoniangao')," \
			
 
				-#            "('start_time','1681834560000')," \
			
 
				-#            "('interval','1'),('mode','hour')," \
			
 
				-#            "('rule','[{'duration':{'min':40,'max':0}},{'playCnt':{'min':4000,'max':0}},{'period':{'min':10,'max':0}},{'fans':{'min':0,'max':0}},{'videos':{'min':0,'max':0}},{'like':{'min':0,'max':0}},{'videoWidth':{'min':0,'max':0}},{'videoHeight':{'min':0,'max':0}}]')," \
			
 
				-#            "('spider_name','')," \
			
 
				-#            "('machine','')," \
			
 
				-#            "('status','0')," \
			
 
				-#            "('create_time','1681889875288')," \
			
 
				-#            "('update_time','1681889904908')," \
			
 
				-#            "('operator','王坤')]"
			
 
				-# task_str = task_str.replace("'[{", '[{').replace("}}]'", '}}]')
			
 
				-# print(task_str)
			
 
				-# task_list = eval(task_str)
			
 
				-# print(task_list)
			
 
				-# print(type(task_list))
			
 
				-# task_dict = dict(task_list)
			
 
				-# print(task_dict)
			
 
				-# print(type(task_dict))
			
 
				-#
			
 
				-#
			
 
				-# rule = task_dict['rule']
			
 
				-# print(type(rule))
			
 
				-# print(rule)
			
 
				-# print(task_dict)
			
 
				-# task_dict['rule'] = dict()
			
 
				-# for item in rule:
			
 
				-#     for k, val in item.items():
			
 
				-#         task_dict['rule'][k] = val
			
 
				-# print('\n')
			
 
				-# print(task_dict['rule'])
			
 
				-# print('\n')
			
 
				-# print(task_dict)
			
 
				-
			
 
				-# str1 = task_str.split(",('rule',")[0]+"]"
			
 
				-# print(type(str1))
			
 
				-# print(str1)
			
 
				-# eval1 = eval(str1)
			
 
				-# print(type(eval1), eval1)
			
 
				-# dict1 = dict(eval1)
			
 
				-# print(type(dict1), dict1)
			
 
				-# print("============\n")
			
 
				-#
			
 
				-# "[{'duration':{'min':40,'max':0}},{'playCnt':{'min':4000,'max':0}},{'period':{'min':10,'max':0}},{'fans':{'min':0,'max':0}},{'videos':{'min':0,'max':0}},{'like':{'min':0,'max':0}},{'videoWidth':{'min':0,'max':0}},{'videoHeight':{'min':0,'max':0}}]"
			
 
				-# print(task_str.split(",('rule',")[-1].split(",('spider_name'")[0].replace(")", ""))
			
 
				-# # rule_str = task_str.split(",('rule',")[-1].split(",('spider_name'")[0].replace(")", "").replace("'[{", '[{').replace("}}]'", '}}]')
			
 
				-# rule_str = task_str.split(",('rule',")[-1].split(",('spider_name'")[0].replace(")", "")[1:-1]
			
 
				-# print(type(rule_str))
			
 
				-# print(rule_str)
			
 
				-#
			
 
				-#
			
 
				-# rule_list = eval(rule_str)
			
 
				-# print(type(rule_list))
			
 
				-# print(rule_list)
			
 
				-
			
 
				-# rule_dict = {'duration': {'min': 40, 'max': 0}, 'playCnt': {'min': 4000, 'max': 0}, 'period': {'min': 10, 'max': 0}, 'fans': {'min': 0, 'max': 0}, 'videos': {'min': 0, 'max': 0}, 'like': {'min': 0, 'max': 0}, 'videoWidth': {'min': 0, 'max': 0}, 'videoHeight': {'min': 0, 'max': 0}}
			
 
				-# rule_dict = {}
			
 
				-#
			
 
				-# for k, v in rule_dict.items():
			
 
				-#     print(f"{k}:{v}")
			
 
				-#
			
 
				-# rule_duration_min = rule_dict.get('duration', {}).get('min', 0)
			
 
				-# rule_duration_max = rule_dict.get('duration', {}).get('max', 100000000)
			
 
				-# rule_playCnt_min = rule_dict.get('playCnt', {}).get('min', 0)
			
 
				-# rule_playCnt_max = rule_dict.get('playCnt', {}).get('max', 100000000)
			
 
				-# rule_period_min = rule_dict.get('period', {}).get('min', 0)
			
 
				-# rule_period_max = rule_dict.get('period', {}).get('max', 100000000)
			
 
				-# rule_fans_min = rule_dict.get('fans', {}).get('min', 0)
			
 
				-# rule_fans_max = rule_dict.get('fans', {}).get('max', 100000000)
			
 
				-# rule_videos_min = rule_dict.get('videos', {}).get('min', 0)
			
 
				-# rule_videos_max = rule_dict.get('videos', {}).get('max', 100000000)
			
 
				-# rule_like_min = rule_dict.get('like', {}).get('min', 0)
			
 
				-# rule_like_max = rule_dict.get('like', {}).get('max', 100000000)
			
 
				-# rule_videoWidth_min = rule_dict.get('videoWidth', {}).get('min', 0)
			
 
				-# rule_videoWidth_max = rule_dict.get('videoWidth', {}).get('max', 100000000)
			
 
				-# rule_videoHeight_min = rule_dict.get('videoWidth', {}).get('min', 0)
			
 
				-# rule_videoHeight_max = rule_dict.get('videoWidth', {}).get('max', 100000000)
			
 
				-#
			
 
				-# print(f"rule_duration_min:{rule_duration_min}")
			
 
				-# print(f"rule_duration_max:{rule_duration_max}")
			
 
				-# print(f"rule_playCnt_min:{rule_playCnt_min}")
			
 
				-# print(f"rule_playCnt_max:{rule_playCnt_max}")
			
 
				-# print(f"rule_period_min:{rule_period_min}")
			
 
				-# print(f"rule_period_max:{rule_period_max}")
			
 
				-# print(f"rule_fans_min:{rule_fans_min}")
			
 
				-# print(f"rule_fans_max:{rule_fans_max}")
			
 
				-# print(f"rule_videos_min:{rule_videos_min}")
			
 
				-# print(f"rule_videos_max:{rule_videos_max}")
			
 
				-# print(f"rule_videoWidth_min:{rule_videoWidth_min}")
			
 
				-# print(f"rule_videoWidth_max:{rule_videoWidth_max}")
			
 
				-# print(f"rule_videoHeight_min:{rule_videoHeight_min}")
			
 
				-# print(f"rule_videoHeight_max:{rule_videoHeight_max}")
			
 
				-
			
 
				-time_str = (date.today() + timedelta(days=-10)).strftime("%Y-%m-%d %H:%M:%S")
			
 
				-time_stamp = int(time.mktime(time.strptime(time_str, "%Y-%m-%d %H:%M:%S")))
			
 
				-print(time_str)
			
 
				-print(time_stamp)
			
--- a/suisuiniannianyingfuqi/.DS_Store
+++ b/suisuiniannianyingfuqi/.DS_Store
--- a/suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/demo.py
+++ b/suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/demo.py
@@ -31,19 +31,20 @@ class Demo:
 
				             "publish_time_stamp": 1683648000,  # 2023-05-10 00:00:00
			
 
				             "video_url": "www.baidu.com"
			
 
				         }
			
 
				-        rule_dict = {"play_cnt": {"min": 0, "max": 0},
			
 
				-                     "fans_cnt": {"min": 0, "max": 0},
			
 
				-                     "videos_cnt": {"min": 0, "max": 0},
			
 
				-                     "like_cnt": {"min": 0, "max": 0},
			
 
				-                     "video_width": {"min": 0, "max": 0},
			
 
				-                     "video_height": {"min": 0, "max": 0},
			
 
				-                     "duration": {"min": 0, "max": 0},
			
 
				-                     "share_cnt": {"min": 0, "max": 0},
			
 
				-                     "comment_cnt": {"min": 0, "max": 0},
			
 
				-                     "favorite_cnt": {"min": 0, "max": 0},
			
 
				-                     # "period": {"min": 10, "max": 0},
			
 
				-                     "publish_time": {"min": 1673734400000, "max": 0}
			
 
				-                     }
			
 
				+        rule_dict = {
			
 
				+             # "play_cnt": {"min": 0, "max": 0},
			
 
				+             # "fans_cnt": {"min": 0, "max": 0},
			
 
				+             # "videos_cnt": {"min": 0, "max": 0},
			
 
				+             # "like_cnt": {"min": 0, "max": 0},
			
 
				+             # "video_width": {"min": 0, "max": 0},
			
 
				+             # "video_height": {"min": 0, "max": 0},
			
 
				+             # "duration": {"min": 0, "max": 0},
			
 
				+             # "share_cnt": {"min": 0, "max": 0},
			
 
				+             # "comment_cnt": {"min": 0, "max": 0},
			
 
				+             # "favorite_cnt": {"min": 0, "max": 0},
			
 
				+             # "period": {"min": 10, "max": 0},
			
 
				+             # "publish_time": {"min": 1673734400000, "max": 0}
			
 
				+        }
			
 
				 
			
 
				         # 格式化 video_dict:publish_time_stamp
			
 
				         if "publish_time_stamp" in video_dict.keys():
			
--- a/suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend_scheduling.py
+++ b/suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend_scheduling.py
@@ -32,7 +32,7 @@ def main(log_type, crawler, task, env):
 
				                                                             rule_dict=rule_dict,
			
 
				                                                             env=env)
			
 
				     Common.del_logs(log_type, crawler)
			
 
				-    Common.logger(log_type, crawler).info('抓取完一轮，休眠 1 分钟\n')
			
 
				+    Common.logger(log_type, crawler).info('抓取完一轮\n')
			
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":
			
--- a/suisuiniannianyingfuqi/suisuiniannianyingfuqi_recommend/suisuiniannianyingfuqi_recommend_scheduling.py
+++ b/suisuiniannianyingfuqi/suisuiniannianyingfuqi_recommend/suisuiniannianyingfuqi_recommend_scheduling.py
@@ -13,6 +13,7 @@ sys.path.append(os.getcwd())
 
				 from common.common import Common
			
 
				 from common.feishu import Feishu
			
 
				 from common.publish import Publish
			
 
				+from common.public import download_rule
			
 
				 from common.scheduling_db import MysqlHelper
			
 
				 
			
 
				 
			
@@ -68,39 +69,30 @@ class SuisuiniannianyingfuqiRecommendScheduling:
 
				                     feeds = response.json()['data']['video_list']['data']
			
 
				                     for i in range(len(feeds)):
			
 
				                         try:
			
 
				-                            video_title = feeds[i].get('title', "").replace("'", "").replace('"', '')
			
 
				-                            video_id = str(feeds[i].get('id', ''))
			
 
				-                            play_cnt = feeds[i].get('browse', 0)
			
 
				-                            comment_cnt = 0
			
 
				-                            like_cnt = 0
			
 
				-                            share_cnt = 0
			
 
				                             publish_time_str = feeds[i].get('createtime', '')
			
 
				                             publish_time_stamp = int(time.mktime(time.strptime(publish_time_str, "%Y-%m-%d")))
			
 
				-                            user_name = "岁岁年年迎福气"
			
 
				-                            user_id = "suisuiniannianyingfuqi"
			
 
				-                            cover_url = feeds[i].get('thumb', '')
			
 
				-                            video_url = feeds[i].get('url', '')
			
 
				-
			
 
				-                            video_dict = {'video_title': video_title,
			
 
				-                                          'video_id': video_id,
			
 
				-                                          'play_cnt': play_cnt,
			
 
				-                                          'comment_cnt': comment_cnt,
			
 
				-                                          'like_cnt': like_cnt,
			
 
				-                                          'share_cnt': share_cnt,
			
 
				+                            video_dict = {'video_title': feeds[i].get('title', "").replace("'", "").replace('"', ''),
			
 
				+                                          'video_id': str(feeds[i].get('id', '')),
			
 
				+                                          'play_cnt': feeds[i].get('browse', 0),
			
 
				+                                          'comment_cnt': 0,
			
 
				+                                          'like_cnt': 0,
			
 
				+                                          'share_cnt': 0,
			
 
				                                           'publish_time_stamp': publish_time_stamp,
			
 
				                                           'publish_time_str': publish_time_str,
			
 
				-                                          'user_name': user_name,
			
 
				-                                          'user_id': user_id,
			
 
				-                                          'avatar_url': cover_url,
			
 
				-                                          'cover_url': cover_url,
			
 
				-                                          'video_url': video_url,
			
 
				+                                          'user_name': "岁岁年年迎福气",
			
 
				+                                          'user_id': "suisuiniannianyingfuqi",
			
 
				+                                          'avatar_url': feeds[i].get('thumb', ''),
			
 
				+                                          'cover_url': feeds[i].get('thumb', ''),
			
 
				+                                          'video_url': feeds[i].get('url', ''),
			
 
				                                           'session': f"suisuiniannianyingfuqi-{int(time.time())}"}
			
 
				                             for k, v in video_dict.items():
			
 
				                                 Common.logger(log_type, crawler).info(f"{k}:{v}")
			
 
				 
			
 
				-                            if video_id == '' or video_title == '' or cover_url == '' or video_url == '':
			
 
				+                            if video_dict["video_id"] == '' or video_dict["video_title"] == '' or video_dict["cover_url"] == '' or video_dict["video_url"] == '':
			
 
				                                 Common.logger(log_type, crawler).info('无效视频\n')
			
 
				-                            elif cls.repeat_video(log_type, crawler, video_id, env) != 0:
			
 
				+                            elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
			
 
				+                                Common.logger(log_type, crawler).info("不满足抓取规则\n")
			
 
				+                            elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
			
 
				                                 Common.logger(log_type, crawler).info('视频已下载\n')
			
 
				                             else:
			
 
				                                 cls.download_publish(log_type=log_type,
			
--- a/xiaoniangao/xiaoniangao_follow/xiaoniangao_follow.py
+++ b/xiaoniangao/xiaoniangao_follow/xiaoniangao_follow.py
@@ -14,7 +14,7 @@ from common.common import Common
 
				 from common.scheduling_db import MysqlHelper
			
 
				 from common.publish import Publish
			
 
				 from common.feishu import Feishu
			
 
				-from common.public import filter_word
			
 
				+from common.public import get_config_from_mysql
			
 
				 proxies = {"http": None, "https": None}
			
 
				 
			
 
				 
			
@@ -309,7 +309,11 @@ class XiaoniangaoFollow:
 
				         elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
			
 
				             Common.logger(log_type, crawler).info('视频已下载\n')
			
 
				         elif any(str(word) if str(word) in video_dict['video_title'] else False for word in
			
 
				-                 filter_word(log_type, crawler, "小年糕", env)) is True:
			
 
				+                 get_config_from_mysql(log_type=log_type,
			
 
				+                                       source=crawler,
			
 
				+                                       env=env,
			
 
				+                                       text="filter",
			
 
				+                                       action="")) is True:
			
 
				             Common.logger(log_type, crawler).info("视频已中过滤词\n")
			
 
				         else:
			
 
				             # 下载封面
			
--- a/xiaoniangao/xiaoniangao_hour/xiaoniangao_hour.py
+++ b/xiaoniangao/xiaoniangao_hour/xiaoniangao_hour.py
@@ -16,7 +16,7 @@ from common.common import Common
 
				 from common.feishu import Feishu
			
 
				 from common.publish import Publish
			
 
				 from common.scheduling_db import MysqlHelper
			
 
				-from common.public import filter_word
			
 
				+from common.public import get_config_from_mysql
			
 
				 
			
 
				 proxies = {"http": None, "https": None}
			
 
				 
			
@@ -329,9 +329,12 @@ class XiaoniangaoHour:
 
				                     Common.logger(log_type, crawler).info("不满足基础门槛规则\n")
			
 
				                 elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
			
 
				                     Common.logger(log_type, crawler).info('视频已下载\n')
			
 
				-                # 过滤敏感词
			
 
				-                elif any(str(word) if str(word) in video_title else False for word in
			
 
				-                         filter_word(log_type, crawler, "小年糕", env)) is True:
			
 
				+                elif any(str(word) if str(word) in video_dict['video_title'] else False for word in
			
 
				+                         get_config_from_mysql(log_type=log_type,
			
 
				+                                               source=crawler,
			
 
				+                                               env=env,
			
 
				+                                               text="filter",
			
 
				+                                               action="")) is True:
			
 
				                     Common.logger(log_type, crawler).info("视频已中过滤词\n")
			
 
				                     time.sleep(1)
			
 
				                 else:
			
--- a/xiaoniangao/xiaoniangao_play/xiaoniangao_play.py
+++ b/xiaoniangao/xiaoniangao_play/xiaoniangao_play.py
@@ -9,14 +9,12 @@ import sys
 
				 import time
			
 
				 import requests
			
 
				 import urllib3
			
 
				-
			
 
				-from common.public import filter_word
			
 
				-from common.scheduling_db import MysqlHelper
			
 
				-
			
 
				 sys.path.append(os.getcwd())
			
 
				 from common.common import Common
			
 
				 from common.feishu import Feishu
			
 
				 from common.publish import Publish
			
 
				+from common.public import get_config_from_mysql
			
 
				+from common.scheduling_db import MysqlHelper
			
 
				 proxies = {"http": None, "https": None}
			
 
				 
			
 
				 
			
@@ -326,8 +324,12 @@ class XiaoniangaoPlay:
 
				         # 去重
			
 
				         elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
			
 
				             Common.logger(log_type, crawler).info("视频已下载\n")
			
 
				-        # 过滤词库
			
 
				-        elif any(str(word) if str(word) in video_dict['video_title'] else False for word in filter_word(log_type, crawler, "小年糕", env)) is True:
			
 
				+        elif any(str(word) if str(word) in video_dict['video_title'] else False for word in
			
 
				+                 get_config_from_mysql(log_type=log_type,
			
 
				+                                       source=crawler,
			
 
				+                                       env=env,
			
 
				+                                       text="filter",
			
 
				+                                       action="")) is True:
			
 
				             Common.logger(log_type, crawler).info("视频已中过滤词\n")
			
 
				         else:
			
 
				             # 下载封面