public.py 1.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2023/3/27
  4. import os, sys
  5. sys.path.append(os.getcwd())
  6. from common.common import Common
  7. from common.scheduling_db import MysqlHelper
  8. # 过滤词库
  9. def filter_word(log_type, crawler, source, env):
  10. """
  11. 过滤词库
  12. :param log_type: 日志
  13. :param crawler: 哪款爬虫,如:xiaoniangao
  14. :param source: 哪款爬虫,如:小年糕
  15. :param env: 环境
  16. :return: word_list
  17. """
  18. select_sql = f""" select * from crawler_filter_word where source="{source}" """
  19. words = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
  20. word_list = []
  21. if len(words) == 0:
  22. return word_list
  23. for word in words:
  24. word_list.append(word['filter_word'])
  25. return word_list
  26. def get_user_from_mysql(log_type, crawler, source, env, action=''):
  27. sql = f"select * from crawler_author_map where source='{source}' and is_del=1"
  28. results = MysqlHelper.get_values(log_type, crawler, sql, env, action=action)
  29. if results:
  30. return results
  31. else:
  32. Common.logger(log_type, crawler).warning(f"爬虫:{crawler},没有查到抓取名单")
  33. return []
  34. if __name__ == "__main__":
  35. print(filter_word('public', 'xiaoniangao', '小年糕', 'prod'))