# Server / piaoquan_crawler
							# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/3/27
import os, sys

sys.path.append(os.getcwd())
from common.common import Common
from common.scheduling_db import MysqlHelper


# Filter-word lookup
def filter_word(log_type, crawler, source, env):
    """
    Fetch the filter words stored for one source.

    Selects the rows of the ``crawler_filter_word`` table whose ``source``
    column equals *source* and collects the ``filter_word`` value of each row.

    :param log_type: log type, forwarded to MysqlHelper.get_values
    :param crawler: crawler identifier, e.g. xiaoniangao
    :param source: value matched against the ``source`` column, e.g. 小年糕
    :param env: environment, forwarded to MysqlHelper.get_values
    :return: list of filter words; an empty list when the query yields nothing
    """
    # NOTE(review): ``source`` is interpolated straight into the SQL text.
    # Callers presumably pass trusted, hard-coded names - confirm, and prefer
    # a parameterized query if MysqlHelper ever supports one.
    select_sql = f""" select * from crawler_filter_word where source="{source}" """
    words = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
    # Truthiness covers both an empty result and a None result, the same way
    # get_user_from_mysql guards its query; len(None) would raise TypeError.
    if not words:
        return []
    return [word['filter_word'] for word in words]


def get_user_from_mysql(log_type, crawler, source, env, action=''):
    """
    Fetch the ``crawler_author_map`` rows stored for one source.

    Only rows with ``is_del=1`` are selected (the meaning of that flag is not
    visible here - TODO confirm against the table definition).

    :param log_type: log type, forwarded to MysqlHelper.get_values and Common.logger
    :param crawler: crawler identifier, also used in the warning message
    :param source: value matched against the ``source`` column
    :param env: environment, forwarded to MysqlHelper.get_values
    :param action: forwarded unchanged to MysqlHelper.get_values
    :return: the query result when it is non-empty, otherwise an empty list
    """
    query = f"select * from crawler_author_map where source='{source}' and is_del=1"
    rows = MysqlHelper.get_values(log_type, crawler, query, env, action=action)
    # Guard clause: nothing usable came back, so log a warning and hand the
    # caller an empty list instead of the falsy result.
    if not rows:
        Common.logger(log_type, crawler).warning(f"爬虫:{crawler},没有查到抓取名单")
        return []
    return rows


if __name__ == "__main__":
    # Manual check when run as a script: print the filter words returned for
    # the 小年糕 source with env 'prod'.
    fetched_words = filter_word('public', 'xiaoniangao', '小年糕', 'prod')
    print(fetched_words)