- # -*- coding: utf-8 -*-
- # @Author: wangkun
- # @Time: 2023/3/27
import ast
import os
import random
import sys

sys.path.append(os.getcwd())
from common.common import Common
from common.scheduling_db import MysqlHelper
# from common import Common
# from scheduling_db import MysqlHelper
# Filter-word lexicon helpers
def filter_word(log_type, crawler, source, env):
    """
    Fetch the filter-word lexicon for a crawler source.

    :param log_type: log category, passed through to the DB helper
    :param crawler: crawler name, e.g. "xiaoniangao"
    :param source: human-readable source name, e.g. "小年糕"
    :param env: runtime environment (e.g. dev / prod)
    :return: list of filter words (possibly empty)
    """
    # NOTE(review): `source` is interpolated straight into the SQL text. If it
    # can ever carry untrusted input this is injectable — switch to a
    # parameterized query if MysqlHelper supports placeholders.
    select_sql = f""" select * from crawler_filter_word where source="{source}" """
    words = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
    # Guard against an empty (or None) result set instead of crashing on len().
    if not words:
        return []
    return [word['filter_word'] for word in words]
def get_user_from_mysql(log_type, crawler, source, env, action=''):
    """
    Look up the crawl user list for *source* in mode *log_type*.

    :return: the DB rows when any exist, otherwise [] (with a warning logged)
    """
    query = f"select * from crawler_user_v3 where source='{source}' and mode='{log_type}'"
    rows = MysqlHelper.get_values(log_type, crawler, query, env, action=action)
    if not rows:
        Common.logger(log_type, crawler).warning(f"爬虫:{crawler},没有查到抓取名单")
        return []
    return rows
def get_config_from_mysql(log_type, source, env, text, action=''):
    """
    Load a crawler's config rows and return one of its comma-separated lists.

    :param log_type: log category, passed through to the DB helper
    :param source: crawler source key (also passed where MysqlHelper expects
                   the "crawler" argument — presumably intentional; confirm)
    :param env: runtime environment
    :param text: which list to return: "title", "filter" or "emoji"
    :param action: forwarded to MysqlHelper.get_values
    :return: list of strings for the requested key; None for an unknown *text*
             (original behavior, preserved)
    """
    select_sql = f"""select * from crawler_config where source="{source}" """
    contents = MysqlHelper.get_values(log_type, source, select_sql, env, action=action)
    title_list, filter_list, emoji_list = [], [], []
    for content in contents:
        # The config column stores a Python-literal dict. ast.literal_eval
        # parses literals only — unlike eval(), it cannot execute arbitrary
        # code smuggled into the DB row.
        config_dict = ast.literal_eval(content['config'])
        for key, value in config_dict.items():
            if key == "title":
                title_list.extend(value.split(","))
            elif key == "filter":
                filter_list.extend(value.split(","))
            elif key == "emoji":
                emoji_list.extend(value.split(","))
    if text == "title":
        return title_list
    elif text == "filter":
        return filter_list
    elif text == "emoji":
        return emoji_list
def random_title(log_type, crawler, env, text):
    """Pick one random entry from the configured list named by *text*."""
    candidates = get_config_from_mysql(log_type, crawler, env, text)
    return random.choice(candidates)
def task_fun(task_str):
    """
    Parse a task string — a list of (key, value) tuples whose 'rule' value is
    wrapped in quotes — into {"task_dict": ..., "rule_dict": ...}.

    :param task_str: e.g. "[('task_id','11'),...,('rule','[{...}}]'),...]"
    :return: {"task_dict": full task dict with 'rule' merged into one dict,
              "rule_dict": that same merged rule dict}
    """
    # Strip the quotes wrapping the 'rule' list so it parses as a real literal.
    task_str = task_str.replace("'[{", '[{').replace("}}]'", '}}]')
    # ast.literal_eval accepts Python literals only — safer than eval() if the
    # string ever arrives from an untrusted source, same result for valid input.
    task_dict = dict(ast.literal_eval(task_str))
    # 'rule' arrives as a list of single-key dicts; merge them into one dict.
    rule_dict = {}
    for item in task_dict['rule']:
        rule_dict.update(item)
    task_dict['rule'] = rule_dict
    return {
        "task_dict": task_dict,
        "rule_dict": rule_dict,
    }
if __name__ == "__main__":
    # Ad-hoc smoke test: print the configured emoji list for xiaoniangao/dev.
    # (filter_word / task_fun can be exercised the same way by hand.)
    print(get_config_from_mysql('hour', 'xiaoniangao', 'dev', 'emoji'))
    pass