import os
import sys
import time

import requests
from flask import Flask, request
from flask import jsonify

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

from common.db.mysql_help import MysqlHelper
from user_spider.user_info import *

app = Flask(__name__)
app.config['JSON_AS_ASCII'] = False


# Only GET requests are accepted
@app.route("/v1/crawler/source/getall", methods=["GET"])
def getSource():
    try:
        # Read the incoming query parameters (currently unused)
        get_data = request.args.to_dict()
        sql = 'select source, task_type, spider_name, machine, source_desc, task_type_desc, spider_name_desc from crawler_source'
        result = MysqlHelper.get_values(sql)
        if not result:
            return jsonify({'code': '200', 'result': [], 'message': '没有更多数据'})
        source_list = list()
        for source, task_type, spider_name, machine, source_desc, task_type_desc, spider_name_desc in result:
            source_dict = {
                'task_type': [
                    {
                        'type': task_type,
                        'description': task_type_desc,
                        'spider': {
                            'spider_name': spider_name,
                            'description': spider_name_desc
                        }
                    }
                ],
                'description': source_desc,
                'source': source,
                'machine': machine
            }
            source_list.append(source_dict)
    except Exception as e:
        return jsonify({'code': '400', 'message': '获取数据源信息失败'})
    return jsonify({'code': '200', 'result': source_list})


@app.route("/v1/crawler/task/insert", methods=["POST"])
def insertTask():
    try:
        data = request.json
        outer_info = data.get('spider_link')
        source = data.get('source')
        # Collect links that already exist for this source
        exist_outer_info = list()
        for link in outer_info:
            # source is stored as a string, so it must be quoted in the query
            s_sql = f"""select spider_link from crawler_task where source="{source}" """
            result = MysqlHelper.get_values(s_sql)
            if not result:
                continue
            # Each row is a tuple, so the stored link list is in result[0][0]
            if link in eval(result[0][0]):
                exist_outer_info.append(link)
        if exist_outer_info:
            return jsonify({'code': 200, 'message': '名单重复', 'repeat_list': exist_outer_info})
        # Field conversion
        tag_name_list = []
        content_tag_list = []
        user_tag = data['user_tag']
        user_content_tag = data['user_content_tag']
        for tag in user_tag:
            tag_name_list.append(tag['tagName'])
        for tag in user_content_tag:
            content_tag_list.append(tag['tagName'])
        data['min_publish_time'] = int(data['min_publish_time'] / 1000)
        data['next_time'] = int(data['next_time'] / 1000)
        data['insert_time'] = int(time.time())
        data['update_time'] = int(time.time())
        data['spider_link'] = str(data['spider_link'])
        data['spider_rule'] = str(data['spider_rule'])
        data['user_tag'] = ','.join(str(i) for i in tag_name_list)
        data['user_content_tag'] = ','.join(str(i) for i in content_tag_list)
        # data['crawler_interval'] = data.pop('interval')
        # Build comma-separated column names and %s placeholders for the INSERT statement
        keys = ','.join(data.keys())
        values = ','.join(['%s'] * len(data))
        sql = 'insert into {table}({keys}) VALUES({values})'.format(table='crawler_task', keys=keys, values=values)
        MysqlHelper.insert_values(sql, tuple(data.values()))
    except Exception as e:
        return jsonify({'code': 400, 'message': '任务写入失败'})
    return jsonify({'code': 200, 'message': 'task create success'})


@app.route("/v1/crawler/task/gettask", methods=["GET"])
def getAllTask():
    try:
        get_data = request.args.to_dict()
        page = int(get_data.get('page', 1))
        offset = int(get_data.get('offset', 10))
        start_count = (page * offset) - offset
        # MySQL LIMIT takes (offset, row_count), so the second value is the page size
        sql = f"""select task_id, task_name from crawler_task limit {start_count}, {offset}"""
        result = MysqlHelper.get_values(sql)
        if not result:
            return jsonify({'code': '200', 'result': [], 'message': 'no data'})
        source_list = list()
        for task_id, task_name in result:
            data = dict(
                task_id=task_id,
                task_name=task_name,
            )
            source_list.append(data)
    except Exception as e:
        return jsonify({"code": "400", 'message': "任务列表获取失败"})
    return jsonify({'code': '200', 'result': source_list})


@app.route("/v1/crawler/task/getone", methods=["GET"])
def getOneTask():
    try:
        get_data = request.args.to_dict()
        task_id = get_data['task_id']
        sql = f'select task_id, spider_link from crawler_task where task_id={task_id}'
        result = MysqlHelper.get_values(sql)
        if not result:
            return jsonify({'code': '200', 'result': [], 'message': 'no data'})
        for task_id, spider_link in result:
            data = dict(
                task_id=task_id,
                spider_link=spider_link,
            )
    except Exception as e:
        return jsonify({'code': '400', "message": "获取任务信息失败"})
    return jsonify({'code': '200', 'result': data})


@app.route("/v1/crawler/task/update", methods=["POST"])
def updateTask():
    try:
        task_id = request.json.get('task_id')
        spider_link = request.json.get('spider_link')
        sql = f"""UPDATE crawler_task SET spider_link='{spider_link}' where task_id = {task_id}"""
        result = MysqlHelper.update_values(sql)
        if result:
            return jsonify({'code': 200, 'message': 'task update success'})
        else:
            return jsonify({'code': 400, 'message': 'task update failed'})
    except Exception as e:
        return jsonify({'code': 400, 'message': '任务更新失败'})


def get_user_info(source):
    # Map a source name to its user-info crawler (imported from user_spider.user_info)
    source_spider = {
        'xigua': xigua_user_info
    }
    return source_spider.get(source)


@app.route("/v1/crawler/author/create", methods=["POST"])
def createUser():
    get_media_url = 'http://videotest-internal.yishihui.com/longvideoapi/user/virtual/crawler/registerVirtualUser'
    spider_link = request.json.get('spider_link')
    source = request.json.get('source')
    task_type = request.json.get('task_type')
    applets_status = request.json.get('applets_status')
    app_status = request.json.get('app_status')
    user_tag = request.json.get('user_tag')
    user_content_tag = request.json.get('user_content_tag')
    success_list = list()
    fail_list = list()
    for author_url in spider_link:
        try:
            post_data = {
                # 'count': 1,     # (required) number of accounts: pass 1
                # 'accountType': 4,   # (required) account type: pass 4 for an app virtual account
                'pwd': '',  # password, defaults to 12346
                'nickName': '',  # nickname, defaults to vuser......
                'avatarUrl': '',
                # avatar URL, defaults to http://weapppiccdn.yishihui.com/resources/images/pic_normal.png
                'tagName': ','.join(tag['tagName'] for tag in user_tag),  # multiple values separated by commas
            }
            response = requests.post(url=get_media_url, params=post_data)
            media_id = response.json()['data']
            f_sql = f"""select spider_link from crawler_author_map where spider_link="{author_url}" """
            result = MysqlHelper.get_values(f_sql)
            if result:
                # Already registered, so count it as a success
                success_list.append(author_url)
                continue
            else:
                tag_name_list = []
                content_tag_list = []
                for tag in user_tag:
                    tag_name_list.append(tag['tagName'])
                for tag in user_content_tag:
                    content_tag_list.append(tag['tagName'])
                data = dict(
                    spider_link=author_url,
                    media_id=media_id,
                    source=source,
                    task_type=task_type,
                    applets_status=applets_status,
                    app_status=app_status,
                    user_tag=','.join(str(i) for i in tag_name_list),
                    user_content_tag=','.join(str(i) for i in content_tag_list),
                    insert_time=int(time.time()),
                    update_time=int(time.time())
                )
                keys = ','.join(data.keys())
                values = ','.join(['%s'] * len(data))
                table = 'crawler_author_map'
                sql = f"""insert into {table}({keys}) VALUES({values})"""
                result = MysqlHelper.insert_values(sql, tuple(data.values()))
                if not result:
                    fail_list.append(author_url)
                else:
                    success_list.append(author_url)
        except Exception as e:
            fail_list.append(author_url)
            continue
    return jsonify({'code': 200, 'result': {'success': success_list, 'fail': fail_list}})


if __name__ == "__main__":
    app.run(debug=True, port=5050)