@@ -1,7 +1,12 @@
-import json
+import os
+import sys
+import time
+
 import requests
 from flask import Flask, request
 from flask import jsonify
+
+sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
 from common.db.mysql_help import MysqlHelper
 from user_spider.user_info import *
 
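Review note: the new `sys.path.append` resolves `..` against the process's working directory, so `from common.db.mysql_help import MysqlHelper` only succeeds when the server is launched from inside this package directory. A minimal sketch of a launch-directory-independent variant, assuming this file sits one level below the repo root:

```python
import os
import sys

# Anchor the import root on this file's location, not on os.getcwd(),
# so the app can be started from any directory.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
```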
@@ -16,17 +21,18 @@ def getSource():
     get_data = request.args.to_dict()
     fields = get_data.get('fields')
     # # process the request parameters
-    sql = 'select source, task_type, spider_name from crawler_source'
+    sql = 'select source, task_type, spider_name, machine from crawler_source'
 
     result = MysqlHelper.get_values(sql)
     if not result:
         return jsonify({'return_code': '200', 'result': [], 'message': 'no data'})
     source_list = list()
-    for source, task_type, spider_name in result:
+    for source, task_type, spider_name, machine in result:
         data = dict(
             source=source,
             task_type=task_type,
-            spider_name=spider_name
+            spider_name=spider_name,
+            machine=machine
         )
         source_list.append(data)
     return jsonify({'return_code': '200', 'result': source_list})
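Review note: adding `machine` meant editing the SELECT list, the tuple unpack, and the response dict in lockstep. A sketch that derives all three from one column list, assuming `MysqlHelper.get_values` keeps returning rows as tuples in SELECT order:

```python
columns = ['source', 'task_type', 'spider_name', 'machine']
sql = f"select {', '.join(columns)} from crawler_source"
result = MysqlHelper.get_values(sql)
if not result:
    return jsonify({'return_code': '200', 'result': [], 'message': 'no data'})
# One dict per row; adding a column is now a one-line change.
source_list = [dict(zip(columns, row)) for row in result]
return jsonify({'return_code': '200', 'result': source_list})
```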
@@ -39,7 +45,7 @@ def insertTask():
     source = data.get('source')
     exist_outer_info = list()
     for link in outer_info:
-        s_sql = f'select spider_link from crawler_task where source={source}'
+        s_sql = f"""select spider_link from crawler_task where source={source}"""
         result = MysqlHelper.get_values(s_sql)
         if link in eval(result[0]):
             exist_outer_info.append(link)
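Review note: switching to triple quotes does not change the substance here. `source` is still interpolated into the SQL unescaped, the query runs once per link even though it does not depend on `link`, and `eval` executes whatever string is stored in the table (note hunk 1 also removed `import json`, which the safer parse would need back). A hedged sketch, assuming `MysqlHelper.get_values` grows a params argument (hypothetical; the current helper may only accept a finished SQL string) and that `spider_link` is stored as a JSON array:

```python
import json

s_sql = """select spider_link from crawler_task where source=%s"""
result = MysqlHelper.get_values(s_sql, (source,))  # hypothetical params support; runs once, outside the loop
# json.loads instead of eval: parses the stored data without executing it.
# Mirrors the original result[0] access; the exact row shape depends on MysqlHelper.
existing = set(json.loads(result[0])) if result else set()
exist_outer_info = [link for link in outer_info if link in existing]
```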
@@ -57,14 +63,14 @@ def insertTask():
 @app.route("/v1/crawler/task/gettask", methods=["GET"])
 def getAllTask():
     get_data = request.args.to_dict()
-    page = get_data.get('page', 1)
-    offset = get_data.get('offset', 10)
+    page = int(get_data.get('page', 1))
+    offset = int(get_data.get('offset', 10))
     start_count = (page * offset) - offset
     end_count = page * offset
     sql = f"""select task_id, task_name from crawler_task limit {start_count}, {end_count}"""
     result = MysqlHelper.get_values(sql)
     if not result:
-        return jsonify({'return_code': '200', 'result': [], 'message': 'no data'})
+        return jsonify({'code': '200', 'result': [], 'message': 'no data'})
     source_list = list()
     for task_id, task_name in result:
         data = dict(
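Review note: the `int(...)` casts fix a real crash, since `request.args` values arrive as strings and `page * offset` on two strings raises a TypeError. The unchanged LIMIT line still looks off, though: MySQL's `LIMIT offset, row_count` takes a page size as its second argument, not an end index, so passing `end_count` makes page 2 with offset 10 return 20 rows instead of 10. A sketch of the presumably intended pagination:

```python
page = int(get_data.get('page', 1))
offset = int(get_data.get('offset', 10))
start_count = (page - 1) * offset
# LIMIT's second argument is the page size, not the end row.
sql = f"""select task_id, task_name from crawler_task limit {start_count}, {offset}"""
```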
@@ -116,27 +122,62 @@ def get_user_info(source):
 @app.route("/v1/crawler/author/create", methods=["POST"])
 def createUser():
     get_media_url = 'http://videotest-internal.yishihui.com/longvideoapi/user/virtual/crawler/registerVirtualUser'
-    data = request.form.get('spider_link')
+    print(request.form.to_dict())
+    spider_link = request.form.get('spider_link')
+    print(111111,spider_link,type(spider_link))
     source = request.form.get('source')
+    task_type = request.form.get('task_type')
+    applets_status = request.form.get('applets_status')
+    app_status = request.form.get('app_status')
     user_tag = request.form.get('user_tag')
-    for author_url in eval(data):
-        # crawler = get_user_info(source)
-        # user_info = crawler(author_url)
-        post_data = {
-            # 'count': 1,     # (required) number of accounts: pass 1
-            # 'accountType': 4,   # (required) account type: pass 4 for an app virtual account
-            'pwd': '',  # password, defaults to 12346
-            'nickName': '',  # nickname, defaults to vuser......
-            'avatarUrl': '',
-            # avatar URL, defaults to http://weapppiccdn.yishihui.com/resources/images/pic_normal.png
-            'tagName': user_tag,  # separate multiple values with commas
-        }
-        response = requests.post(url=get_media_url, params=post_data)
-        # print(response.text)
-        media_id = response.json()['data']
-
-    return jsonify({'data': data})
+    user_content_tag = request.form.get('user_content_tag')
+    success_list = list()
+    fail_list = list()
+    for author_url in eval(spider_link):
+        try:
+            post_data = {
+                # 'count': 1,     # (required) number of accounts: pass 1
+                # 'accountType': 4,   # (required) account type: pass 4 for an app virtual account
+                'pwd': '',  # password, defaults to 12346
+                'nickName': '',  # nickname, defaults to vuser......
+                'avatarUrl': '',
+                # avatar URL, defaults to http://weapppiccdn.yishihui.com/resources/images/pic_normal.png
+                'tagName': user_tag,  # separate multiple values with commas
+            }
+            response = requests.post(url=get_media_url, params=post_data)
+            media_id = response.json()['data']
+            f_sql = f"""select spider_link from crawler_author_map where spider_link="{author_url}" """
+            result = MysqlHelper.get_values(f_sql)
+            if result:
+                success_list.append(author_url)
+                continue
+            else:
+                data = dict(
+                    spider_link=author_url,
+                    media_id=media_id,
+                    source=source,
+                    task_type=task_type,
+                    applets_status=applets_status,
+                    app_status=app_status,
+                    user_tag=user_tag,
+                    user_content_tag=user_content_tag,
+                    insert_time=int(time.time()),
+                    update_time=int(time.time())
+                )
+                keys = ','.join(data.keys())
+                values = ','.join(['%s'] * len(data))
+                table = 'crawler_author_map'
+                sql = f"""insert into {table}({keys}) VALUES({values})"""
+                result = MysqlHelper.insert_values(sql, tuple(data.values()))
+                if not result:
+                    fail_list.append(author_url)
+                else:
+                    success_list.append(author_url)
+        except Exception as e:
+            fail_list.append(author_url)
+            continue
+    return jsonify({'code': 200, 'result': {'success': success_list, 'fail': fail_list}})
 
 
 if __name__ == "__main__":
-    app.run(debug=True,port=5050)
+    app.run(debug=True, port=5050)
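Review notes on the createUser rewrite: the two `print(...)` calls read as leftover debugging; `eval(spider_link)` executes whatever the client posts; and the crawler_author_map existence check runs only after the remote registerVirtualUser call, so an already-mapped link still registers a fresh virtual user each time. A hedged sketch of the loop's front half, assuming the posted spider_link is a JSON array and reusing the hypothetical parameterized `get_values` from the earlier note:

```python
import json

spider_link = request.form.get('spider_link')
try:
    author_urls = json.loads(spider_link)  # JSON instead of eval; assumes a JSON array is posted
except (TypeError, ValueError):
    return jsonify({'code': 400, 'result': {}, 'message': 'spider_link must be a JSON array'})

for author_url in author_urls:
    # Check the mapping first so duplicates never hit the remote registration API.
    f_sql = """select spider_link from crawler_author_map where spider_link=%s"""
    if MysqlHelper.get_values(f_sql, (author_url,)):  # hypothetical params argument
        success_list.append(author_url)
        continue
    # ... register the virtual user and insert the mapping as in the hunk above
```

The insert path already uses `%s` placeholders via `insert_values`, which is the pattern the two SELECTs could follow. Separately, `app.run(debug=True, ...)` enables the Werkzeug debugger; reasonable on this videotest host, but worth gating on an environment variable before it moves further.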