@@ -1,14 +1,16 @@
 import json
+import requests
 from flask import Flask, request
 from flask import jsonify
 from common.db.mysql_help import MysqlHelper
+from user_spider.user_info import *

 app = Flask(__name__)
 app.config['JSON_AS_ASCII'] = False


 # Only accepts GET requests
-@app.route("/v1/source/getinfo", methods=["GET"])
+@app.route("/v1/crawler/source/getall", methods=["GET"])
 def getSource():
     # Parse the incoming query-string params
     get_data = request.args.to_dict()
@@ -30,21 +32,36 @@ def getSource():
     return jsonify({'return_code': '200', 'result': source_list})


-@app.route("/v1/task/insert", methods=["POST"])
-def inerttask():
-    pass
-    # Parse the incoming parameters
-    # get_data = request.args.to_dict()
-    # The incoming payload is bytes and needs converting to JSON
+@app.route("/v1/crawler/task/insert", methods=["POST"])
+def insertTask():
+    data = request.form
+    # spider_link is expected to be a JSON-encoded list of author URLs
+    outer_info = json.loads(data.get('spider_link'))
+    source = data.get('source')
+    exist_outer_info = list()
+    # Fetch the stored link lists for this source once, then check each
+    # incoming link against them (each row holds a serialized list of links)
+    s_sql = f"select spider_link from crawler_task where source='{source}'"
+    result = MysqlHelper.get_values(s_sql)
+    stored_links = set()
+    for row in result:
+        stored_links.update(eval(row[0]))
+    for link in outer_info:
+        if link in stored_links:
+            exist_outer_info.append(link)
+    if exist_outer_info:
+        return jsonify({'code': 200, 'message': 'duplicate spider_link', 'spider_link': exist_outer_info})
+    # Join the form keys into a comma-separated column list with matching placeholders
+    keys = ','.join(data.keys())
+    values = ','.join(['%s'] * len(data))
+    sql = 'insert into {table}({keys}) VALUES({values})'.format(table='crawler_task', keys=keys, values=values)
+    MysqlHelper.insert_values(sql, tuple(data.values()))

-    # return json.dumps(return_dict, ensure_ascii=False)
+    return jsonify({'code': 200, 'message': 'task create success'})
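+# Example request (a sketch; assumes the app runs locally on port 5050 and
+# the task_name/source/author-URL values are placeholders):
+#   curl -X POST http://127.0.0.1:5050/v1/crawler/task/insert \
+#        -d 'task_name=demo' -d 'source=xigua' \
+#        -d 'spider_link=["https://www.ixigua.com/home/123"]'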


-@app.route("/v1/task/gettask", methods=["GET"])
-def getTask():
+@app.route("/v1/crawler/task/gettask", methods=["GET"])
+def getAllTask():
     get_data = request.args.to_dict()
-
-    sql = 'select task_id, task_name from crawler_task'
+    # Query-string values arrive as strings, so cast before the arithmetic
+    page = int(get_data.get('page', 1))
+    offset = int(get_data.get('offset', 10))
+    start_count = (page - 1) * offset
+    # MySQL LIMIT takes (start offset, row count), not (start, end)
+    sql = f'select task_id, task_name from crawler_task limit {start_count}, {offset}'
     result = MysqlHelper.get_values(sql)
     if not result:
         return jsonify({'return_code': '200', 'result': [], 'message': 'no data'})
@@ -55,16 +72,71 @@ def getTask():
             task_name=task_name,
         )
         source_list.append(data)
-    return jsonify({'return_code': '200', 'result': source_list})
+    return jsonify({'code': '200', 'result': source_list})
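+# Example request (a sketch; assumes the app runs locally on port 5050):
+# page/offset map onto MySQL LIMIT (start, count), so page=2&offset=10
+# returns rows 10-19:
+#   curl 'http://127.0.0.1:5050/v1/crawler/task/gettask?page=2&offset=10'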
+
+
+# @app.route("/v1/crawler/task/getone", methods=["GET"])
+# def getOneTask():
+#     get_data = request.args.to_dict()
+#     task_id = get_data['task_id']
+#     sql = f'select task_id, spider_link from crawler_task where task_id={task_id}'
+#     result = MysqlHelper.get_values(sql)
+#     if not result:
+#         return jsonify({'code': '200', 'result': [], 'message': 'no data'})
+#     for task_id, spider_link in result:
+#         data = dict(
+#             task_id=task_id,
+#             spider_link=spider_link,
+#         )
+#     return jsonify({'code': '200', 'result': data})
+
+
+@app.route("/v1/crawler/task/update", methods=["POST"])
+def updateTask():
+    task_id = request.form.get('task_id')
+    spider_link = request.form.get('spider_link')
+
+    sql = f"""UPDATE crawler_task SET spider_link='{spider_link}' where task_id = {task_id}"""
+    result = MysqlHelper.update_values(sql)
+    if result:
+        return jsonify({'code': 200, 'message': 'task update success'})
+    else:
+        return jsonify({'code': 400, 'message': 'task update failed'})
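+# A safer variant would bind parameters rather than interpolate them into the
+# SQL string (a sketch; assumes MysqlHelper.update_values can forward a params
+# tuple to the underlying cursor, which its current signature may not support):
+#   sql = 'UPDATE crawler_task SET spider_link=%s WHERE task_id=%s'
+#   MysqlHelper.update_values(sql, (spider_link, task_id))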

-@app.route("/v1/author/getuser", methods=["POST"])
+
+def get_user_info(source):
+    # Map a source name to its user-info crawler from user_spider.user_info
+    source_spider = {
+        'xigua': xigua_user_info
+    }
+    return source_spider.get(source)
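+# Usage (a sketch; mirrors the commented-out lines in createUser below --
+# unknown sources return None, so check before calling):
+#   crawler = get_user_info(source)
+#   if crawler is not None:
+#       user_info = crawler(author_url)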
+
+
+@app.route("/v1/crawler/author/create", methods=["POST"])
 def createUser():
-    data = request.form.get('author')
-    print(eval(data))
-    for i in eval(data):
-        print(i)
-    return jsonify({'data':data})
+    get_media_url = 'http://videotest-internal.yishihui.com/longvideoapi/user/virtual/crawler/registerVirtualUser'
+    # spider_link is expected to be a JSON-encoded list of author URLs
+    data = request.form.get('spider_link')
+    source = request.form.get('source')
+    user_tag = request.form.get('user_tag')
+    for author_url in json.loads(data):
+        # crawler = get_user_info(source)
+        # user_info = crawler(author_url)
+        post_data = {
+            # 'count': 1,  # (required) number of accounts: pass 1
+            # 'accountType': 4,  # (required) account type: pass 4 (app virtual account)
+            'pwd': '',  # password, defaults to 12346
+            'nickName': '',  # nickname, defaults to vuser......
+            'avatarUrl': '',
+            # avatar URL, defaults to http://weapppiccdn.yishihui.com/resources/images/pic_normal.png
+            'tagName': user_tag,  # multiple values separated by commas
+        }
+        response = requests.post(url=get_media_url, params=post_data)
+        # print(response.text)
+        # media_id of the newly registered virtual user
+        media_id = response.json()['data']
+
+    return jsonify({'data': data})
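+# Example request (a sketch; the port, tag, and author URL are placeholders):
+#   curl -X POST http://127.0.0.1:5050/v1/crawler/author/create \
+#        -d 'source=xigua' -d 'user_tag=demo' \
+#        -d 'spider_link=["https://www.ixigua.com/home/123"]'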
+

 if __name__ == "__main__":
-    app.run(debug=True)
+    app.run(debug=True, port=5050)