@@ -1,7 +1,13 @@
-import json
+import os
+import sys
+import time
+
 import requests
 from flask import Flask, request
 from flask import jsonify
+
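+# make the parent of the working directory importable so common/ and user_spider/ resolve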
+sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
 from common.db.mysql_help import MysqlHelper
 from user_spider.user_info import *
 
@@ -16,17 +21,18 @@ def getSource():
     get_data = request.args.to_dict()
     fields = get_data.get('fields')
     # # process the request parameters
-    sql = 'select source, task_type, spider_name from crawler_source'
+    sql = 'select source, task_type, spider_name, machine from crawler_source'
     result = MysqlHelper.get_values(sql)
     if not result:
         return jsonify({'return_code': '200', 'result': [], 'message': 'no data'})
     source_list = list()
-    for source, task_type, spider_name in result:
+    for source, task_type, spider_name, machine in result:
         data = dict(
             source=source,
             task_type=task_type,
-            spider_name=spider_name
+            spider_name=spider_name,
+            machine=machine
         )
         source_list.append(data)
     return jsonify({'return_code': '200', 'result': source_list})
 
@@ -39,7 +45,7 @@ def insertTask():
     source = data.get('source')
     exist_outer_info = list()
     for link in outer_info:
-        s_sql = f'select spider_link from crawler_task where source={source}'
+        s_sql = f"""select spider_link from crawler_task where source={source}"""
         result = MysqlHelper.get_values(s_sql)
         if link in eval(result[0]):
             exist_outer_info.append(link)
@@ -57,14 +63,14 @@ def insertTask():
 @app.route("/v1/crawler/task/gettask", methods=["GET"])
 def getAllTask():
     get_data = request.args.to_dict()
-    page = get_data.get('page', 1)
-    offset = get_data.get('offset', 10)
+    page = int(get_data.get('page', 1))
+    offset = int(get_data.get('offset', 10))
     start_count = (page * offset) - offset
     end_count = page * offset
     sql = f"""select task_id, task_name from crawler_task limit {start_count}, {end_count}"""
     result = MysqlHelper.get_values(sql)
     if not result:
-        return jsonify({'return_code': '200', 'result': [], 'message': 'no data'})
+        return jsonify({'code': '200', 'result': [], 'message': 'no data'})
     source_list = list()
     for task_id, task_name in result:
         data = dict(
@@ -116,27 +122,65 @@ def get_user_info(source):
 @app.route("/v1/crawler/author/create", methods=["POST"])
 def createUser():
     get_media_url = 'http://videotest-internal.yishihui.com/longvideoapi/user/virtual/crawler/registerVirtualUser'
-    data = request.form.get('spider_link')
+    print(request.form.to_dict())
+    spider_link = request.form.get('spider_link')
+    print(111111, spider_link, type(spider_link))
     source = request.form.get('source')
+    task_type = request.form.get('task_type')
+    applets_status = request.form.get('applets_status')
+    app_status = request.form.get('app_status')
     user_tag = request.form.get('user_tag')
-    for author_url in eval(data):
-        # crawler = get_user_info(source)
-        # user_info = crawler(author_url)
-        post_data = {
-            # 'count': 1,  # (required) number of accounts: pass 1
-            # 'accountType': 4,  # (required) account type: pass 4 for an app virtual account
-            'pwd': '',  # password, defaults to 12346
-            'nickName': '',  # nickname, defaults to vuser......
-            'avatarUrl': '',
-            # avatar URL, defaults to http://weapppiccdn.yishihui.com/resources/images/pic_normal.png
-            'tagName': user_tag,  # separate multiple values with commas
-        }
-        response = requests.post(url=get_media_url, params=post_data)
-        # print(response.text)
-        media_id = response.json()['data']
-
-    return jsonify({'data': data})
+    user_content_tag = request.form.get('user_content_tag')
+    success_list = list()
+    fail_list = list()
+    for author_url in eval(spider_link):
+        try:
+            post_data = {
+                # 'count': 1,  # (required) number of accounts: pass 1
+                # 'accountType': 4,  # (required) account type: pass 4 for an app virtual account
+                'pwd': '',  # password, defaults to 12346
+                'nickName': '',  # nickname, defaults to vuser......
+                'avatarUrl': '',
+                # avatar URL, defaults to http://weapppiccdn.yishihui.com/resources/images/pic_normal.png
+                'tagName': user_tag,  # separate multiple values with commas
+            }
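+            # register a virtual user through the longvideoapi endpoint and read back its media_id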
+            response = requests.post(url=get_media_url, params=post_data)
+            media_id = response.json()['data']
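+            # if this link already has a row in crawler_author_map, count it as success and skip the insert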
+            f_sql = f"""select spider_link from crawler_author_map where spider_link="{author_url}" """
+            result = MysqlHelper.get_values(f_sql)
+            if result:
+                success_list.append(author_url)
+                continue
+            else:
+                data = dict(
+                    spider_link=author_url,
+                    media_id=media_id,
+                    source=source,
+                    task_type=task_type,
+                    applets_status=applets_status,
+                    app_status=app_status,
+                    user_tag=user_tag,
+                    user_content_tag=user_content_tag,
+                    insert_time=int(time.time()),
+                    update_time=int(time.time())
+                )
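+                # build a parameterized insert into crawler_author_map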
+                keys = ','.join(data.keys())
+                values = ','.join(['%s'] * len(data))
+                table = 'crawler_author_map'
+                sql = f"""insert into {table}({keys}) VALUES({values})"""
+                result = MysqlHelper.insert_values(sql, tuple(data.values()))
+                if not result:
+                    fail_list.append(author_url)
+                else:
+                    success_list.append(author_url)
+        except Exception as e:
+            fail_list.append(author_url)
+            continue
+    return jsonify({'code': 200, 'result': {'success': success_list, 'fail': fail_list}})
 
 
 if __name__ == "__main__":
-    app.run(debug=True,port=5050)
+    app.run(debug=True, port=5050)