# conf_task.py

import json
import requests
from flask import Flask, request
from flask import jsonify
from common.db.mysql_help import MysqlHelper
from user_spider.user_info import *

app = Flask(__name__)
# Return JSON as UTF-8 instead of escaping non-ASCII characters
app.config['JSON_AS_ASCII'] = False


# This endpoint only accepts GET requests
@app.route("/v1/crawler/source/getall", methods=["GET"])
def getSource():
    # Read the incoming query-string parameters
    get_data = request.args.to_dict()
    fields = get_data.get('fields')
    # Process the parameters
    sql = 'select source, task_type, spider_name from crawler_source'
    result = MysqlHelper.get_values(sql)
    if not result:
        return jsonify({'return_code': '200', 'result': [], 'message': 'no data'})
    source_list = list()
    for source, task_type, spider_name in result:
        data = dict(
            source=source,
            task_type=task_type,
            spider_name=spider_name
        )
        source_list.append(data)
    return jsonify({'return_code': '200', 'result': source_list})
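
# Example usage (a sketch; the field values are assumptions, actual rows come from crawler_source):
#   GET /v1/crawler/source/getall
#   -> {"return_code": "200", "result": [{"source": "xigua", "task_type": "...", "spider_name": "..."}]}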
  29. @app.route("/v1/crawler/task/insert", methods=["POST"])
  30. def insertTask():
  31. data = request.form
  32. outer_info = data.get(('spider_link'))
  33. source = data.get('source')
  34. exist_outer_info = list()
  35. for link in outer_info:
  36. s_sql = f'select spider_link from crawler_task where source={source}'
  37. result = MysqlHelper.get_values(s_sql)
  38. if link in eval(result[0]):
  39. exist_outer_info.append(link)
  40. if exist_outer_info:
  41. return jsonify({'code': 200, 'message': '名单重复', 'spider_link': exist_outer_info})
  42. # 获取到一个以键且为逗号分隔的字符串,返回一个字符串
  43. keys = ','.join(data.keys())
  44. values = ','.join(['%s'] * len(data))
  45. sql = 'insert into {table}({keys}) VALUES({values})'.format(table='crawler_task', keys=keys, values=values)
  46. MysqlHelper.insert_values(sql, tuple(data.values()))
  47. return jsonify({'code': 200, 'message': 'task create success'})
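
# Example usage (a sketch; field names other than source/spider_link are assumptions
# about the crawler_task table):
#   POST /v1/crawler/task/insert
#   form data: source=xigua, task_name=demo, spider_link="['https://example.com/author/1']"
#   -> {"code": 200, "message": "task create success"}
# Note: spider_link is expected to be the string form of a Python list, since it is
# parsed with eval() both here and in createUser below.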
  48. @app.route("/v1/crawler/task/gettask", methods=["GET"])
  49. def getAllTask():
  50. get_data = request.args.to_dict()
  51. page = get_data.get('page', 1)
  52. offset = get_data.get('offset', 10)
  53. start_count = (page * offset) - offset
  54. end_count = page * offset
  55. sql = f"""select task_id, task_name from crawler_task limit {start_count}, {end_count}"""
  56. result = MysqlHelper.get_values(sql)
  57. if not result:
  58. return jsonify({'return_code': '200', 'result': [], 'message': 'no data'})
  59. source_list = list()
  60. for task_id, task_name in result:
  61. data = dict(
  62. task_id=task_id,
  63. task_name=task_name,
  64. )
  65. source_list.append(data)
  66. return jsonify({'code': '200', 'result': source_list})
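
# Example usage (a sketch): page and offset drive MySQL's LIMIT offset, row_count:
#   GET /v1/crawler/task/gettask?page=2&offset=10
#   -> rows 10..19, i.e. LIMIT 10, 10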
  67. # @app.route("/v1/crawler/task/getone", methods=["GET"])
  68. # def getOneTask():
  69. # get_data = request.args.to_dict()
  70. # task_id = get_data['task_id']
  71. # sql = f'select task_id, spider_link from crawler_task where task_id={task_id}'
  72. # result = MysqlHelper.get_values(sql)
  73. # if not result:
  74. # return jsonify({'code': '200', 'result': [], 'message': 'no data'})
  75. # for task_id, spider_link in result:
  76. # data = dict(
  77. # task_id=task_id,
  78. # spider_link=spider_link,
  79. # )
  80. # return jsonify({'code': '200', 'result': data})
  81. @app.route("/v1/crawler/task/update", methods=["POST"])
  82. def updateTask():
  83. task_id = request.form.get('task_id')
  84. spider_link = request.form.get('spider_link')
  85. print(spider_link, task_id)
  86. sql = f"""UPDATE crawler_task SET spider_link='{spider_link}' where task_id = {task_id}"""
  87. print(sql)
  88. result = MysqlHelper.update_values(sql)
  89. if result:
  90. return jsonify({'code': 200, 'message': 'task update success'})
  91. else:
  92. return jsonify({'code': 400, 'message': 'task update faild'})
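
# Example usage (a sketch; task_id 1 and the link are made-up values):
#   POST /v1/crawler/task/update
#   form data: task_id=1, spider_link="['https://example.com/author/1']"
#   -> {"code": 200, "message": "task update success"}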


def get_user_info(source):
    # Map a source name to the spider function that fetches author info for it
    source_spider = {
        'xigua': xigua_user_info
    }
    return source_spider.get(source)
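
# To support another platform, map its source name to a user-info function above,
# e.g. (hypothetical) 'douyin': douyin_user_info, provided such a function exists
# in user_spider.user_info.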
  98. @app.route("/v1/crawler/author/create", methods=["POST"])
  99. def createUser():
  100. get_media_url = 'http://videotest-internal.yishihui.com/longvideoapi/user/virtual/crawler/registerVirtualUser'
  101. data = request.form.get('spider_link')
  102. source = request.form.get('source')
  103. user_tag = request.form.get('user_tag')
  104. for author_url in eval(data):
  105. # crawler = get_user_info(source)
  106. # user_info = crawler(author_url)
  107. post_data = {
  108. # 'count': 1, # (必须)账号个数:传1
  109. # 'accountType': 4, # (必须)账号类型 :传 4 app虚拟账号
  110. 'pwd': '', # 密码 默认 12346
  111. 'nickName': '', # 昵称 默认 vuser......
  112. 'avatarUrl': '',
  113. # 头像Url 默认 http://weapppiccdn.yishihui.com/resources/images/pic_normal.png
  114. 'tagName': user_tag, # 多条数据用英文逗号分割
  115. }
  116. response = requests.post(url=get_media_url, params=post_data)
  117. # print(response.text)
  118. media_id = response.json()['data']
  119. return jsonify({'data': data})
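
# Example usage (a sketch; the author URL and tag are made-up values):
#   POST /v1/crawler/author/create
#   form data: source=xigua, user_tag=demo,
#              spider_link="['https://www.ixigua.com/home/123456']"
#   Each link triggers one registerVirtualUser call; the media_id taken from the
#   response is currently unused.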
  120. if __name__ == "__main__":
  121. app.run(debug=True,port=5050)