conf_task.py 6.7 KB

import os
import sys
import time

import requests
from flask import Flask, request
from flask import jsonify

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
from common.db.mysql_help import MysqlHelper
from user_spider.user_info import *

app = Flask(__name__)
app.config['JSON_AS_ASCII'] = False  # allow non-ASCII characters in JSON responses
# GET only
@app.route("/v1/crawler/source/getall", methods=["GET"])
def getSource():
    # read the incoming query-string parameters
    get_data = request.args.to_dict()
    fields = get_data.get('fields')
    # process the parameters
    sql = 'select source, task_type, spider_name, machine from crawler_source'
    result = MysqlHelper.get_values(sql)
    if not result:
        return jsonify({'code': '200', 'result': [], 'message': 'no data'})
    source_list = list()
    for source, task_type, spider_name, machine in result:
        data = dict(
            source=source,
            task_type=task_type,
            spider_name=spider_name,
            machine=machine
        )
        source_list.append(data)
    return jsonify({'code': '200', 'result': source_list})
@app.route("/v1/crawler/task/insert", methods=["POST"])
def insertTask():
    data = request.form
    outer_info = data.get('spider_link')
    source = data.get('source')
    # collect links that already exist for this source
    exist_outer_info = list()
    for link in eval(outer_info):  # spider_link is posted as a serialized list
        s_sql = f"""select spider_link from crawler_task where source="{source}" """
        result = MysqlHelper.get_values(s_sql)
        if result and link in eval(result[0][0]):
            exist_outer_info.append(link)
    if exist_outer_info:
        return jsonify({'code': 200, 'message': 'duplicate spider_link', 'spider_link': exist_outer_info})
    # join the form keys into a comma-separated column list and build the insert statement
    keys = ','.join(data.keys())
    values = ','.join(['%s'] * len(data))
    sql = 'insert into {table}({keys}) VALUES({values})'.format(table='crawler_task', keys=keys, values=values)
    MysqlHelper.insert_values(sql, tuple(data.values()))
    return jsonify({'code': 200, 'message': 'task create success'})
@app.route("/v1/crawler/task/gettask", methods=["GET"])
def getAllTask():
    get_data = request.args.to_dict()
    page = int(get_data.get('page', 1))
    offset = int(get_data.get('offset', 10))  # page size
    start_count = (page * offset) - offset
    # MySQL LIMIT takes (offset, row_count), so the second value is the page size
    sql = f"""select task_id, task_name from crawler_task limit {start_count}, {offset}"""
    result = MysqlHelper.get_values(sql)
    if not result:
        return jsonify({'code': '200', 'result': [], 'message': 'no data'})
    source_list = list()
    for task_id, task_name in result:
        data = dict(
            task_id=task_id,
            task_name=task_name,
        )
        source_list.append(data)
    return jsonify({'code': '200', 'result': source_list})
# @app.route("/v1/crawler/task/getone", methods=["GET"])
# def getOneTask():
#     get_data = request.args.to_dict()
#     task_id = get_data['task_id']
#     sql = f'select task_id, spider_link from crawler_task where task_id={task_id}'
#     result = MysqlHelper.get_values(sql)
#     if not result:
#         return jsonify({'code': '200', 'result': [], 'message': 'no data'})
#     for task_id, spider_link in result:
#         data = dict(
#             task_id=task_id,
#             spider_link=spider_link,
#         )
#     return jsonify({'code': '200', 'result': data})
@app.route("/v1/crawler/task/update", methods=["POST"])
def updateTask():
    task_id = request.form.get('task_id')
    spider_link = request.form.get('spider_link')
    print(spider_link, task_id)
    sql = f"""UPDATE crawler_task SET spider_link='{spider_link}' where task_id = {task_id}"""
    print(sql)
    result = MysqlHelper.update_values(sql)
    if result:
        return jsonify({'code': 200, 'message': 'task update success'})
    else:
        return jsonify({'code': 400, 'message': 'task update failed'})
def get_user_info(source):
    # map a source name to its user-info spider function
    source_spider = {
        'xigua': xigua_user_info
    }
    return source_spider.get(source)
@app.route("/v1/crawler/author/create", methods=["POST"])
def createUser():
    get_media_url = 'http://videotest-internal.yishihui.com/longvideoapi/user/virtual/crawler/registerVirtualUser'
    print(request.form.to_dict())
    spider_link = request.form.get('spider_link')
    print(111111, spider_link, type(spider_link))
    source = request.form.get('source')
    task_type = request.form.get('task_type')
    applets_status = request.form.get('applets_status')
    app_status = request.form.get('app_status')
    user_tag = request.form.get('user_tag')
    user_content_tag = request.form.get('user_content_tag')
    success_list = list()
    fail_list = list()
    for author_url in eval(spider_link):  # spider_link is posted as a serialized list
        try:
            post_data = {
                # 'count': 1,  # (required) number of accounts: pass 1
                # 'accountType': 4,  # (required) account type: pass 4 for an app virtual account
                'pwd': '',  # password, defaults to 12346
                'nickName': '',  # nickname, defaults to vuser......
                'avatarUrl': '',
                # avatar URL, defaults to http://weapppiccdn.yishihui.com/resources/images/pic_normal.png
                'tagName': user_tag,  # multiple values separated by commas
            }
            response = requests.post(url=get_media_url, params=post_data)
            media_id = response.json()['data']
            f_sql = f"""select spider_link from crawler_author_map where spider_link="{author_url}" """
            result = MysqlHelper.get_values(f_sql)
            if result:
                # author is already mapped; count it as a success
                success_list.append(author_url)
                continue
            else:
                data = dict(
                    spider_link=author_url,
                    media_id=media_id,
                    source=source,
                    task_type=task_type,
                    applets_status=applets_status,
                    app_status=app_status,
                    user_tag=user_tag,
                    user_content_tag=user_content_tag,
                    insert_time=int(time.time()),
                    update_time=int(time.time())
                )
                keys = ','.join(data.keys())
                values = ','.join(['%s'] * len(data))
                table = 'crawler_author_map'
                sql = f"""insert into {table}({keys}) VALUES({values})"""
                result = MysqlHelper.insert_values(sql, tuple(data.values()))
                if not result:
                    fail_list.append(author_url)
                else:
                    success_list.append(author_url)
        except Exception as e:
            fail_list.append(author_url)
            continue
    return jsonify({'code': 200, 'result': {'success': success_list, 'fail': fail_list}})
if __name__ == "__main__":
    app.run(debug=True, port=5050)
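
A minimal client sketch for calling the routes defined above; it is not part of this file. The base URL is inferred from app.run(port=5050), and the task field values are hypothetical examples, not values taken from this service.

import requests

BASE = 'http://127.0.0.1:5050'  # assumed local host/port from app.run(port=5050)

# list configured crawler sources
print(requests.get(f'{BASE}/v1/crawler/source/getall').json())

# create a task; spider_link is sent as a serialized list, matching how this
# service eval()s it. The field values below are illustrative only.
task = {
    'source': 'xigua',
    'spider_link': "['https://example.com/author/1']",
    'task_name': 'demo_task',
}
print(requests.post(f'{BASE}/v1/crawler/task/insert', data=task).json())

# page through tasks: page starts at 1, offset is the page size
print(requests.get(f'{BASE}/v1/crawler/task/gettask', params={'page': 1, 'offset': 10}).json())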