conf_task.py 27 KB


  1. import copy
  2. import logging
  3. import os
  4. import sys
  5. import time
  6. import requests
  7. from flask import Flask, request
  8. from flask import jsonify
  9. from dotenv import load_dotenv
  10. sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
  11. from conf.config import get_config
  12. from common.db.mysql_help import MysqlHelper
# Load settings from a .env file into the process environment before reading them.
load_dotenv(verbose=True)
env = os.getenv('env')
app = Flask(__name__)
# Keep non-ASCII (e.g. Chinese) text readable in JSON responses instead of \u escapes.
app.config['JSON_AS_ASCII'] = False
# MySQL helper instance used by the route handlers in this module.
mysql_con = MysqlHelper()
conf = get_config()
  20. @app.route("/v1/crawler/task/addlink", methods=["POST"])
  21. def addSpiderLink():
  22. try:
  23. data = request.json
  24. spider_link = data['spider_link']
  25. task_id = data['task_id']
  26. sql = f'select * from crawler_author_map where spider_link="{spider_link}"'
  27. result = mysql_con.get_values(sql)
  28. now_time = int(time.time())
  29. repeat_list = list()
  30. if result:
  31. is_del = result[0]['is_del']
  32. if is_del:
  33. repeat_list.append(spider_link)
  34. return jsonify({'code': 400, 'message': '抓取名单重复', 'repeat_list': repeat_list})
  35. else:
  36. old_task_id = result[0]['task_id']
  37. if task_id == old_task_id:
  38. up_sql = f'update crawler_author_map set is_del=1 where spider_link="{spider_link}"'
  39. else:
  40. up_sql = f'update crawler_author_map set task_id={task_id},is_del=1 where spider_link="{spider_link}"'
  41. mysql_con.update_values(up_sql)
  42. return jsonify({'code': 200, 'message': '抓取名单增加成功'})
  43. else:
  44. sql = f'select * from crawler_task where task_id={task_id}'
  45. result = mysql_con.get_values(sql)
  46. success_list, fail_list = create_uid(result[0], task_id, spider_link=[spider_link])
  47. spider_links = eval(result[0]['spider_link'])
  48. spider_links.append(spider_link)
  49. str_spider_links = str(spider_links)
  50. u_sql = f'update crawler_task set spider_link="{str_spider_links}", update_time={now_time} where task_id={task_id}'
  51. mysql_con.update_values(u_sql)
  52. return jsonify({'code': 200, 'message': '抓取名单增加成功', 'add_link': success_list})
  53. except Exception as e:
  54. return jsonify(
  55. {'code': 400, 'message': '抓取名单删除失败', 'spider_link': spider_link})
  56. @app.route("/v1/crawler/task/dellink", methods=["POST"])
  57. def delSpiderLink():
  58. data = request.json
  59. spider_link = data['spider_link']
  60. task_id = data['task_id']
  61. up_sql = f'update crawler_author_map set is_del=0 where spider_link="{spider_link}"'
  62. MysqlHelper.update_values(up_sql)
  63. sql = f'select * from crawler_task where task_id ={task_id}'
  64. task = mysql_con.get_values(sql)
  65. spider_links = eval(task[0]['spider_link'])
  66. spider_links.remove(spider_link)
  67. now_time = int(time.time())
  68. u_sql = f'update crawler_task set spider_link="{spider_links}",update_time={now_time} where task_id={task_id}'
  69. mysql_con.update_values(u_sql)
  70. if spider_link:
  71. return jsonify({'code': 200, 'message': '抓取名单删除成功', 'del_link': spider_link})
  72. else:
  73. return jsonify(
  74. {'code': 400, 'message': '抓取名单删除失败', 'del_link': spider_link})
  75. @app.route("/v1/crawler/task/getcategory", methods=["GET"])
  76. def getCategory():
  77. sql = f'select id, content_category from crawler_content_category'
  78. result = mysql_con.get_values(sql)
  79. return jsonify({'code': 200, 'data': result})
  80. @app.route("/v1/crawler/task/getboard", methods=["GET"])
  81. def getBoard():
  82. sql = f'select id, mode_board from crawler_board'
  83. result = mysql_con.get_values(sql)
  84. return jsonify({'code': 200, 'data': result})
  85. @app.route("/v1/crawler/task/getmodename", methods=["GET"])
  86. def getModeName():
  87. sql = f'select id, mode_name from crawler_mode'
  88. result = mysql_con.get_values(sql)
  89. return jsonify({'code': 200, 'data': result})
  90. @app.route("/v1/crawler/task/getrecommendboard", methods=["GET"])
  91. def getRecommendBoard():
  92. sql = f'select id, mode_board from crawler_recommend_board'
  93. result = mysql_con.get_values(sql)
  94. return jsonify({'code': 200, 'data': result})
  95. @app.route("/v1/crawler/user/findmedia", methods=["GET"])
  96. def getMediaInfo():
  97. data = request.args.to_dict()
  98. task_id = data['task_id']
  99. sql = f'select * from crawler_author_map where task_id={task_id} and is_del=1'
  100. result = mysql_con.get_values(sql)
  101. task_user_info = []
  102. for task_info in result:
  103. media_id = task_info['media_id']
  104. media_info = requests.get(url=conf['select_media_url'], params={'uid': media_id}, verify=False).json()[
  105. 'content']
  106. media_name = media_info['longvideoNickName'] if media_info['longvideoNickName'] else media_info['nickName']
  107. nick_name = task_info['nick_name']
  108. spider_link = task_info['spider_link']
  109. create_user_time = task_info['create_user_time']
  110. media_data = dict(
  111. media_name=media_name,
  112. nick_name=nick_name,
  113. spider_link=spider_link,
  114. media_id={'media_id': media_id, 'media_url': conf['media_main_url'].format(media_id)},
  115. create_user_time=create_user_time * 1000
  116. )
  117. task_user_info.append(media_data)
  118. return jsonify({'code': 200, 'data': task_user_info})
  119. @app.route("/v1/crawler/task/findtask", methods=["GET"])
  120. def getTaskUserInfo():
  121. # 根据条件查找任务
  122. data = request.args.to_dict()
  123. values = ''
  124. for k, v in data.items():
  125. if isinstance(v, int):
  126. values += f'{k}={v} and '
  127. else:
  128. values += f'{k}="{v}" and '
  129. sql = f"select task_id from crawler_author_map where {values[:-4]}" # [:-1]是为了去掉末尾的逗号
  130. res = mysql_con.get_values(sql)
  131. task_id = res['task_id']
  132. sql = f'select task_name, source, task_type, create_task_user, insert_time, update_task_user, update_time from crawler_task where task_id={task_id} '
  133. task_info = mysql_con.get_values(sql)
  134. return jsonify({'code': 200, 'data': task_info})
  135. # 只接受get方法访问
  136. @app.route("/v1/crawler/source/getall", methods=["GET"])
  137. def getSource():
  138. try:
  139. # # 对参数进行操作
  140. sql = 'select * from crawler_source'
  141. result = mysql_con.get_values(sql)
  142. if not result:
  143. return jsonify({'code': '200', 'result': [], 'message': '没有更多数据'})
  144. except Exception as e:
  145. return jsonify({'code': '400', 'message': '获取数据源信息失败'})
  146. return jsonify({'code': '200', 'result': result})
  147. @app.route("/v1/crawler/source/getasktype", methods=["GET"])
  148. def getTaskType():
  149. try:
  150. data = request.args.to_dict()
  151. source = data['source']
  152. # # 对参数进行操作
  153. sql = f'select * from crawler_task_type where source="{source}"'
  154. result = mysql_con.get_values(sql)
  155. if not result:
  156. return jsonify({'code': '200', 'result': [], 'message': '没有更多数据'})
  157. else:
  158. task_type_list = list()
  159. for task_type_info in result:
  160. task_info = {
  161. 'type': task_type_info['task_type'],
  162. 'description': task_type_info['task_type_desc'],
  163. 'spider': {
  164. 'spider_name': task_type_info['spider_name'],
  165. 'description': task_type_info['spider_name_desc']
  166. }
  167. }
  168. task_type_list.append(task_info)
  169. source_dict = {
  170. 'task_type': task_type_list,
  171. }
  172. except Exception as e:
  173. return jsonify({'code': '400', 'message': '获取数据源信息失败'})
  174. return jsonify({'code': '200', 'result': source_dict})
  175. @app.route("/v1/crawler/task/checkrepeat", methods=["POST"])
  176. def get_repeat_list():
  177. data = request.json
  178. # 字段转换
  179. spider_links = data.get('spider_link')
  180. repeat_list = list()
  181. # 判断是否为重复名单
  182. for spider_link in spider_links:
  183. if isinstance(spider_link, int):
  184. s_sql = f"""select spider_link from crawler_author_map where spider_link={spider_link}"""
  185. else:
  186. s_sql = f"""select spider_link from crawler_author_map where spider_link='{spider_link}'"""
  187. result = mysql_con.get_values(s_sql)
  188. if result:
  189. repeat_list.append(spider_link)
  190. if repeat_list:
  191. return jsonify({'code': 200, 'message': '抓取名单校验通过', 'repeat_list': repeat_list})
  192. else:
  193. return jsonify({'code': 200, 'message': '抓取名单校验通过', 'repeat_list': repeat_list})
  194. @app.route("/v1/crawler/task/insert", methods=["POST"])
  195. def insertTask():
  196. try:
  197. data = request.json
  198. user_data = copy.deepcopy(data)
  199. tag_name_list = []
  200. content_tag_list = []
  201. user_tag = data['user_tag']
  202. user_content_tag = data['user_content_tag']
  203. for tag in user_tag:
  204. tag_name_list.append(tag['tagName'])
  205. for tag in user_content_tag:
  206. content_tag_list.append(tag['tagName'])
  207. if data['min_publish_time']:
  208. data['min_publish_time'] = int(data['min_publish_time'] / 1000)
  209. else:
  210. data['min_publish_time'] = 0
  211. if not data['min_publish_day']:
  212. data['min_publish_day'] = 0
  213. data['next_time'] = int(data['next_time'] / 1000)
  214. data['insert_time'] = int(time.time())
  215. data['update_time'] = int(time.time())
  216. data['spider_link'] = str(data['spider_link'])
  217. data['spider_rule'] = str(data['spider_rule'])
  218. data['user_tag_info'] = str(user_tag)
  219. data['content_tag_info'] = str(user_content_tag)
  220. data['user_tag'] = ','.join(str(i) for i in tag_name_list)
  221. data['user_content_tag'] = ','.join(str(i) for i in content_tag_list)
  222. # data['crawler_interval'] = data.pop('interval')
  223. # 获取到一个以键且为逗号分隔的字符串,返回一个字符串
  224. keys = ','.join(data.keys())
  225. values = ','.join(['%s'] * len(data))
  226. sql = 'insert into {table}({keys}) VALUES({values})'.format(table='crawler_task', keys=keys, values=values)
  227. task_id = mysql_con.insert_values(sql, tuple(data.values()))
  228. if task_id:
  229. spider_link = user_data['spider_link']
  230. success_list, fail_list, repeat_list = create_uid(user_data, task_id, spider_link)
  231. if repeat_list:
  232. d_sql = f'delete from crawler_task where task_id={task_id}'
  233. mysql_con.del_values(d_sql)
  234. message = f'该任务和其他任务抓取名单重复:{repeat_list}'
  235. return jsonify({'code': 400, 'message':message})
  236. return jsonify(
  237. {'code': 200, 'message': 'task create success', 'success_list': success_list, 'fail_list': fail_list})
  238. except Exception as e:
  239. # task_name = da
  240. # s_sql = f'select * from crawler_task where task_name={}'
  241. # d_sql = f'delete from crawler_task where task_id={task_id}'
  242. # mysql_con.del_values(d_sql)
  243. return jsonify({'code': 500, 'message': '任务写入失败,原因:{e}'})
  244. @app.route("/v1/crawler/task/gettask", methods=["POST"])
  245. def getAllTask():
  246. try:
  247. get_data = request.json
  248. page = int(get_data.get('page', 1))
  249. offset = int(get_data.get('offset', 10))
  250. start_count = (page * offset) - offset
  251. end_count = page * offset
  252. if get_data.get('fields'):
  253. select_data = get_data['fields']
  254. values = ''
  255. for k, v in select_data.items():
  256. if isinstance(v, int):
  257. values += f'{k}={v} and '
  258. else:
  259. values += f'{k}="{v}" and '
  260. sql = f"select task_id from crawler_author_map where {values[:-4]} and is_del=1" # [:-1]是为了去掉末尾的逗号
  261. res = mysql_con.get_values(sql)
  262. task_id_set = set()
  263. for task in res:
  264. task_id_set.add(task['task_id'])
  265. task_list = list()
  266. for task_id in task_id_set:
  267. sql = f'select * from crawler_task where task_id={task_id} order by update_time desc limit {start_count}, {end_count}'
  268. task_info = mysql_con.get_values(sql)[0]
  269. task_data = dict(
  270. task_id=task_info['task_id'],
  271. task_name=task_info['task_name'],
  272. source=task_info['source'],
  273. task_type=task_info['task_type'],
  274. create_task_user=task_info['create_task_user'],
  275. insert_time=task_info['insert_time'] * 1000,
  276. update_task_user=task_info['update_task_user'],
  277. update_time=task_info['update_time'] * 1000
  278. )
  279. task_list.append(task_data)
  280. return jsonify({'code': 200, 'result': task_list, 'total': len(task_list)})
  281. sql = f"""select * from crawler_task order by update_time desc limit {start_count}, {end_count} """
  282. result = mysql_con.get_values(sql)
  283. if not result:
  284. return jsonify({'code': '200', 'result': [], 'message': '没有更多任务'})
  285. task_list = list()
  286. for task_info in result:
  287. source = task_info['source']
  288. task_type = task_info['task_type']
  289. source_sql = f'select * from crawler_source where source="{source}"'
  290. source_info = mysql_con.get_values(source_sql)
  291. task_type_sql = f'select * from crawler_task_type where task_type="{task_type}"'
  292. type_info = mysql_con.get_values(task_type_sql)
  293. task_data = dict(
  294. task_id=task_info['task_id'],
  295. task_name=task_info['task_name'],
  296. source_name=source_info[0]['source_desc'],
  297. task_type_name=type_info[0]['task_type_desc'],
  298. source=task_info['source'],
  299. task_type=task_info['task_type'],
  300. create_task_user=task_info['create_task_user'],
  301. insert_time=task_info['insert_time'] * 1000,
  302. update_task_user=task_info['update_task_user'],
  303. update_time=task_info['update_time'] * 1000
  304. )
  305. task_list.append(task_data)
  306. t_sql = f"""select count(*) from crawler_task"""
  307. t_res = mysql_con.get_values(t_sql)
  308. total = t_res[0]['count(*)']
  309. except Exception as e:
  310. return jsonify({"code": "400", 'message': "任务列表获取失败"})
  311. return jsonify({'code': '200', 'result': task_list, 'total': total})
  312. @app.route("/v1/crawler/task/getone", methods=["GET"])
  313. def getOneTask():
  314. try:
  315. get_data = request.args.to_dict()
  316. task_id = get_data['task_id']
  317. sql = f'select * from crawler_task where task_id={task_id}'
  318. result = mysql_con.get_values(sql)
  319. if not result:
  320. return jsonify({'code': '400', 'result': [], 'message': 'no data'})
  321. data = result[0]
  322. if data['min_publish_time']:
  323. data['min_publish_time'] = data['min_publish_time'] * 1000
  324. else:
  325. data['min_publish_time'] = 0
  326. data['next_time'] = data['next_time'] * 1000
  327. data['spider_link'] = eval(data['spider_link'])
  328. data['spider_rule'] = eval(data['spider_rule'])
  329. #
  330. data['user_tag_info'] = eval(data['user_tag_info'])
  331. data['content_tag_info'] = eval(data['content_tag_info'])
  332. if not data['mode_name_id']:
  333. data['mode_name_id'] = ''
  334. if not data['mode_board_id']:
  335. data['mode_board_id'] = ''
  336. if not data['content_category_id']:
  337. data['content_category_id'] = ''
  338. except Exception as e:
  339. return jsonify({'code': '500', "message": "获取任务信息失败"})
  340. return jsonify({'code': '200', 'result': result})
  341. @app.route("/v1/crawler/task/update", methods=["POST"])
  342. def updateTask():
  343. try:
  344. data = request.json
  345. task_id = data.get('task_id')
  346. task_info = data.get('task_info')
  347. values = ''
  348. if task_info['min_publish_time']:
  349. task_info['min_publish_time'] = task_info['min_publish_time'] / 1000
  350. else:
  351. task_info['min_publish_time'] = 0
  352. if not task_info['min_publish_day']:
  353. task_info['min_publish_day'] = 0
  354. task_info['next_time'] = task_info['next_time'] / 1000
  355. user_tag = task_info['user_tag']
  356. user_content_tag = task_info['user_content_tag']
  357. tag_name_list = []
  358. content_tag_list = []
  359. for tag in user_tag:
  360. tag_name_list.append(tag['tagName'])
  361. for tag in user_content_tag:
  362. content_tag_list.append(tag['tagName'])
  363. task_info['user_tag_info'] = str(user_tag)
  364. task_info['content_tag_info'] = str(user_content_tag)
  365. task_info['user_tag'] = ','.join(str(i) for i in tag_name_list)
  366. task_info['user_content_tag'] = ','.join(str(i) for i in content_tag_list)
  367. for k, v in task_info.items():
  368. if isinstance(v, int):
  369. values += f'{k}={v},'
  370. else:
  371. values += f'{k}="{v}",'
  372. sql = f'update crawler_task set {values[:-1]} where task_id={task_id}'
  373. result = mysql_con.update_values(sql)
  374. if result:
  375. return jsonify({'code': 200, 'message': 'task update success'})
  376. else:
  377. return jsonify({'code': 400, 'message': 'task update faild'})
  378. except Exception as e:
  379. return jsonify({'code': 400, 'message': '任务更新失败'})
  380. def create_uid(task, task_id, spider_link):
  381. if not isinstance(spider_link, list):
  382. spider_link = eval(spider_link)
  383. source = task.get('source')
  384. task_type = task.get('task_type')
  385. applets_status = task.get('applets_status')
  386. app_status = task.get('app_status')
  387. try:
  388. user_tag = eval(task.get('user_tag_info'))
  389. user_content_tag = eval(task.get('content_tag_info'))
  390. except Exception as e:
  391. user_tag = task.get('user_tag')
  392. user_content_tag = task.get('user_content_tag')
  393. mode_name_id = task.get('mode_name_id', 0)
  394. mode_board_id = task.get('mode_board_id', 0)
  395. content_category_id = task.get('content_category_id', 0)
  396. mn_sql = f'select * from crawler_mode where id={mode_name_id}'
  397. mode_name_list = mysql_con.get_values(mn_sql)
  398. mb_sql = f'select * from crawler_board where id={mode_board_id}'
  399. mode_board_list = mysql_con.get_values(mb_sql)
  400. cc_sql = f'select * from crawler_content_category where id={content_category_id}'
  401. content_category_list = mysql_con.get_values(cc_sql)
  402. source_sql = f'select * from crawler_source where source="{source}"'
  403. source_res = mysql_con.get_values(source_sql)[0]
  404. spider_platform = source_res['source_desc']
  405. if mode_name_list:
  406. task['mode_name_str'] = mode_name_list[0]['mode_name']
  407. else:
  408. task['mode_name_str'] = ''
  409. if mode_board_list:
  410. task['mode_board_str'] = mode_board_list[0]['mode_board']
  411. else:
  412. task['mode_board_str'] = ''
  413. if content_category_list:
  414. task['content_category_str'] = content_category_list[0]['content_category']
  415. else:
  416. task['content_category_str'] = ''
  417. success_list = list()
  418. fail_list = list()
  419. tag_name_list = list()
  420. content_tag_list = list()
  421. for tag in user_tag:
  422. tag_name_list.append(tag['tagName'])
  423. for tag in user_content_tag:
  424. content_tag_list.append(tag['tagName'])
  425. user_tags = ','.join(str(i) for i in tag_name_list)
  426. user_content_tags = ','.join(str(i) for i in content_tag_list)
  427. repeat_list = []
  428. create_user_list = []
  429. for author_url in spider_link:
  430. repeat_sql = f'select * from crawler_author_map where spider_link="{author_url}"'
  431. result = mysql_con.get_values(repeat_sql)
  432. if result:
  433. old_task_id = result[0]['task_id']
  434. is_del = result[0]['is_del']
  435. if task_id == old_task_id:
  436. if is_del:
  437. continue
  438. else:
  439. up_sql = f'update crawler_author_map set is_del=1 where spider_link="{author_url}"'
  440. mysql_con.update_values(up_sql)
  441. else:
  442. if is_del:
  443. repeat_list.append(author_url)
  444. else:
  445. up_sql = f'update crawler_author_map set task_id={task_id},is_del=1 where spider_link="{author_url}"'
  446. mysql_con.update_values(up_sql)
  447. else:
  448. create_user_list.append(author_url)
  449. if repeat_list:
  450. return success_list, fail_list, repeat_list
  451. else:
  452. for author_url in create_user_list:
  453. now_time = int(time.time())
  454. time_array = time.localtime(now_time)
  455. str_time = time.strftime("%Y%m%d", time_array)
  456. # 生成创建用户的tag
  457. tags = ''
  458. if task['task_type'] == 'author':
  459. spider_task = '账号'
  460. tags_list = ['spider', spider_task, spider_platform, user_tags, task['content_category_str'], str_time]
  461. elif task['task_type'] == 'search':
  462. spider_task = '搜索'
  463. tags_list = ['spider', spider_task, spider_platform, user_tags, author_url,
  464. task['content_category_str'], str_time]
  465. elif task['task_type'] == 'board':
  466. spider_task = '榜单'
  467. mode_tags = task['mode_board_str']
  468. tags_list = ['spider', spider_task, spider_platform, user_tags, mode_tags, task['content_category_str'],
  469. str_time]
  470. elif task['task_type'] == 'recommend':
  471. spider_task = '推荐'
  472. mode_tags = task['mode_name_str'] + task['mode_board_str']
  473. tags_list = ['spider', spider_task, spider_platform, user_tags, mode_tags, task['content_category_str'],
  474. str_time]
  475. else:
  476. tags_list = ['spider', spider_platform, user_tags, task['content_category_str'], str_time]
  477. for v in tags_list:
  478. if v:
  479. tags += str(v) + ','
  480. post_data = {
  481. # 'count': 1, # (必须)账号个数:传1
  482. # 'accountType': 4, # (必须)账号类型 :传 4 app虚拟账号
  483. 'pwd': '', # 密码 默认 12346
  484. 'nickName': '', # 昵称 默认 vuser......
  485. 'avatarUrl': '',
  486. # 头像Url 默认 http://weapppiccdn.yishihui.com/resources/images/pic_normal.png
  487. 'tagName': tags[:-1], # 多条数据用英文逗号分割
  488. }
  489. try:
  490. response = requests.post(url=conf['media_url'], params=post_data)
  491. media_id = response.json()['data']
  492. media_info = requests.get(url=conf['select_media_url'], params={'uid': media_id}, verify=False).json()[
  493. 'content']
  494. except Exception as e:
  495. logging.warning(f'创建账户:{spider_link},失败,原因:{e}')
  496. fail_list.append(author_url)
  497. continue
  498. data = dict(
  499. spider_link=author_url,
  500. media_id=media_id,
  501. media_name=media_info['longvideoNickName'] if media_info['longvideoNickName'] else media_info[
  502. 'nickName'],
  503. source=source,
  504. task_type=task_type,
  505. applets_status=applets_status,
  506. app_status=app_status,
  507. user_tag=user_tags,
  508. user_content_tag=user_content_tags,
  509. insert_time=int(time.time()),
  510. update_time=int(time.time()),
  511. create_user_time=now_time,
  512. mode_name_str=task['mode_name_str'],
  513. mode_board_str=task['mode_board_str'],
  514. content_category_str=task['content_category_str'],
  515. # mode_value_str=mode_value_str,
  516. task_id=task_id,
  517. media_main_url=conf['media_main_url'].format(media_id)
  518. )
  519. keys = ','.join(data.keys())
  520. values = ','.join(['%s'] * len(data))
  521. table = 'crawler_author_map'
  522. sql = f"""insert into {table}({keys}) VALUES({values})"""
  523. mysql_con.insert_values(sql, tuple(data.values()))
  524. uer_info = dict(
  525. outer_id=author_url,
  526. uid=media_id
  527. )
  528. success_list.append(uer_info)
  529. return success_list, fail_list, repeat_list
  530. @app.route("/v1/crawler/author/create", methods=["POST"])
  531. def createUser():
  532. spider_link = request.json.get('spider_link')
  533. source = request.json.get('source')
  534. task_type = request.json.get('task_type')
  535. applets_status = request.json.get('applets_status')
  536. app_status = request.json.get('app_status')
  537. user_tag = request.json.get('user_tag')
  538. user_content_tag = request.json.get('user_content_tag')
  539. success_list = list()
  540. fail_list = list()
  541. for author_url in spider_link:
  542. try:
  543. f_sql = f"""select spider_link from crawler_author_map where spider_link="{author_url}" """
  544. result = mysql_con.get_values(f_sql)
  545. if result:
  546. success_list.append(author_url)
  547. continue
  548. else:
  549. tag_name_list = []
  550. content_tag_list = []
  551. for tag in user_tag:
  552. tag_name_list.append(tag['tagName'])
  553. for tag in user_content_tag:
  554. content_tag_list.append(tag['tagName'])
  555. user_tags = ','.join(str(i) for i in tag_name_list)
  556. user_content_tags = ','.join(str(i) for i in content_tag_list)
  557. post_data = {
  558. # 'count': 1, # (必须)账号个数:传1
  559. # 'accountType': 4, # (必须)账号类型 :传 4 app虚拟账号
  560. 'pwd': '', # 密码 默认 12346
  561. 'nickName': '', # 昵称 默认 vuser......
  562. 'avatarUrl': '',
  563. # 头像Url 默认 http://weapppiccdn.yishihui.com/resources/images/pic_normal.png
  564. 'tagName': user_tags, # 多条数据用英文逗号分割
  565. }
  566. response = requests.post(url=conf['media_url'], params=post_data)
  567. media_id = response.json()['data']
  568. data = dict(
  569. spider_link=author_url,
  570. media_id=media_id,
  571. source=source,
  572. task_type=task_type,
  573. applets_status=applets_status,
  574. app_status=app_status,
  575. user_tag=user_tags,
  576. user_content_tag=user_content_tags,
  577. insert_time=int(time.time()),
  578. update_time=int(time.time())
  579. )
  580. keys = ','.join(data.keys())
  581. values = ','.join(['%s'] * len(data))
  582. table = 'crawler_author_map'
  583. sql = f"""insert into {table}({keys}) VALUES({values})"""
  584. result = mysql_con.insert_values(sql, tuple(data.values()))
  585. if not result:
  586. fail_list.append(author_url)
  587. else:
  588. success_list.append(author_url)
  589. except Exception as e:
  590. fail_list.append(author_url)
  591. continue
  592. return jsonify({'code': 200, 'result': {'success': success_list, 'fail': fail_list}})
# Start Flask's built-in server on port 5050 only when this file is run directly.
# NOTE(review): debug=True turns on Flask's debug mode — confirm this entry
# point is never used for a production deployment.
if __name__ == "__main__":
    app.run(debug=True, port=5050)