# research_app.py (2.1 KB)
"""
Hourly job that re-submits failed video searches.

@author: luojunhui
"""
import json
import time
from datetime import datetime, timedelta, timezone

import pymysql
import requests
import schedule
  10. def find_defeat_info():
  11. """
  12. 查找失败的视频
  13. :return:
  14. """
  15. now_dt = datetime.utcfromtimestamp(int(time.time()) - 1 * 60 * 60)
  16. beijing_time = now_dt + timedelta(hours=8)
  17. today_dt = datetime.today().strftime("%Y-%m-%d")
  18. select_sql = f"""
  19. select trace_id, article_title, article_text, gh_id, account_name
  20. from long_articles_video
  21. where update_time < '{beijing_time}' and update_time > '{today_dt}' and success = 0;"""
  22. connection = pymysql.connect(
  23. host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com", # 数据库IP地址,内网地址
  24. port=3306, # 端口号
  25. user="crawler", # mysql用户名
  26. passwd="crawler123456@", # mysql用户登录密码
  27. db="piaoquan-crawler", # 数据库名
  28. charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
  29. )
  30. cursor = connection.cursor()
  31. cursor.execute(select_sql)
  32. fail_list = cursor.fetchall()
  33. return fail_list
  34. def job2():
  35. """
  36. 定时任务
  37. :return:
  38. """
  39. fail_list = find_defeat_info()
  40. if fail_list:
  41. for result in fail_list:
  42. params = {
  43. "trace_id": result[0],
  44. "title": result[1],
  45. "ghId": result[3],
  46. "content": result[2],
  47. "accountName": result[4]
  48. }
  49. url = "http://61.48.133.26:8111/re_search_videos"
  50. a = time.time()
  51. header = {
  52. "Content-Type": "application/json",
  53. }
  54. response = requests.post(url, json=params, headers=header, timeout=600)
  55. b = time.time()
  56. print(response.text)
  57. print(b - a)
  58. print(json.dumps(response.json(), ensure_ascii=False, indent=4))
  59. time.sleep(20)
  60. else:
  61. print("No videos")
  62. if __name__ == '__main__':
  63. schedule.every().hour.do(job2)
  64. while True:
  65. schedule.run_pending()
  66. time.sleep(1)