research_app.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. # encodings : utf-8
  2. """
  3. @author: luojunhui
  4. """
  5. import datetime
  6. import time
  7. import json
  8. import pymysql
  9. import requests
  10. from functions import MatchRate
  11. def find_fail_data(dt):
  12. """
  13. 查找1h失败的数据
  14. :param dt:
  15. :return:
  16. """
  17. M = MatchRate()
  18. time_stamp_list = M.generate_stamp_list(dt, dt)
  19. c = 0
  20. for item in time_stamp_list:
  21. s_d = int(item)
  22. e_d = int(item) + 24 * 60 * 60 * 1000
  23. result = M.match_rate(s_d, e_d)
  24. s = 0
  25. f = 0
  26. p = 0
  27. w = []
  28. for obj in result:
  29. if obj[0] == 2:
  30. s += 1
  31. elif obj[0] == 3:
  32. f += 1
  33. elif obj[0] == 1:
  34. p += 1
  35. w.append(obj[1])
  36. c += 1
  37. long_time_data = [i for i in w if int(time.time()) - int(i.split("-")[-1]) > 3600]
  38. return tuple(long_time_data)
  39. def find_defeat_info(trace_id_tuple):
  40. """
  41. 查找失败的视频
  42. :return:
  43. """
  44. select_sql = f"""
  45. select trace_id, article_title, gh_id, `kimi_summary` , `kimi_keys`
  46. from `long_articles_video` where `trace_id` in {trace_id_tuple};
  47. """
  48. connection = pymysql.connect(
  49. host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com", # 数据库IP地址,内网地址
  50. port=3306, # 端口号
  51. user="crawler", # mysql用户名
  52. passwd="crawler123456@", # mysql用户登录密码
  53. db="piaoquan-crawler", # 数据库名
  54. charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
  55. )
  56. cursor = connection.cursor()
  57. cursor.execute(select_sql)
  58. fail_list = cursor.fetchall()
  59. return fail_list
  60. def request_for_research(result):
  61. """
  62. research from new machine
  63. :param result:
  64. """
  65. params = {
  66. "trace_id": result[0],
  67. "title": result[1],
  68. "ghId": result[2],
  69. "kimi_summary": result[3],
  70. "kimi_keys": result[4]
  71. }
  72. url = "http://47.99.132.47:8111/re_search_videos"
  73. a = time.time()
  74. header = {
  75. "Content-Type": "application/json",
  76. }
  77. response = requests.post(url, json=params, headers=header, timeout=600)
  78. b = time.time()
  79. print("total cost: ", b - a, " s")
  80. print(json.dumps(response.json(), ensure_ascii=False, indent=4))
  81. def job2():
  82. """
  83. 定时任务
  84. :return:
  85. """
  86. date_str = datetime.datetime.today().strftime("%Y%m%d")
  87. trace_id_t = find_fail_data(dt=date_str)
  88. if trace_id_t:
  89. fail_list = find_defeat_info(trace_id_t)
  90. now_time_str = datetime.datetime.now().__str__()
  91. if fail_list:
  92. print("{} find {} defeat requests".format(now_time_str, len(fail_list)))
  93. count = 1
  94. for obj in fail_list:
  95. print(obj)
  96. request_for_research(obj)
  97. count += 1
  98. print("{} success re_search {} defeat requests".format(now_time_str, count))
  99. else:
  100. print("{} No videos Find".format(now_time_str))
  101. if __name__ == '__main__':
  102. while True:
  103. now_time_str = datetime.datetime.now().__str__()
  104. job2()
  105. print("{}: 执行程序完成, 等待一小时".format(now_time_str))
  106. time.sleep(60 * 60)