functions.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. """
  2. @author: luojunhui
  3. """
  4. import pymysql
  5. from datetime import datetime, timedelta
  6. class MatchRate(object):
  7. """
  8. 匹配率
  9. """
  10. @classmethod
  11. def generate_stamp_list(cls, start_date, end_date):
  12. """
  13. Generate daily date_str
  14. :param start_date:
  15. :param end_date:
  16. :return:
  17. """
  18. start = datetime.strptime(start_date, "%Y%m%d")
  19. end = datetime.strptime(end_date, "%Y%m%d")
  20. current = start
  21. timestamp_list = []
  22. while current <= end:
  23. timestamp_list.append(current.timestamp() * 1000)
  24. current += timedelta(days=1)
  25. return timestamp_list
  26. @classmethod
  27. def generate_today_stamp(cls, date_string):
  28. """
  29. :param date_string:
  30. :return:
  31. """
  32. return datetime.strptime(date_string, "%Y%m%d").timestamp() * 1000
  33. @classmethod
  34. def generate_yesterday_stamp(cls, now_dt):
  35. """
  36. Generate date in 3 days
  37. :param now_dt:
  38. :return:
  39. """
  40. now_date = datetime.strptime(now_dt, "%Y%m%d")
  41. yesterday = now_date - timedelta(days=1)
  42. return yesterday.timestamp() * 1000
  43. @classmethod
  44. def match_rate(cls, start_time_stamp, end_time_stamp):
  45. """
  46. sensitive words
  47. :return:
  48. """
  49. connection = pymysql.connect(
  50. host="rm-t4na9qj85v7790tf84o.mysql.singapore.rds.aliyuncs.com", # 数据库IP地址,内网地址
  51. port=3306, # 端口号
  52. user="crawler_readonly", # mysql用户名
  53. passwd="cyber#crawler_2023", # mysql用户登录密码
  54. db="aigc-admin-prod", # 数据库名
  55. charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
  56. )
  57. sql = f"""
  58. select status, trace_id, error_msg
  59. from publish_content_miniprogram
  60. where create_timestamp >= {start_time_stamp} and create_timestamp < {end_time_stamp};
  61. """
  62. cursor = connection.cursor()
  63. cursor.execute(sql)
  64. data = cursor.fetchall()
  65. result = [list(line) for line in data]
  66. return result
  67. @classmethod
  68. def match_rate_origin(cls, start_time_stamp, end_time_stamp):
  69. """
  70. 先前的匹配
  71. :param start_time_stamp:
  72. :param end_time_stamp:
  73. :return:
  74. """
  75. connection = pymysql.connect(
  76. host="rm-t4na9qj85v7790tf84o.mysql.singapore.rds.aliyuncs.com", # 数据库IP地址,内网地址
  77. port=3306, # 端口号
  78. user="crawler_readonly", # mysql用户名
  79. passwd="cyber#crawler_2023", # mysql用户登录密码
  80. db="aigc-admin-prod", # 数据库名
  81. charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
  82. )
  83. sql = f"""
  84. select publish_content_id, root_share_id, error_msg
  85. from publish_content_miniprogram
  86. where create_timestamp >= {start_time_stamp} and create_timestamp < {end_time_stamp};
  87. """
  88. cursor = connection.cursor()
  89. cursor.execute(sql)
  90. data = cursor.fetchall()
  91. result = [list(line) for line in data]
  92. return result
  93. class RateDetail(object):
  94. """
  95. Rate Detail
  96. """
  97. @classmethod
  98. def rate_and_error_list(cls, result_list):
  99. """
  100. 成功,失败的数据
  101. :param result_list:
  102. :return:
  103. """
  104. success_count = 0
  105. fail_count = 0
  106. processing_count = 0
  107. total_requests = len(result_list)
  108. error_list = []
  109. if result_list:
  110. for temp in result_list:
  111. status = temp[0]
  112. error = temp[2]
  113. if status == 1:
  114. processing_count += 1
  115. elif status == 2:
  116. success_count += 1
  117. elif status == 3:
  118. fail_count += 1
  119. else:
  120. continue
  121. if error:
  122. error_list.append(temp)
  123. obj = {
  124. "success_count": success_count,
  125. "fail_count": fail_count,
  126. "processing_count": processing_count,
  127. "total_count": total_requests,
  128. "error_list": error_list
  129. }
  130. else:
  131. obj = {
  132. "success_count": None,
  133. "fail_count": None,
  134. "processing_count": None,
  135. "total_count": None,
  136. "error_list": []
  137. }
  138. return obj