Browse Source

匹配率监测上线

罗俊辉 11 months ago
parent
commit
02285e9e84
5 changed files with 119 additions and 28 deletions
  1. 27 0
      functions.py
  2. 1 1
      match_rate_app.py
  3. 46 0
      re_search.py
  4. 23 0
      t.py
  5. 22 27
      test.py

+ 27 - 0
functions.py

@@ -73,6 +73,33 @@ class MatchRate(object):
         result = [list(line) for line in data]
         return result
 
+    @classmethod
+    def match_rate_origin(cls, start_time_stamp, end_time_stamp):
+        """
+        Earlier ("origin") match query: fetch publish_content_miniprogram rows created in a time window.
+        :param start_time_stamp: lower bound, compared as create_timestamp >= start_time_stamp
+        :param end_time_stamp: upper bound, compared as create_timestamp < end_time_stamp
+        :return: list of rows, each a list [publish_content_id, root_share_id, error_msg]
+        """
+        connection = pymysql.connect(
+            host="rm-t4na9qj85v7790tf84o.mysql.singapore.rds.aliyuncs.com",  # database host (intranet address)
+            port=3306,  # port number
+            user="crawler_readonly",  # MySQL user name
+            passwd="cyber#crawler_2023",  # MySQL login password; NOTE(review): credential is hard-coded in source
+            db="aigc-admin-prod",  # database name
+            charset="utf8mb4"  # connection charset; should match the text encoding stored in the database
+        )
+        sql = f"""
+                    select publish_content_id, root_share_id, error_msg
+                    from publish_content_miniprogram 
+                    where create_timestamp >= {start_time_stamp} and create_timestamp < {end_time_stamp};
+                    """
+        cursor = connection.cursor()
+        cursor.execute(sql)  # NOTE(review): bounds are interpolated into the SQL text, not passed as query parameters
+        data = cursor.fetchall()
+        result = [list(line) for line in data]  # convert each fetched row into a list
+        return result  # NOTE(review): cursor and connection are not closed in this method
+
 
 class RateDetail(object):
     """

+ 1 - 1
match_rate_app.py

@@ -23,7 +23,7 @@ def job():
     result_list = MR.match_rate(start_time_stamp=s_time, end_time_stamp=e_time)
     result_obj = RD.rate_and_error_list(result_list)
     rate_list = [
-        today_str,
+        datetime.utcfromtimestamp(s_time).strftime("%Y%m%d"),
         result_obj['total_count'],
         result_obj['success_count'],
         result_obj['success_count'] / result_obj['total_count'] if result_obj['total_count'] else None,

+ 46 - 0
re_search.py

@@ -0,0 +1,46 @@
+"""
+@author: luojunhui
+"""
+import time
+import json
+import pymysql
+import requests
+
+
+
+trace_id = "search-08fa6f87-aaa2-4462-9f57-f5ca72219136-1716827402"  # hard-coded trace id looked up below
+sql = f"""select trace_id, article_title, article_text, gh_id, account_name from long_articles_video where trace_id = '{trace_id}';"""
+connection = pymysql.connect(
+        host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # database host (intranet address)
+        port=3306,  # port number
+        user="crawler",  # MySQL user name
+        passwd="crawler123456@",  # MySQL login password; NOTE(review): credential is hard-coded in source
+        db="piaoquan-crawler",  # database name
+        charset="utf8mb4"  # connection charset; should match the text encoding stored in the database
+    )
+cursor = connection.cursor()
+cursor.execute(sql)
+out_video_list = cursor.fetchall()
+result = out_video_list[0]  # first matching row; raises IndexError if the query returned no rows
+params = {
+    "trace_id": result[0],  # column order follows the select list above
+    "title": result[1],  # article_title
+    "ghId": result[3],  # gh_id
+    "content": result[2],  # article_text
+    "accountName": result[4]  # account_name
+
+}
+# print(params)
+url = "http://localhost:8111/re_search_videos"  # endpoint on the local machine, port 8111
+
+
+a = time.time()  # timestamp taken before building the headers and sending the request
+header = {
+    "Content-Type": "application/json",
+}
+
+response = requests.post(url, json=params, headers=header, timeout=600)  # params are sent as the JSON body
+b = time.time()  # timestamp taken after the response is received
+print(response.text)
+print(b - a)  # elapsed wall-clock seconds between the two timestamps
+print(json.dumps(response.json(), ensure_ascii=False, indent=4))  # pretty-print the JSON response without escaping non-ASCII text

+ 23 - 0
t.py

@@ -0,0 +1,23 @@
+import pandas as pd
+import random
+
+df = pd.read_excel("result.xlsx")  # load the input sheet from the working directory
+columns = df.columns  # kept so the output sheet reuses the same column labels
+data_list = df.values.tolist()  # rows as plain Python lists
+print(len(data_list))
+
+
+# Define the index range (both bounds are used inclusively below)
+start, end = 0, 531  # NOTE(review): hard-coded; data_list[i] raises IndexError if the sheet has fewer than 532 rows
+
+# Randomly draw 30 distinct indices from start..end inclusive (0..531, i.e. 532 candidates)
+random_numbers = random.sample(range(start, end + 1), 30)
+
+result = []
+for i in random_numbers:
+    print(data_list[i])
+    result.append(data_list[i])  # collect the sampled row
+
+out_df = pd.DataFrame(result, columns=columns)
+out_df.to_excel("test.xlsx", index=False)  # write the sampled rows without the index column
+

+ 22 - 27
test.py

@@ -23,31 +23,26 @@ CREATE TABLE `publish_content_miniprogram` (
 """
 import json
 
-import pymysql
-from functions import RateDetail
+import pandas as pd
+from datetime import datetime
+from functions import RateDetail, MatchRate
 
-
-def table_structure():
-    """
-    Fetch status, trace_id and error_msg from publish_content_miniprogram rows newer than a fixed timestamp.
-    :return: list of rows, each converted to a list
-    """
-    connection = pymysql.connect(
-        host="rm-t4na9qj85v7790tf84o.mysql.singapore.rds.aliyuncs.com",  # database host (intranet address)
-        port=3306,  # port number
-        user="crawler_readonly",  # MySQL user name
-        passwd="cyber#crawler_2023",  # MySQL login password; NOTE(review): credential is hard-coded in source
-        db="aigc-admin-prod",  # database name
-        charset="utf8mb4"  # connection charset; should match the text encoding stored in the database
-    )
-    sql = "select status, trace_id, error_msg  from publish_content_miniprogram where create_timestamp > 1716739200000;"
-    cursor = connection.cursor()
-    cursor.execute(sql)
-    data = cursor.fetchall()
-    result = [list(line) for line in data]
-    return result
-
-
-result = table_structure()
-obj = RateDetail().rate_and_error_list(result_list=result)
-print(json.dumps(obj, ensure_ascii=False, indent=4))
+M = MatchRate()
+R = RateDetail()  # not used in the lines shown here
+time_stamp_list = M.generate_stamp_list("20240528", "20240529")
+df = []  # not used in the lines shown here
+for item in time_stamp_list:
+    s_d = int(item)  # window start
+    e_d = int(item) + 24 * 60 * 60 * 1000  # window end: start plus 86,400,000 (one day if stamps are milliseconds; TODO confirm)
+    result = M.match_rate(s_d, e_d)
+    s = 0  # count of rows whose first field is 2
+    f = 0  # count of rows whose first field is 3
+    p = 0  # count of rows whose first field is 1
+    for obj in result:
+        if obj[0] == 2:  # presumably a "success" status; verify against the table definition
+            s += 1
+        elif obj[0] == 3:  # presumably a "failed" status; verify against the table definition
+            f += 1
+        elif obj[0] == 1:  # presumably a "processing/pending" status; verify against the table definition
+            p += 1
+    print(s, f, p)  # per-window counts for first-field values 2, 3, 1