1 سال پیش · 02285e9e84
--- a/functions.py
+++ b/functions.py
@@ -73,6 +73,33 @@ class MatchRate(object):
 
				         result = [list(line) for line in data]
			
 
				         return result
			
 
				 
			
 
				+    @classmethod
			
 
				+    def match_rate_origin(cls, start_time_stamp, end_time_stamp):
			
 
				+        """Query publish_content_miniprogram rows for the previous matching flow.
			
 
				+        :param start_time_stamp: inclusive lower bound compared against create_timestamp
			
 
				+        :param end_time_stamp: exclusive upper bound compared against create_timestamp
			
 
				+        :return: list of [publish_content_id, root_share_id, error_msg] lists, one per row
			
 
				+        """
			
 
				+        connection = pymysql.connect(
			
 
				+            host="rm-t4na9qj85v7790tf84o.mysql.singapore.rds.aliyuncs.com",  # database host (noted as the intranet address)
			
 
				+            port=3306,  # port
			
 
				+            user="crawler_readonly",  # MySQL user name
			
 
				+            passwd="cyber#crawler_2023",  # MySQL password; NOTE(review): hard-coded secret, move to config/env
			
 
				+            db="aigc-admin-prod",  # database name
			
 
				+            charset="utf8mb4"  # connection charset; utf8mb4 for UTF-8 text
			
 
				+        )
			
 
				+        sql = """
			
 
				+                    select publish_content_id, root_share_id, error_msg
			
 
				+                    from publish_content_miniprogram 
			
 
				+                    where create_timestamp >= %s and create_timestamp < %s;
			
 
				+                    """
			
 
				+        try:  # close the connection even when the query raises
			
 
				+            with connection.cursor() as cursor:
			
 
				+                cursor.execute(sql, (start_time_stamp, end_time_stamp))  # driver binds the bounds
			
 
				+                return [list(line) for line in cursor.fetchall()]
			
 
				+        finally:
			
 
				+            connection.close()
			
 
				+
			
 
				 
			
 
				 class RateDetail(object):
			
 
				     """
			
--- a/match_rate_app.py
+++ b/match_rate_app.py
@@ -23,7 +23,7 @@ def job():
 
				     result_list = MR.match_rate(start_time_stamp=s_time, end_time_stamp=e_time)
			
 
				     result_obj = RD.rate_and_error_list(result_list)
			
 
				     rate_list = [
			
 
				-        today_str,
			
 
				+        datetime.utcfromtimestamp(s_time).strftime("%Y%m%d"),
			
 
				         result_obj['total_count'],
			
 
				         result_obj['success_count'],
			
 
				         result_obj['success_count'] / result_obj['total_count'] if result_obj['total_count'] else None,
			
--- a/re_search.py
+++ b/re_search.py
@@ -0,0 +1,46 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
 
				+import time
			
 
				+import json
			
 
				+import pymysql
			
 
				+import requests
			
 
				+
			
 
				+
			
 
				+
			
 
				+trace_id = "search-08fa6f87-aaa2-4462-9f57-f5ca72219136-1716827402"
			
 
				+# Placeholder query: the driver quotes trace_id instead of interpolating it into the SQL text
			
 
				+sql = """select trace_id, article_title, article_text, gh_id, account_name from long_articles_video where trace_id = %s;"""
			
 
				+connection = pymysql.connect(
			
 
				+        host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # database host (noted as the intranet address)
			
 
				+        port=3306,  # port
			
 
				+        user="crawler",  # MySQL user name
			
 
				+        passwd="crawler123456@",  # MySQL password; NOTE(review): hard-coded secret, move to config/env
			
 
				+        db="piaoquan-crawler",  # database name
			
 
				+        charset="utf8mb4"  # connection charset; utf8mb4 for UTF-8 text
			
 
				+    )
			
 
				+try:  # release the connection even when the query raises
			
 
				+    with connection.cursor() as cursor:
			
 
				+        cursor.execute(sql, (trace_id,))
			
 
				+        out_video_list = cursor.fetchall()
			
 
				+finally:
			
 
				+    connection.close()
			
 
				+if not out_video_list:  # fail with a readable message instead of a bare IndexError
			
 
				+    raise SystemExit(f"no long_articles_video row found for trace_id {trace_id}")
			
 
				+result = out_video_list[0]
			
 
				+params = {
			
 
				+    "trace_id": result[0],
			
 
				+    "title": result[1],
			
 
				+    "ghId": result[3],
			
 
				+    "content": result[2],
			
 
				+    "accountName": result[4]
			
 
				+}
			
 
				+url = "http://localhost:8111/re_search_videos"
			
 
				+
			
 
				+a = time.time()  # wall-clock start of the request
			
 
				+header = {"Content-Type": "application/json"}
			
 
				+response = requests.post(url, json=params, headers=header, timeout=600)
			
 
				+b = time.time()
			
 
				+print(response.text)
			
 
				+print(b - a)  # elapsed seconds for the POST
			
 
				+print(json.dumps(response.json(), ensure_ascii=False, indent=4))
			
--- a/t.py
+++ b/t.py
@@ -0,0 +1,23 @@
 
				+import pandas as pd
			
 
				+import random
			
 
				+
			
 
				+df = pd.read_excel("result.xlsx")
			
 
				+columns = df.columns
			
 
				+data_list = df.values.tolist()
			
 
				+print(len(data_list))
			
 
				+
			
 
				+
			
 
				+# Index range spans the rows actually loaded, so it stays valid for any sheet size
			
 
				+start, end = 0, len(data_list) - 1
			
 
				+
			
 
				+# Draw up to 30 distinct row indices; capped at the row count so sample() cannot raise
			
 
				+random_numbers = random.sample(range(start, end + 1), min(30, len(data_list)))
			
 
				+
			
 
				+result = []
			
 
				+for i in random_numbers:
			
 
				+    print(data_list[i])
			
 
				+    result.append(data_list[i])
			
 
				+
			
 
				+out_df = pd.DataFrame(result, columns=columns)
			
 
				+out_df.to_excel("test.xlsx", index=False)
			
 
				+
			
--- a/test.py
+++ b/test.py
@@ -23,31 +23,26 @@ CREATE TABLE `publish_content_miniprogram` (
 
				 """
			
 
				 import json
			
 
				 
			
 
				-import pymysql
			
 
				-from functions import RateDetail
			
 
				+import pandas as pd
			
 
				+from datetime import datetime
			
 
				+from functions import RateDetail, MatchRate
			
 
				 
			
 
				-
			
 
				-def table_structure():
			
 
				-    """
			
 
				-    sensitive words
			
 
				-    :return:
			
 
				-    """
			
 
				-    connection = pymysql.connect(
			
 
				-        host="rm-t4na9qj85v7790tf84o.mysql.singapore.rds.aliyuncs.com",  # 数据库IP地址，内网地址
			
 
				-        port=3306,  # 端口号
			
 
				-        user="crawler_readonly",  # mysql用户名
			
 
				-        passwd="cyber#crawler_2023",  # mysql用户登录密码
			
 
				-        db="aigc-admin-prod",  # 数据库名
			
 
				-        charset="utf8mb4"  # 如果数据库里面的文本是utf8编码的，charset指定是utf8
			
 
				-    )
			
 
				-    sql = "select status, trace_id, error_msg  from publish_content_miniprogram where create_timestamp > 1716739200000;"
			
 
				-    cursor = connection.cursor()
			
 
				-    cursor.execute(sql)
			
 
				-    data = cursor.fetchall()
			
 
				-    result = [list(line) for line in data]
			
 
				-    return result
			
 
				-
			
 
				-
			
 
				-result = table_structure()
			
 
				-obj = RateDetail().rate_and_error_list(result_list=result)
			
 
				-print(json.dumps(obj, ensure_ascii=False, indent=4))
			
 
				+# Ad-hoc check: for every start stamp from generate_stamp_list, print how many
			
 
				+# rows returned by match_rate have 2, 3 and 1 in their first column.
			
 
				+M = MatchRate()
			
 
				+R = RateDetail()
			
 
				+time_stamp_list = M.generate_stamp_list("20240528", "20240529")
			
 
				+df = []
			
 
				+# 24 hours in milliseconds, added to each start stamp to get the window end
			
 
				+window_ms = 24 * 60 * 60 * 1000
			
 
				+for item in time_stamp_list:
			
 
				+    # each item is used as the window start
			
 
				+    s_d = int(item)
			
 
				+    e_d = s_d + window_ms
			
 
				+    result = M.match_rate(s_d, e_d)
			
 
				+    # tally rows by the code in their first column
			
 
				+    # NOTE(review): 2/3/1 presumably mean success/fail/processing -- confirm
			
 
				+    s = sum(1 for obj in result if obj[0] == 2)
			
 
				+    f = sum(1 for obj in result if obj[0] == 3)
			
 
				+    p = sum(1 for obj in result if obj[0] == 1)
			
 
				+    print(s, f, p)