罗俊辉 11 месяцев назад
Родитель
Сommit
579f80005b
7 измененных файлов с 331 добавлено и 3 удалено
  1. 99 0
      feishu.py
  2. 124 0
      functions.py
  3. 0 3
      match_rate.py
  4. 51 0
      match_rate_app.py
  5. 1 0
      readme.md
  6. 4 0
      requirements.txt
  7. 52 0
      test.py

+ 99 - 0
feishu.py

@@ -0,0 +1,99 @@
+"""
+@author: luojunhui
+feishu python方法
+"""
+
+import requests
+
+
+def get_app_token():
+    """
+    Fetch a Feishu tenant access token for the internal app.
+
+    Posts the app credentials to the Feishu auth endpoint and returns the
+    ``tenant_access_token`` field of the JSON reply.
+
+    :return: the tenant access token string
+    :raises requests.HTTPError: if the endpoint answers with an HTTP error status
+    :raises RuntimeError: if the reply contains no ``tenant_access_token``
+    """
+    url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
+    # NOTE(review): credentials are hard-coded in source; they should be moved
+    # to configuration / secret storage and rotated.
+    post_data = {
+        "app_id": "cli_a51114cf8bf8d00c",  # account/password of the published app's admin console
+        "app_secret": "cNoTAqMpsAm7mPBcpCAXFfvOzCNL27fe",
+    }
+    # A timeout keeps the caller from hanging forever on a stalled connection.
+    response = requests.request("POST", url=url, data=post_data, timeout=30)
+    response.raise_for_status()
+    payload = response.json()
+    tenant_access_token = payload.get("tenant_access_token")
+    if not tenant_access_token:
+        # Surface the API's own error fields instead of an opaque KeyError.
+        raise RuntimeError(
+            "Feishu auth failed: code={} msg={}".format(
+                payload.get("code"), payload.get("msg")
+            )
+        )
+    # The token is a secret, so it is deliberately not printed.
+    return tenant_access_token
+
+
+class Feishu(object):
+    """
+    Small client for the Feishu spreadsheet value endpoints of one document.
+
+    Every request fetches a new tenant access token through ``get_app_token``.
+    """
+
+    # Base URL shared by the spreadsheet value endpoints used below.
+    API_ROOT = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets"
+    # Seconds to wait for Feishu before giving up on a request.
+    REQUEST_TIMEOUT = 30
+
+    def __init__(self, document_token):
+        """
+        :param document_token: spreadsheet token interpolated into every request URL
+        """
+        # Kept for backward compatibility; the request methods build their own headers.
+        self.headers = {"Content-Type": "application/json"}
+        self.document_token = document_token
+
+    @staticmethod
+    def _auth_headers():
+        """Build request headers carrying a freshly fetched bearer token."""
+        return {
+            "Authorization": "Bearer " + get_app_token(),
+            # The original key "contentType" is not an HTTP header name.
+            "Content-Type": "application/json; charset=utf-8",
+        }
+
+    def _write_range(self, method, endpoint, sheet_id, ranges, values):
+        """Send ``values`` for ``sheet_id!ranges`` to ``endpoint``; print and return the reply."""
+        url = "{}/{}/{}".format(self.API_ROOT, self.document_token, endpoint)
+        body = {
+            "valueRange": {"range": "{}!{}".format(sheet_id, ranges), "values": values}
+        }
+        response = requests.request(
+            method,
+            url=url,
+            headers=self._auth_headers(),
+            json=body,
+            timeout=self.REQUEST_TIMEOUT,
+        )
+        result = response.json()
+        print(result)
+        return result
+
+    def prepend_value(self, sheet_id, ranges, values):
+        """
+        Insert data at ``ranges`` of a sheet; if data already exists there, the
+        existing data is moved down first and then the new data is written.
+
+        :param sheet_id: unique ID of the sheet inside the Feishu spreadsheet
+        :param ranges: cell range from top-left to bottom-right, both ends inclusive
+        :param values: two-dimensional list used to fill the cells of ``ranges``
+        :return: parsed JSON reply of the API (also printed)
+        """
+        return self._write_range("POST", "values_prepend", sheet_id, ranges, values)
+
+    def insert_value(self, sheet_id, ranges, values):
+        """
+        Write data into ``ranges`` of a sheet with a PUT to the ``values`` endpoint.
+
+        :param sheet_id: unique ID of the sheet inside the Feishu spreadsheet
+        :param ranges: cell range from top-left to bottom-right
+        :param values: two-dimensional list used to fill the cells of ``ranges``
+        :return: parsed JSON reply of the API (also printed)
+        """
+        return self._write_range("PUT", "values", sheet_id, ranges, values)
+
+    def search_value(self, sheet_id, ab):
+        """
+        Read the cells of one range.
+
+        :param sheet_id: unique ID of the sheet inside the Feishu spreadsheet
+        :param ab: cell range to read, joined to ``sheet_id`` with ``!``
+        :return: parsed JSON reply of the API
+        """
+        ranges = "{}!{}".format(sheet_id, ab)
+        search_url = "{}/{}/values/{}".format(
+            self.API_ROOT, self.document_token, ranges
+        )
+        response = requests.request(
+            "GET",
+            url=search_url,
+            headers=self._auth_headers(),
+            timeout=self.REQUEST_TIMEOUT,
+        )
+        return response.json()

+ 124 - 0
functions.py

@@ -0,0 +1,124 @@
+"""
+@author: luojunhui
+"""
+
+import pymysql
+from datetime import datetime, timedelta
+
+
+class MatchRate(object):
+    """
+    Match-rate helpers: millisecond timestamp builders plus the database query.
+    """
+
+    @classmethod
+    def generate_stamp_list(cls, start_date, end_date):
+        """
+        Build one millisecond timestamp per day from start_date to end_date.
+
+        The dates are parsed as naive datetimes, so the timestamps are taken
+        at midnight in the machine's local timezone.
+
+        :param start_date: first day, formatted ``%Y%m%d``
+        :param end_date: last day (inclusive), formatted ``%Y%m%d``
+        :return: list of float millisecond timestamps; empty if start is after end
+        """
+        start = datetime.strptime(start_date, "%Y%m%d")
+        end = datetime.strptime(end_date, "%Y%m%d")
+        current = start
+        timestamp_list = []
+        while current <= end:
+            timestamp_list.append(current.timestamp() * 1000)
+            current += timedelta(days=1)
+        return timestamp_list
+
+    @classmethod
+    def generate_today_stamp(cls, date_string):
+        """
+        Convert a ``%Y%m%d`` date string to a millisecond timestamp.
+
+        :param date_string: day, formatted ``%Y%m%d``
+        :return: float millisecond timestamp of that day's midnight
+        """
+        return datetime.strptime(date_string, "%Y%m%d").timestamp() * 1000
+
+    @classmethod
+    def generate_yesterday_stamp(cls, now_dt):
+        """
+        Return the millisecond timestamp of the day before ``now_dt``.
+
+        :param now_dt: day, formatted ``%Y%m%d``
+        :return: float millisecond timestamp of the previous day's midnight
+        """
+        now_date = datetime.strptime(now_dt, "%Y%m%d")
+        yesterday = now_date - timedelta(days=1)
+        return yesterday.timestamp() * 1000
+
+    @classmethod
+    def match_rate(cls, start_time_stamp, end_time_stamp):
+        """
+        Fetch status, trace_id and error_msg of the publish_content_miniprogram
+        rows whose create_timestamp lies in [start_time_stamp, end_time_stamp).
+
+        :param start_time_stamp: inclusive lower bound for create_timestamp
+        :param end_time_stamp: exclusive upper bound for create_timestamp
+        :return: list of ``[status, trace_id, error_msg]`` lists
+        """
+        # NOTE(review): connection credentials are hard-coded in source; they
+        # should be moved to configuration / secret storage.
+        connection = pymysql.connect(
+            host="rm-t4na9qj85v7790tf84o.mysql.singapore.rds.aliyuncs.com",  # database address (intranet address)
+            port=3306,  # port
+            user="crawler_readonly",  # mysql user name
+            passwd="cyber#crawler_2023",  # mysql login password
+            db="aigc-admin-prod",  # database name
+            charset="utf8mb4"  # charset matching the encoding of the stored text
+        )
+        # Bound parameters instead of string interpolation: the driver does the quoting.
+        sql = """
+            select status, trace_id, error_msg
+            from publish_content_miniprogram 
+            where create_timestamp >= %s and create_timestamp < %s;
+            """
+        try:
+            with connection.cursor() as cursor:
+                cursor.execute(sql, (start_time_stamp, end_time_stamp))
+                data = cursor.fetchall()
+        finally:
+            # Always release the connection, even when the query fails.
+            connection.close()
+        return [list(line) for line in data]
+
+
+class RateDetail(object):
+    """
+    Summarise the rows returned by the match-rate query.
+    """
+    @classmethod
+    def rate_and_error_list(cls, result_list):
+        """
+        Count rows per status and collect the counted rows that carry an error.
+
+        Each row is indexed as ``row[0]`` = status and ``row[2]`` = error.
+        Status 1 counts as processing, 2 as success, 3 as failed; rows with
+        any other status are skipped entirely (not counted, never listed as
+        errors) but still contribute to ``total_count``.
+
+        :param result_list: list of rows
+        :return: dict with success_count, fail_count, processing_count,
+                 total_count and error_list; all counts are None and
+                 error_list is empty when result_list is empty
+        """
+        total = len(result_list)
+        if not result_list:
+            return {
+                "success_count": None,
+                "fail_count": None,
+                "processing_count": None,
+                "total_count": None,
+                "error_list": []
+            }
+
+        # status value -> name of the counter it increments
+        bucket_by_status = {
+            1: "processing_count",
+            2: "success_count",
+            3: "fail_count",
+        }
+        tally = dict.fromkeys(bucket_by_status.values(), 0)
+        rows_with_error = []
+        for row in result_list:
+            status, error = row[0], row[2]
+            bucket = bucket_by_status.get(status)
+            if bucket is None:
+                continue
+            tally[bucket] += 1
+            if error:
+                rows_with_error.append(row)
+
+        return {
+            "success_count": tally["success_count"],
+            "fail_count": tally["fail_count"],
+            "processing_count": tally["processing_count"],
+            "total_count": total,
+            "error_list": rows_with_error
+        }
+
+

+ 0 - 3
match_rate.py

@@ -1,3 +0,0 @@
-"""
-@author: luojunhui
-"""

+ 51 - 0
match_rate_app.py

@@ -0,0 +1,51 @@
+"""
+@author: luojunhui
+"""
+import time
+import schedule
+from datetime import datetime
+
+from functions import MatchRate, RateDetail
+from feishu import Feishu
+
+
+def job():
+    """
+    Scheduled task: query the rows created between yesterday's and today's
+    midnight, then write the rate summary and every error row to Feishu.
+    :return:
+    """
+    today_str = datetime.today().strftime("%Y%m%d")
+    window_end = MatchRate.generate_today_stamp(today_str)
+    window_start = MatchRate.generate_yesterday_stamp(today_str)
+    rows = MatchRate.match_rate(start_time_stamp=window_start, end_time_stamp=window_end)
+    summary = RateDetail.rate_and_error_list(rows)
+    total = summary['total_count']
+
+    def share(count):
+        # Ratio of count to the total, or None when there is no total to divide by.
+        return count / total if total else None
+
+    rate_row = [today_str, total]
+    for key in ('success_count', 'fail_count', 'processing_count'):
+        rate_row.append(summary[key])
+        rate_row.append(share(summary[key]))
+
+    sheet = Feishu(document_token="QF4YsYNJHhSOy3t6OM7cYfrBnq3")
+
+    # Rate summary: prepend a placeholder row at A2, then overwrite it.
+    rate_sheet_id = "c65def"
+    sheet.prepend_value(sheet_id=rate_sheet_id, values=[["******"]], ranges="A2:A2")
+    sheet.insert_value(sheet_id=rate_sheet_id, values=[rate_row], ranges="A2:H2")
+
+    # Error rows: same prepend-then-overwrite pair, once per row.
+    error_sheet_id = "67wu2O"
+    for error_row in summary['error_list']:
+        sheet.prepend_value(sheet_id=error_sheet_id, values=[["*****"]], ranges="A2:A2")
+        sheet.insert_value(sheet_id=error_sheet_id, values=[[today_str] + error_row], ranges="A2:D2")
+
+
+if __name__ == "__main__":
+    # Register job to run every day at 01:00.
+    schedule.every().day.at("01:00").do(job)
+    # Poll the scheduler once per second, forever.
+    while True:
+        schedule.run_pending()
+        time.sleep(1)

+ 1 - 0
readme.md

@@ -0,0 +1 @@
+## 每天凌晨更新前一天匹配率&&匹配失败原因

+ 4 - 0
requirements.txt

@@ -0,0 +1,4 @@
+pymysql
+schedule
+requests
+

+ 52 - 0
test.py

@@ -0,0 +1,52 @@
+"""
+@author: luojunhui
+CREATE TABLE `publish_content_miniprogram` (
+  `publish_content_id` varchar(64) NOT NULL COMMENT '发布内容ID',
+  `root_share_id` varchar(64) DEFAULT NULL,
+  `source` varchar(64) DEFAULT NULL COMMENT '来源',
+  `program_id` varchar(64) DEFAULT NULL COMMENT '小程序ID',
+  `program_name` varchar(128) DEFAULT NULL COMMENT '小程序名称',
+  `program_avatar` varchar(512) DEFAULT NULL COMMENT '小程序头像',
+  `production_cover` varchar(1024) DEFAULT NULL COMMENT '卡片封面',
+  `production_name` varchar(255) DEFAULT NULL COMMENT '卡片标题',
+  `production_path` varchar(1024) DEFAULT NULL COMMENT '卡片路径',
+  `video_url` varchar(1024) DEFAULT NULL COMMENT '视频播放地址',
+  `hide_flag` int(11) DEFAULT NULL COMMENT '是否隐藏(0-否,1-是)',
+  `status` int(11) NOT NULL DEFAULT '2' COMMENT '状态(0-待处理,1-处理中,2-成功,3-失败)',
+  `trace_id` varchar(128) DEFAULT NULL COMMENT '请求ID',
+  `error_msg` varchar(2048) DEFAULT NULL COMMENT '错误信息',
+  `create_timestamp` bigint(20) DEFAULT NULL COMMENT '创建时间戳',
+  `update_timestamp` bigint(20) DEFAULT NULL COMMENT '更新时间戳',
+  PRIMARY KEY (`publish_content_id`),
+  KEY `idx_rootShareId` (`root_share_id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='发布内容-插入小程序'
+"""
+import pymysql
+from functions import RateDetail
+
+
+def table_structure():
+    """
+    Fetch status, trace_id and error_msg of the publish_content_miniprogram
+    rows whose create_timestamp is greater than 1716739200000.
+
+    :return: list of ``[status, trace_id, error_msg]`` lists
+    """
+    # NOTE(review): connection credentials are hard-coded in source; they
+    # should be moved to configuration / secret storage.
+    connection = pymysql.connect(
+        host="rm-t4na9qj85v7790tf84o.mysql.singapore.rds.aliyuncs.com",  # database address (intranet address)
+        port=3306,  # port
+        user="crawler_readonly",  # mysql user name
+        passwd="cyber#crawler_2023",  # mysql login password
+        db="aigc-admin-prod",  # database name
+        charset="utf8mb4"  # charset matching the encoding of the stored text
+    )
+    sql = "select status, trace_id, error_msg  from publish_content_miniprogram where create_timestamp > 1716739200000;"
+    try:
+        with connection.cursor() as cursor:
+            cursor.execute(sql)
+            data = cursor.fetchall()
+    finally:
+        # Always release the connection, even when the query fails.
+        connection.close()
+    return [list(line) for line in data]
+
+
+# RateDetail has no each_rate method; rate_and_error_list returns a dict
+# holding the counts and the rows that carry an error message.
+result = table_structure()
+detail = RateDetail.rate_and_error_list(result)
+error_list = detail["error_list"]
+for error in error_list:
+    print(error)