Просмотр исходного кода

Merge branch '2024-10-23-luojunhui-add-cal-read-avg' of luojunhui/LongArticlesJob into master

luojunhui 8 месяцев назад
Родитель
Сommit
34c4541eda
Изменён 1 файл: 495 добавлено и 0 удалено
  1. 495 0
      not_used_tasks/cal_account_read_rate_avg.py

+ 495 - 0
not_used_tasks/cal_account_read_rate_avg.py

@@ -0,0 +1,495 @@
+"""
+@author: luojunhui
+Calculate the average reading rate for each account and article position.
+"""
+import json
+import time
+from tqdm import tqdm
+from datetime import datetime, timezone
+from pandas import DataFrame
+
+from applications import DeNetMysql, PQMySQL, longArticlesMySQL
+
+
+def timestamp_to_str(timestamp) -> str:
+    """
+    Convert a Unix timestamp to a ``YYYY-MM-DD`` date string in the local timezone.
+
+    :param timestamp: seconds since the Unix epoch
+    :return: the calendar date of that instant as seen in the system's local timezone
+    """
+    # fromtimestamp(..., tz=utc) builds the same aware UTC datetime as the
+    # deprecated utcfromtimestamp(...).replace(tzinfo=utc), without going
+    # through a naive intermediate value.
+    dt_object = datetime.fromtimestamp(timestamp, tz=timezone.utc).astimezone()
+    return dt_object.strftime('%Y-%m-%d')
+
+
+def get_account_fans_by_dt(db_client) -> dict:
+    """
+    Load the daily fan counts of every account, grouped by account and date.
+
+    :param db_client: client whose ``select(sql)`` returns rows of
+                      (date_str, fans_count, gh_id)
+    :return: nested mapping ``{gh_id: {date_str: fans_count}}``
+    """
+    sql = """
+        SELECT 
+            t1.date_str, 
+            t1.fans_count, 
+            t2.gh_id
+        FROM datastat_wx t1
+        JOIN publish_account t2 ON t1.account_id = t2.id
+        WHERE 
+            t2.channel = 5 
+        AND t2.status = 1 
+        AND t1.date_str >= '2024-07-01' 
+        ORDER BY t1.date_str;
+    """
+    rows = db_client.select(sql)
+    fans_by_account = {}
+    for row in rows:
+        date_str, fans_count, gh_id = row[0], row[1], row[2]
+        # Create the per-account mapping on first sight, then record the day.
+        fans_by_account.setdefault(gh_id, {})[date_str] = fans_count
+    return fans_by_account
+
+
+def get_account_articles_detail(db_client, gh_id_tuple) -> list[dict]:
+    """
+    Fetch the article records of the given accounts from ``official_articles_v2``.
+
+    :param db_client: client exposing ``select(sql)``
+    :param gh_id_tuple: iterable of account gh_ids to query
+    :return: one dict per row with the keys ghId, accountName, updateTime,
+             ItemIndex and show_view_count; an empty list when no ids are given
+    """
+    gh_ids = tuple(gh_id_tuple)
+    if not gh_ids:
+        # "IN ()" is not valid SQL, and no account can match anyway.
+        return []
+    # Build the list by hand: formatting the tuple itself renders a single
+    # id as ('x',) whose trailing comma is a SQL syntax error.
+    # NOTE: the ids are interpolated into the statement text, so callers
+    # must pass trusted values only.
+    in_clause = "(" + ", ".join(repr(gh_id) for gh_id in gh_ids) + ")"
+    sql = f"""
+            SELECT 
+                ghId, accountName, updateTime, ItemIndex, show_view_count 
+            FROM 
+                official_articles_v2
+            WHERE 
+                ghId IN {in_clause};
+            """
+    result = db_client.select(sql)
+    # Column names in the order they appear in the SELECT list.
+    columns = ("ghId", "accountName", "updateTime", "ItemIndex", "show_view_count")
+    return [
+        {name: row[position] for position, name in enumerate(columns)}
+        for row in result
+    ]
+
+
+def cal_account_read_rate(gh_id_tuple) -> DataFrame:
+    """
+    Compute the read rate of every article published by the given accounts.
+
+    The read rate of an article is its ``show_view_count`` divided by the
+    account's fan count on the article's update date. Articles for which no
+    (or a zero) fan count is known on that date are left out.
+
+    :param gh_id_tuple: gh_ids of the accounts to evaluate
+    :return: DataFrame with the columns ghId, accountName, updateTime,
+             ItemIndex, show_view_count and readRate
+    """
+    pq_db = PQMySQL()
+    de_db = DeNetMysql()
+    fans_by_account = get_account_fans_by_dt(db_client=de_db)
+    articles = get_account_articles_detail(db_client=pq_db, gh_id_tuple=gh_id_tuple)
+
+    rated_articles = []
+    for article in articles:
+        account_id = article['ghId']
+        publish_date = timestamp_to_str(article['updateTime'])
+        fans = fans_by_account.get(account_id, {}).get(publish_date, 0)
+        article['fans'] = fans
+        if not fans:
+            # Without a fan count there is nothing to divide by.
+            continue
+        article['readRate'] = article['show_view_count'] / fans
+        rated_articles.append(article)
+
+    # 'fans' is deliberately not listed, so it does not become a column.
+    column_names = ['ghId', 'accountName', 'updateTime', 'ItemIndex', 'show_view_count', 'readRate']
+    return DataFrame(rated_articles, columns=column_names)
+
+
+def cal_avg_account_read_rate(df, gh_id, index, window_days=60) -> tuple:
+    """
+    Average the read rate of one account at one article position over a recent window.
+
+    :param df: frame with at least the columns ghId, updateTime (Unix seconds),
+               ItemIndex and readRate
+    :param gh_id: account id to select
+    :param index: article position (ItemIndex) to select
+    :param window_days: length of the look-back window in days, ending now
+    :return: (mean readRate, latest updateTime, earliest updateTime, row count);
+             pandas yields NaN for the three aggregates when no row matches
+    """
+    # Read the clock once so both window bounds refer to the same instant.
+    max_time = int(time.time())
+    min_time = max_time - window_days * 24 * 3600
+    in_window = df[
+        (df["ghId"] == gh_id)
+        & (min_time <= df["updateTime"])
+        & (df["updateTime"] <= max_time)
+        & (df['ItemIndex'] == index)
+    ]
+    return (
+        in_window['readRate'].mean(),
+        in_window['updateTime'].max(),
+        in_window['updateTime'].min(),
+        len(in_window)
+    )
+
+
+def main() -> None:
+    """
+    Compute and store the average read rate of each account at each position.
+
+    Builds the per-article read-rate frame for the accounts listed below, then,
+    for every account and every article position 1..8, takes the average over
+    the recent window and inserts one row into ``long_articles_read_rate``.
+    Combinations with no articles in the window, or with an average of 0,
+    are skipped.
+
+    :return: None
+    """
+    lam = longArticlesMySQL()
+    account_gh_id_list = [
+        {"gh_id": "gh_9e559b3b94ca", "account_name": "票圈大事件"},
+        {"gh_id": "gh_084a485e859a", "account_name": "生活情感叁读"},
+        {"gh_id": "gh_1ee2e1b39ccf", "account_name": "票圈最新消息"},
+        {"gh_id": "gh_4c058673c07e", "account_name": "家家生活指南"},
+        {"gh_id": "gh_de9f9ebc976b", "account_name": "妙招持家帮手"},
+        {"gh_id": "gh_058e41145a0c", "account_name": "多彩妙生活"},
+        {"gh_id": "gh_7b4a5f86d68c", "account_name": "异闻趣事多"},
+        {"gh_id": "gh_4568b5a7e2fe", "account_name": "窦都事说"},
+        {"gh_id": "gh_adca24a8f429", "account_name": "史记趣言"},
+        {"gh_id": "gh_e24da99dc899", "account_name": "缘来养心厅"},
+        {"gh_id": "gh_e0eb490115f5", "account_name": "心灵情感驿站"},
+        {"gh_id": "gh_d2cc901deca7", "account_name": "票圈极速版"},
+        {"gh_id": "gh_26a307578776", "account_name": "票圈美文速递"},
+        {"gh_id": "gh_183d80deffb8", "account_name": "生活良读"},
+        {"gh_id": "gh_5ff48e9fb9ef", "account_name": "祝福养心厅"},
+        {"gh_id": "gh_9f8dc5b0c74e", "account_name": "音药金曲厅"},
+        {"gh_id": "gh_6d9f36e3a7be", "account_name": "音药养心馆"},
+        {"gh_id": "gh_ac43e43b253b", "account_name": "小阳看天下"},
+        {"gh_id": "gh_d5f935d0d1f2", "account_name": "繁花史阁"},
+        {"gh_id": "gh_be8c29139989", "account_name": "退休无忧生活"},
+        {"gh_id": "gh_c91b42649690", "account_name": "农耕趣时刻"},
+        {"gh_id": "gh_93e00e187787", "account_name": "小惠爱厨房"},
+        {"gh_id": "gh_744cb16f6e16", "account_name": "趣史论"},
+        {"gh_id": "gh_9877c8541764", "account_name": "退休老年圈"},
+        {"gh_id": "gh_0c89e11f8bf3", "account_name": "幸福启示"},
+        {"gh_id": "gh_6d205db62f04", "account_name": "指尖奇文"},
+        {"gh_id": "gh_c69776baf2cd", "account_name": "老友欢聚地"},
+        {"gh_id": "gh_6b7c2a257263", "account_name": "幸福晚年知音"},
+        {"gh_id": "gh_bfe5b705324a", "account_name": "奇趣百味生活"},
+        {"gh_id": "gh_29074b51f2b7", "account_name": "老来生活家"},
+        {"gh_id": "gh_7e5818b2dd83", "account_name": "便捷生活好方法"},
+        {"gh_id": "gh_89ef4798d3ea", "account_name": "生活百态观"},
+        {"gh_id": "gh_bff0bcb0694a", "account_name": "喜乐生活派"},
+        {"gh_id": "gh_a2901d34f75b", "account_name": "畅聊奇闻"},
+        {"gh_id": "gh_b15de7c99912", "account_name": "人生百事观"},
+        {"gh_id": "gh_56ca3dae948c", "account_name": "老友闲谈"},
+        {"gh_id": "gh_e75dbdc73d80", "account_name": "票圈正能量"},
+        {"gh_id": "gh_192c9cf58b13", "account_name": "天天学生活技巧"},
+        {"gh_id": "gh_6cfd1132df94", "account_name": "趣味晚年"},
+        {"gh_id": "gh_f25b5fb01977", "account_name": "生活晓常识"},
+        {"gh_id": "gh_080bb43aa0dc", "account_name": "态度说"},
+        {"gh_id": "gh_d49df5e974ca", "account_name": "生活指示录"},
+        {"gh_id": "gh_5ae65db96cb7", "account_name": "路边闲聊社"},
+        {"gh_id": "gh_72bace6b3059", "account_name": "幸福妙招合集"},
+        {"gh_id": "gh_9eef14ad6c16", "account_name": "快乐精选集"},
+        {"gh_id": "gh_c5cdf60d9ab4", "account_name": "老友快乐谈"},
+        {"gh_id": "gh_7f5075624a50", "account_name": "都市镜头"},
+        {"gh_id": "gh_d4dffc34ac39", "account_name": "情为老友"},
+        {"gh_id": "gh_ff487cb5dab3", "account_name": "趣味生活达人"},
+        {"gh_id": "gh_1b27dd1beeca", "account_name": "小贝生活课堂"},
+        {"gh_id": "gh_1d887d61088c", "account_name": "乐享生活小窍门"},
+        {"gh_id": "gh_3ed305b5817f", "account_name": "看不够妙招"},
+        {"gh_id": "gh_dd4c857bbb36", "account_name": "无忧自在生活"},
+        {"gh_id": "gh_f902cea89e48", "account_name": "无忧潮生活"},
+        {"gh_id": "gh_b676b7ad9b74", "account_name": "无忧生活小妙招"},
+        {"gh_id": "gh_b6f2c5332c72", "account_name": "巷尾风声"},
+        {"gh_id": "gh_ee78360d06f5", "account_name": "实用妙招800个"},
+        {"gh_id": "gh_68e7fdc09fe4", "account_name": "史趣探秘"},
+        {"gh_id": "gh_789a40fe7935", "account_name": "史记有言"},
+        {"gh_id": "gh_77f36c109fb1", "account_name": "暖心一隅"},
+        {"gh_id": "gh_ac43eb24376d", "account_name": "麒阁史记"},
+        {"gh_id": "gh_969f5ea5fee1", "account_name": "心海情澜起"},
+        {"gh_id": "gh_57573f01b2ee", "account_name": "那些历史"},
+        {"gh_id": "gh_008ef23062ee", "account_name": "日常生活小技巧集"},
+        {"gh_id": "gh_3e91f0624545", "account_name": "趣谈史记"},
+        {"gh_id": "gh_30816d8adb52", "account_name": "日常巧思集"},
+        {"gh_id": "gh_51e4ad40466d", "account_name": "日常小妙招秘籍"},
+        {"gh_id": "gh_7c66e0dbd2cf", "account_name": "晚年家人"},
+        {"gh_id": "gh_03d32e83122f", "account_name": "快乐生活妙技巧"},
+        {"gh_id": "gh_0e4fd9e88386", "account_name": "持家有妙招"},
+        {"gh_id": "gh_95ed5ecf9363", "account_name": "生活小优招"},
+        {"gh_id": "gh_970460d9ccec", "account_name": "生活之大全"},
+        {"gh_id": "gh_749271f1ccd5", "account_name": "轻松生活方法"},
+        {"gh_id": "gh_660afe87b6fd", "account_name": "趣读奇事"},
+        {"gh_id": "gh_03d45c260115", "account_name": "晚年多享乐"},
+        {"gh_id": "gh_1686250f15b6", "account_name": "福康俏生活"},
+        {"gh_id": "gh_98ec0ffe69b3", "account_name": "博史鉴览"},
+        {"gh_id": "gh_2e615fa75ffb", "account_name": "好招妙"},
+        {"gh_id": "gh_57c9e8babea7", "account_name": "福享暮年"},
+        {"gh_id": "gh_bfea052b5baa", "account_name": "奇读趣史"},
+        {"gh_id": "gh_6d3aa9d13402", "account_name": "悠读生活"},
+    ]
+    df = cal_account_read_rate(tuple(account['gh_id'] for account in account_gh_id_list))
+    # Article positions to evaluate for every account.
+    index_list = range(1, 9)
+    for account in tqdm(account_gh_id_list):
+        for index in index_list:
+            avg_rate, max_time, min_time, a_count = cal_avg_account_read_rate(df, account['gh_id'], index)
+            print(account['account_name'], "\t", index, "\t", avg_rate, "\t", max_time, "\t", min_time, "\t", a_count,
+                  "\t", account['gh_id'])
+            # With no matching articles the average and both timestamps are
+            # NaN; NaN never equals 0, so test the row count explicitly
+            # instead of letting the date conversion below fail on NaN.
+            if a_count == 0 or avg_rate == 0:
+                continue
+            try:
+                update_sql = """
+                    INSERT INTO long_articles_read_rate
+                    (account_name, gh_id, position, read_rate_avg, remark, articles_count, earliest_publish_time, latest_publish_time, dt_version, is_delete)
+                    values
+                    (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
+                """
+                lam.update(
+                    sql=update_sql,
+                    params=(
+                        account['account_name'],
+                        account['gh_id'],
+                        index,
+                        avg_rate,
+                        "1022去掉粉丝为 0的计数",
+                        a_count,
+                        timestamp_to_str(min_time),
+                        timestamp_to_str(max_time),
+                        1022,
+                        0
+                    )
+                )
+            except Exception as e:
+                # Best effort: report the failed row and carry on with the rest.
+                print(e)
+
+
+# Script entry point: run the job only when this file is executed directly.
+if __name__ == '__main__':
+    main()