123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495 |
- """
- @author: luojunhui
- Calculate the average read rate of each account at each article position.
- """
- import json
- import time
- from tqdm import tqdm
- from datetime import datetime, timezone
- from pandas import DataFrame
- from applications import DeNetMysql, PQMySQL, longArticlesMySQL
def timestamp_to_str(timestamp) -> str:
    """
    Format a Unix timestamp as a ``YYYY-MM-DD`` date in the local timezone.

    :param timestamp: seconds since the Unix epoch
    :return: the local calendar date of that instant, e.g. ``'2024-10-22'``
    """
    # Build an aware UTC datetime directly; datetime.utcfromtimestamp() is
    # deprecated since Python 3.12 and returned a naive value that had to be
    # patched with replace(tzinfo=...).
    local_dt = datetime.fromtimestamp(timestamp, tz=timezone.utc).astimezone()
    return local_dt.strftime('%Y-%m-%d')
def get_account_fans_by_dt(db_client) -> dict:
    """
    Load the fan count of every account, keyed by account and then by date.

    :param db_client: client exposing ``select(sql)`` that returns rows of
        ``(date_str, fans_count, gh_id)``
    :return: ``{gh_id: {date_str: fans_count}}``
    """
    query = """
        SELECT
            t1.date_str,
            t1.fans_count,
            t2.gh_id
        FROM datastat_wx t1
        JOIN publish_account t2 ON t1.account_id = t2.id
        WHERE
            t2.channel = 5
        AND t2.status = 1
        AND t1.date_str >= '2024-07-01'
        ORDER BY t1.date_str;
    """
    fans_by_account = {}
    for date_str, fans_count, account_gh_id in db_client.select(query):
        # Create the per-account mapping on first sight, then record the day.
        fans_by_account.setdefault(account_gh_id, {})[date_str] = fans_count
    return fans_by_account
def get_account_articles_detail(db_client, gh_id_tuple) -> list[dict]:
    """
    Fetch the article rows of the given accounts from ``official_articles_v2``.

    :param db_client: client exposing ``select(sql)`` that returns rows of
        ``(ghId, accountName, updateTime, ItemIndex, show_view_count)``
    :param gh_id_tuple: iterable of account gh_ids to query; values are
        rendered as quoted SQL string literals
    :return: one dict per article with the keys ``ghId``, ``accountName``,
        ``updateTime``, ``ItemIndex`` and ``show_view_count``; an empty list
        when no gh_ids are given
    """
    gh_ids = [str(gh_id) for gh_id in gh_id_tuple]
    if not gh_ids:
        # "IN ()" is not valid SQL, and there is nothing to look up anyway.
        return []

    # Build the IN list explicitly instead of interpolating the tuple's repr:
    # a one-element tuple renders as "('x',)" which is a SQL syntax error.
    # Backslashes and single quotes are escaped so an id cannot break out of
    # its literal.
    quoted_ids = ", ".join(
        "'" + gh_id.replace("\\", "\\\\").replace("'", "''") + "'"
        for gh_id in gh_ids
    )
    sql = f"""
        SELECT
            ghId, accountName, updateTime, ItemIndex, show_view_count
        FROM
            official_articles_v2
        WHERE
            ghId IN ({quoted_ids});
    """
    result = db_client.select(sql)
    keys = ("ghId", "accountName", "updateTime", "ItemIndex", "show_view_count")
    return [dict(zip(keys, row)) for row in result]
def cal_account_read_rate(gh_id_tuple) -> DataFrame:
    """
    Compute the read rate of every article published by the given accounts.

    The read rate of an article is ``show_view_count / fans``, where ``fans``
    is the account's fan count on the article's local publish date.

    :param gh_id_tuple: gh_ids of the accounts to evaluate
    :return: DataFrame with the columns ``ghId``, ``accountName``,
        ``updateTime``, ``ItemIndex``, ``show_view_count`` and ``readRate``;
        articles without a known, non-zero fan count are left out
    """
    pq_db = PQMySQL()
    de_db = DeNetMysql()
    response = []
    fans_dict_each_day = get_account_fans_by_dt(db_client=de_db)
    account_article_detail = get_account_articles_detail(
        db_client=pq_db,
        gh_id_tuple=gh_id_tuple
    )
    for line in account_article_detail:
        # NOTE(review): the lookup assumes the stored date_str values use the
        # same 'YYYY-MM-DD' form that timestamp_to_str produces -- confirm.
        dt = timestamp_to_str(line['updateTime'])
        fans = fans_dict_each_day.get(line['ghId'], {}).get(dt, 0)
        line['fans'] = fans
        if not fans:
            # No fan count for that day (or zero fans): a rate cannot be
            # computed, so the article is excluded from the statistics.
            continue
        line['readRate'] = line['show_view_count'] / fans
        response.append(line)
    return DataFrame(
        response,
        columns=['ghId', 'accountName', 'updateTime', 'ItemIndex', 'show_view_count', 'readRate']
    )
def cal_avg_account_read_rate(df, gh_id, index) -> tuple:
    """
    Average the read rate of one account at one article position over the
    last 60 days.

    :param df: DataFrame with the columns ``ghId``, ``updateTime``,
        ``ItemIndex`` and ``readRate``
    :param gh_id: account to select
    :param index: article position (``ItemIndex``) to select
    :return: ``(mean read rate, latest updateTime, earliest updateTime,
        number of matching rows)``
    """
    window_start = int(time.time()) - 60 * 24 * 3600
    window_end = int(time.time())

    same_account = df["ghId"] == gh_id
    in_window = (df["updateTime"] >= window_start) & (df["updateTime"] <= window_end)
    same_position = df['ItemIndex'] == index
    selected = df[same_account & in_window & same_position]

    publish_times = selected['updateTime']
    average_rate = selected['readRate'].mean()
    return average_rate, publish_times.max(), publish_times.min(), len(selected)
def main() -> None:
    """
    Compute the average read rate of every account at every article position
    and insert one row per (account, position) into ``long_articles_read_rate``.

    Combinations with no matching articles, or with an average of zero or
    NaN, are skipped. A failure while saving one row is reported and does not
    stop the remaining rows.

    :return:
    """
    # Local import so this block does not depend on the file's import header.
    from math import isnan

    lam = longArticlesMySQL()
    account_gh_id_list = [
        {"gh_id": "gh_9e559b3b94ca", "account_name": "票圈大事件"},
        {"gh_id": "gh_084a485e859a", "account_name": "生活情感叁读"},
        {"gh_id": "gh_1ee2e1b39ccf", "account_name": "票圈最新消息"},
        {"gh_id": "gh_4c058673c07e", "account_name": "家家生活指南"},
        {"gh_id": "gh_de9f9ebc976b", "account_name": "妙招持家帮手"},
        {"gh_id": "gh_058e41145a0c", "account_name": "多彩妙生活"},
        {"gh_id": "gh_7b4a5f86d68c", "account_name": "异闻趣事多"},
        {"gh_id": "gh_4568b5a7e2fe", "account_name": "窦都事说"},
        {"gh_id": "gh_adca24a8f429", "account_name": "史记趣言"},
        {"gh_id": "gh_e24da99dc899", "account_name": "缘来养心厅"},
        {"gh_id": "gh_e0eb490115f5", "account_name": "心灵情感驿站"},
        {"gh_id": "gh_d2cc901deca7", "account_name": "票圈极速版"},
        {"gh_id": "gh_26a307578776", "account_name": "票圈美文速递"},
        {"gh_id": "gh_183d80deffb8", "account_name": "生活良读"},
        {"gh_id": "gh_5ff48e9fb9ef", "account_name": "祝福养心厅"},
        {"gh_id": "gh_9f8dc5b0c74e", "account_name": "音药金曲厅"},
        {"gh_id": "gh_6d9f36e3a7be", "account_name": "音药养心馆"},
        {"gh_id": "gh_ac43e43b253b", "account_name": "小阳看天下"},
        {"gh_id": "gh_d5f935d0d1f2", "account_name": "繁花史阁"},
        {"gh_id": "gh_be8c29139989", "account_name": "退休无忧生活"},
        {"gh_id": "gh_c91b42649690", "account_name": "农耕趣时刻"},
        {"gh_id": "gh_93e00e187787", "account_name": "小惠爱厨房"},
        {"gh_id": "gh_744cb16f6e16", "account_name": "趣史论"},
        {"gh_id": "gh_9877c8541764", "account_name": "退休老年圈"},
        {"gh_id": "gh_0c89e11f8bf3", "account_name": "幸福启示"},
        {"gh_id": "gh_6d205db62f04", "account_name": "指尖奇文"},
        {"gh_id": "gh_c69776baf2cd", "account_name": "老友欢聚地"},
        {"gh_id": "gh_6b7c2a257263", "account_name": "幸福晚年知音"},
        {"gh_id": "gh_bfe5b705324a", "account_name": "奇趣百味生活"},
        {"gh_id": "gh_29074b51f2b7", "account_name": "老来生活家"},
        {"gh_id": "gh_7e5818b2dd83", "account_name": "便捷生活好方法"},
        {"gh_id": "gh_89ef4798d3ea", "account_name": "生活百态观"},
        {"gh_id": "gh_bff0bcb0694a", "account_name": "喜乐生活派"},
        {"gh_id": "gh_a2901d34f75b", "account_name": "畅聊奇闻"},
        {"gh_id": "gh_b15de7c99912", "account_name": "人生百事观"},
        {"gh_id": "gh_56ca3dae948c", "account_name": "老友闲谈"},
        {"gh_id": "gh_e75dbdc73d80", "account_name": "票圈正能量"},
        {"gh_id": "gh_192c9cf58b13", "account_name": "天天学生活技巧"},
        {"gh_id": "gh_6cfd1132df94", "account_name": "趣味晚年"},
        {"gh_id": "gh_f25b5fb01977", "account_name": "生活晓常识"},
        {"gh_id": "gh_080bb43aa0dc", "account_name": "态度说"},
        {"gh_id": "gh_d49df5e974ca", "account_name": "生活指示录"},
        {"gh_id": "gh_5ae65db96cb7", "account_name": "路边闲聊社"},
        {"gh_id": "gh_72bace6b3059", "account_name": "幸福妙招合集"},
        {"gh_id": "gh_9eef14ad6c16", "account_name": "快乐精选集"},
        {"gh_id": "gh_c5cdf60d9ab4", "account_name": "老友快乐谈"},
        {"gh_id": "gh_7f5075624a50", "account_name": "都市镜头"},
        {"gh_id": "gh_d4dffc34ac39", "account_name": "情为老友"},
        {"gh_id": "gh_ff487cb5dab3", "account_name": "趣味生活达人"},
        {"gh_id": "gh_1b27dd1beeca", "account_name": "小贝生活课堂"},
        {"gh_id": "gh_1d887d61088c", "account_name": "乐享生活小窍门"},
        {"gh_id": "gh_3ed305b5817f", "account_name": "看不够妙招"},
        {"gh_id": "gh_dd4c857bbb36", "account_name": "无忧自在生活"},
        {"gh_id": "gh_f902cea89e48", "account_name": "无忧潮生活"},
        {"gh_id": "gh_b676b7ad9b74", "account_name": "无忧生活小妙招"},
        {"gh_id": "gh_b6f2c5332c72", "account_name": "巷尾风声"},
        {"gh_id": "gh_ee78360d06f5", "account_name": "实用妙招800个"},
        {"gh_id": "gh_68e7fdc09fe4", "account_name": "史趣探秘"},
        {"gh_id": "gh_789a40fe7935", "account_name": "史记有言"},
        {"gh_id": "gh_77f36c109fb1", "account_name": "暖心一隅"},
        {"gh_id": "gh_ac43eb24376d", "account_name": "麒阁史记"},
        {"gh_id": "gh_969f5ea5fee1", "account_name": "心海情澜起"},
        {"gh_id": "gh_57573f01b2ee", "account_name": "那些历史"},
        {"gh_id": "gh_008ef23062ee", "account_name": "日常生活小技巧集"},
        {"gh_id": "gh_3e91f0624545", "account_name": "趣谈史记"},
        {"gh_id": "gh_30816d8adb52", "account_name": "日常巧思集"},
        {"gh_id": "gh_51e4ad40466d", "account_name": "日常小妙招秘籍"},
        {"gh_id": "gh_7c66e0dbd2cf", "account_name": "晚年家人"},
        {"gh_id": "gh_03d32e83122f", "account_name": "快乐生活妙技巧"},
        {"gh_id": "gh_0e4fd9e88386", "account_name": "持家有妙招"},
        {"gh_id": "gh_95ed5ecf9363", "account_name": "生活小优招"},
        {"gh_id": "gh_970460d9ccec", "account_name": "生活之大全"},
        {"gh_id": "gh_749271f1ccd5", "account_name": "轻松生活方法"},
        {"gh_id": "gh_660afe87b6fd", "account_name": "趣读奇事"},
        {"gh_id": "gh_03d45c260115", "account_name": "晚年多享乐"},
        {"gh_id": "gh_1686250f15b6", "account_name": "福康俏生活"},
        {"gh_id": "gh_98ec0ffe69b3", "account_name": "博史鉴览"},
        {"gh_id": "gh_2e615fa75ffb", "account_name": "好招妙"},
        {"gh_id": "gh_57c9e8babea7", "account_name": "福享暮年"},
        {"gh_id": "gh_bfea052b5baa", "account_name": "奇读趣史"},
        {"gh_id": "gh_6d3aa9d13402", "account_name": "悠读生活"},
    ]
    df = cal_account_read_rate(tuple(i['gh_id'] for i in account_gh_id_list))
    index_list = [1, 2, 3, 4, 5, 6, 7, 8]

    # The statement is the same for every row, so build it once.
    insert_sql = """
        INSERT INTO long_articles_read_rate
        (account_name, gh_id, position, read_rate_avg, remark, articles_count, earliest_publish_time, latest_publish_time, dt_version, is_delete)
        values
        (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
    """

    for account in tqdm(account_gh_id_list):
        for index in index_list:
            avg_rate, max_time, min_time, a_count = cal_avg_account_read_rate(df, account['gh_id'], index)
            print(account['account_name'], "\t", index, "\t", avg_rate, "\t", max_time, "\t", min_time, "\t", a_count,
                  "\t", account['gh_id'])

            # An empty selection yields a NaN mean and NaN timestamps, which
            # do not compare equal to 0; skip those explicitly instead of
            # letting the date conversion fail and be swallowed below.
            if a_count == 0 or isnan(avg_rate) or avg_rate == 0:
                continue

            try:
                lam.update(
                    sql=insert_sql,
                    params=(
                        account['account_name'],
                        account['gh_id'],
                        index,
                        avg_rate,
                        # Remark stored with the row; roughly "1022: exclude
                        # the counts where fans is 0".
                        "1022去掉粉丝为 0的计数",
                        a_count,
                        timestamp_to_str(min_time),
                        timestamp_to_str(max_time),
                        1022,
                        0
                    )
                )
            except Exception as e:
                # Best effort per row: the exception types raised by the DB
                # client are not visible here, so report and keep going.
                print(f"failed to save read rate for {account['gh_id']} position {index}: {e}")
# Run the read-rate job only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|