cal_account_read_rate_avg.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495
  1. """
  2. @author: luojunhui
  3. cal each account && position reading rate
  4. """
  5. import json
  6. import time
  7. from tqdm import tqdm
  8. from datetime import datetime, timezone
  9. from pandas import DataFrame
  10. from applications import DeNetMysql, PQMySQL, longArticlesMySQL
  11. def timestamp_to_str(timestamp) -> str:
  12. """
  13. :param timestamp:
  14. """
  15. dt_object = datetime.utcfromtimestamp(timestamp).replace(tzinfo=timezone.utc).astimezone()
  16. date_string = dt_object.strftime('%Y-%m-%d')
  17. return date_string
  18. def get_account_fans_by_dt(db_client) -> dict:
  19. """
  20. 获取每个账号发粉丝,通过日期来区分
  21. :return:
  22. """
  23. sql = f"""
  24. SELECT
  25. t1.date_str,
  26. t1.fans_count,
  27. t2.gh_id
  28. FROM datastat_wx t1
  29. JOIN publish_account t2 ON t1.account_id = t2.id
  30. WHERE
  31. t2.channel = 5
  32. AND t2.status = 1
  33. AND t1.date_str >= '2024-07-01'
  34. ORDER BY t1.date_str;
  35. """
  36. result = db_client.select(sql)
  37. D = {}
  38. for line in result:
  39. dt = line[0]
  40. fans = line[1]
  41. gh_id = line[2]
  42. if D.get(gh_id):
  43. D[gh_id][dt] = fans
  44. else:
  45. D[gh_id] = {dt: fans}
  46. return D
  47. def get_account_articles_detail(db_client, gh_id_tuple) -> list[dict]:
  48. """
  49. get articles details
  50. :return:
  51. """
  52. sql = f"""
  53. SELECT
  54. ghId, accountName, updateTime, ItemIndex, show_view_count
  55. FROM
  56. official_articles_v2
  57. WHERE
  58. ghId IN {gh_id_tuple};
  59. """
  60. result = db_client.select(sql)
  61. response_list = [
  62. {
  63. "ghId": i[0],
  64. "accountName": i[1],
  65. "updateTime": i[2],
  66. "ItemIndex": i[3],
  67. "show_view_count": i[4]
  68. }
  69. for i in result
  70. ]
  71. return response_list
  72. def cal_account_read_rate(gh_id_tuple) -> DataFrame:
  73. """
  74. 计算账号位置的阅读率
  75. :return:
  76. """
  77. pq_db = PQMySQL()
  78. de_db = DeNetMysql()
  79. response = []
  80. fans_dict_each_day = get_account_fans_by_dt(db_client=de_db)
  81. account_article_detail = get_account_articles_detail(
  82. db_client=pq_db,
  83. gh_id_tuple=gh_id_tuple
  84. )
  85. for line in account_article_detail:
  86. gh_id = line['ghId']
  87. dt = timestamp_to_str(line['updateTime'])
  88. fans = fans_dict_each_day.get(gh_id, {}).get(dt, 0)
  89. line['fans'] = fans
  90. if fans:
  91. line['readRate'] = line['show_view_count'] / fans if fans else 0
  92. response.append(line)
  93. return DataFrame(response,
  94. columns=['ghId', 'accountName', 'updateTime', 'ItemIndex', 'show_view_count', 'readRate'])
  95. def cal_avg_account_read_rate(df, gh_id, index) -> tuple:
  96. """
  97. 计算账号的阅读率均值
  98. :return:
  99. """
  100. min_time = int(time.time()) - 60 * 24 * 3600
  101. max_time = int(time.time())
  102. filterDataFrame = df[
  103. (df["ghId"] == gh_id)
  104. & (min_time <= df["updateTime"])
  105. & (df["updateTime"] <= max_time)
  106. & (df['ItemIndex'] == index)
  107. ]
  108. return (
  109. filterDataFrame['readRate'].mean(),
  110. filterDataFrame['updateTime'].max(),
  111. filterDataFrame['updateTime'].min(),
  112. len(filterDataFrame)
  113. )
  114. def main() -> None:
  115. """
  116. main function
  117. :return:
  118. """
  119. lam = longArticlesMySQL()
  120. account_gh_id_list = [
  121. {
  122. "gh_id": "gh_9e559b3b94ca",
  123. "account_name": "票圈大事件"
  124. },
  125. {
  126. "gh_id": "gh_084a485e859a",
  127. "account_name": "生活情感叁读"
  128. },
  129. {
  130. "gh_id": "gh_1ee2e1b39ccf",
  131. "account_name": "票圈最新消息"
  132. },
  133. {
  134. "gh_id": "gh_4c058673c07e",
  135. "account_name": "家家生活指南"
  136. },
  137. {
  138. "gh_id": "gh_de9f9ebc976b",
  139. "account_name": "妙招持家帮手"
  140. },
  141. {
  142. "gh_id": "gh_058e41145a0c",
  143. "account_name": "多彩妙生活"
  144. },
  145. {
  146. "gh_id": "gh_7b4a5f86d68c",
  147. "account_name": "异闻趣事多"
  148. },
  149. {
  150. "gh_id": "gh_4568b5a7e2fe",
  151. "account_name": "窦都事说"
  152. },
  153. {
  154. "gh_id": "gh_adca24a8f429",
  155. "account_name": "史记趣言"
  156. },
  157. {
  158. "gh_id": "gh_e24da99dc899",
  159. "account_name": "缘来养心厅"
  160. },
  161. {
  162. "gh_id": "gh_e0eb490115f5",
  163. "account_name": "心灵情感驿站"
  164. },
  165. {
  166. "gh_id": "gh_d2cc901deca7",
  167. "account_name": "票圈极速版"
  168. },
  169. {
  170. "gh_id": "gh_26a307578776",
  171. "account_name": "票圈美文速递"
  172. },
  173. {
  174. "gh_id": "gh_183d80deffb8",
  175. "account_name": "生活良读"
  176. },
  177. {
  178. "gh_id": "gh_5ff48e9fb9ef",
  179. "account_name": "祝福养心厅"
  180. },
  181. {
  182. "gh_id": "gh_9f8dc5b0c74e",
  183. "account_name": "音药金曲厅"
  184. },
  185. {
  186. "gh_id": "gh_6d9f36e3a7be",
  187. "account_name": "音药养心馆"
  188. },
  189. {
  190. "gh_id": "gh_ac43e43b253b",
  191. "account_name": "小阳看天下"
  192. },
  193. {
  194. "gh_id": "gh_d5f935d0d1f2",
  195. "account_name": "繁花史阁"
  196. },
  197. {
  198. "gh_id": "gh_be8c29139989",
  199. "account_name": "退休无忧生活"
  200. },
  201. {
  202. "gh_id": "gh_c91b42649690",
  203. "account_name": "农耕趣时刻"
  204. },
  205. {
  206. "gh_id": "gh_93e00e187787",
  207. "account_name": "小惠爱厨房"
  208. },
  209. {
  210. "gh_id": "gh_744cb16f6e16",
  211. "account_name": "趣史论"
  212. },
  213. {
  214. "gh_id": "gh_9877c8541764",
  215. "account_name": "退休老年圈"
  216. },
  217. {
  218. "gh_id": "gh_0c89e11f8bf3",
  219. "account_name": "幸福启示"
  220. },
  221. {
  222. "gh_id": "gh_6d205db62f04",
  223. "account_name": "指尖奇文"
  224. },
  225. {
  226. "gh_id": "gh_c69776baf2cd",
  227. "account_name": "老友欢聚地"
  228. },
  229. {
  230. "gh_id": "gh_6b7c2a257263",
  231. "account_name": "幸福晚年知音"
  232. },
  233. {
  234. "gh_id": "gh_bfe5b705324a",
  235. "account_name": "奇趣百味生活"
  236. },
  237. {
  238. "gh_id": "gh_29074b51f2b7",
  239. "account_name": "老来生活家"
  240. },
  241. {
  242. "gh_id": "gh_7e5818b2dd83",
  243. "account_name": "便捷生活好方法"
  244. },
  245. {
  246. "gh_id": "gh_89ef4798d3ea",
  247. "account_name": "生活百态观"
  248. },
  249. {
  250. "gh_id": "gh_bff0bcb0694a",
  251. "account_name": "喜乐生活派"
  252. },
  253. {
  254. "gh_id": "gh_a2901d34f75b",
  255. "account_name": "畅聊奇闻"
  256. },
  257. {
  258. "gh_id": "gh_b15de7c99912",
  259. "account_name": "人生百事观"
  260. },
  261. {
  262. "gh_id": "gh_56ca3dae948c",
  263. "account_name": "老友闲谈"
  264. },
  265. {
  266. "gh_id": "gh_e75dbdc73d80",
  267. "account_name": "票圈正能量"
  268. },
  269. {
  270. "gh_id": "gh_192c9cf58b13",
  271. "account_name": "天天学生活技巧"
  272. },
  273. {
  274. "gh_id": "gh_6cfd1132df94",
  275. "account_name": "趣味晚年"
  276. },
  277. {
  278. "gh_id": "gh_f25b5fb01977",
  279. "account_name": "生活晓常识"
  280. },
  281. {
  282. "gh_id": "gh_080bb43aa0dc",
  283. "account_name": "态度说"
  284. },
  285. {
  286. "gh_id": "gh_d49df5e974ca",
  287. "account_name": "生活指示录"
  288. },
  289. {
  290. "gh_id": "gh_5ae65db96cb7",
  291. "account_name": "路边闲聊社"
  292. },
  293. {
  294. "gh_id": "gh_72bace6b3059",
  295. "account_name": "幸福妙招合集"
  296. },
  297. {
  298. "gh_id": "gh_9eef14ad6c16",
  299. "account_name": "快乐精选集"
  300. },
  301. {
  302. "gh_id": "gh_c5cdf60d9ab4",
  303. "account_name": "老友快乐谈"
  304. },
  305. {
  306. "gh_id": "gh_7f5075624a50",
  307. "account_name": "都市镜头"
  308. },
  309. {
  310. "gh_id": "gh_d4dffc34ac39",
  311. "account_name": "情为老友"
  312. },
  313. {
  314. "gh_id": "gh_ff487cb5dab3",
  315. "account_name": "趣味生活达人"
  316. },
  317. {
  318. "gh_id": "gh_1b27dd1beeca",
  319. "account_name": "小贝生活课堂"
  320. },
  321. {
  322. "gh_id": "gh_1d887d61088c",
  323. "account_name": "乐享生活小窍门"
  324. },
  325. {
  326. "gh_id": "gh_3ed305b5817f",
  327. "account_name": "看不够妙招"
  328. },
  329. {
  330. "gh_id": "gh_dd4c857bbb36",
  331. "account_name": "无忧自在生活"
  332. },
  333. {
  334. "gh_id": "gh_f902cea89e48",
  335. "account_name": "无忧潮生活"
  336. },
  337. {
  338. "gh_id": "gh_b676b7ad9b74",
  339. "account_name": "无忧生活小妙招"
  340. },
  341. {
  342. "gh_id": "gh_b6f2c5332c72",
  343. "account_name": "巷尾风声"
  344. },
  345. {
  346. "gh_id": "gh_ee78360d06f5",
  347. "account_name": "实用妙招800个"
  348. },
  349. {
  350. "gh_id": "gh_68e7fdc09fe4",
  351. "account_name": "史趣探秘"
  352. },
  353. {
  354. "gh_id": "gh_789a40fe7935",
  355. "account_name": "史记有言"
  356. },
  357. {
  358. "gh_id": "gh_77f36c109fb1",
  359. "account_name": "暖心一隅"
  360. },
  361. {
  362. "gh_id": "gh_ac43eb24376d",
  363. "account_name": "麒阁史记"
  364. },
  365. {
  366. "gh_id": "gh_969f5ea5fee1",
  367. "account_name": "心海情澜起"
  368. },
  369. {
  370. "gh_id": "gh_57573f01b2ee",
  371. "account_name": "那些历史"
  372. },
  373. {
  374. "gh_id": "gh_008ef23062ee",
  375. "account_name": "日常生活小技巧集"
  376. },
  377. {
  378. "gh_id": "gh_3e91f0624545",
  379. "account_name": "趣谈史记"
  380. },
  381. {
  382. "gh_id": "gh_30816d8adb52",
  383. "account_name": "日常巧思集"
  384. },
  385. {
  386. "gh_id": "gh_51e4ad40466d",
  387. "account_name": "日常小妙招秘籍"
  388. },
  389. {
  390. "gh_id": "gh_7c66e0dbd2cf",
  391. "account_name": "晚年家人"
  392. },
  393. {
  394. "gh_id": "gh_03d32e83122f",
  395. "account_name": "快乐生活妙技巧"
  396. },
  397. {
  398. "gh_id": "gh_0e4fd9e88386",
  399. "account_name": "持家有妙招"
  400. },
  401. {
  402. "gh_id": "gh_95ed5ecf9363",
  403. "account_name": "生活小优招"
  404. },
  405. {
  406. "gh_id": "gh_970460d9ccec",
  407. "account_name": "生活之大全"
  408. },
  409. {
  410. "gh_id": "gh_749271f1ccd5",
  411. "account_name": "轻松生活方法"
  412. },
  413. {
  414. "gh_id": "gh_660afe87b6fd",
  415. "account_name": "趣读奇事"
  416. },
  417. {
  418. "gh_id": "gh_03d45c260115",
  419. "account_name": "晚年多享乐"
  420. },
  421. {
  422. "gh_id": "gh_1686250f15b6",
  423. "account_name": "福康俏生活"
  424. },
  425. {
  426. "gh_id": "gh_98ec0ffe69b3",
  427. "account_name": "博史鉴览"
  428. },
  429. {
  430. "gh_id": "gh_2e615fa75ffb",
  431. "account_name": "好招妙"
  432. },
  433. {
  434. "gh_id": "gh_57c9e8babea7",
  435. "account_name": "福享暮年"
  436. },
  437. {
  438. "gh_id": "gh_bfea052b5baa",
  439. "account_name": "奇读趣史"
  440. },
  441. {
  442. "gh_id": "gh_6d3aa9d13402",
  443. "account_name": "悠读生活"
  444. }
  445. ]
  446. df = cal_account_read_rate(tuple([i['gh_id'] for i in account_gh_id_list]))
  447. index_list = [1, 2, 3, 4, 5, 6, 7, 8]
  448. for account in tqdm(account_gh_id_list):
  449. for index in index_list:
  450. avg_rate, max_time, min_time, a_count = cal_avg_account_read_rate(df, account['gh_id'], index)
  451. print(account['account_name'], "\t", index, "\t", avg_rate, "\t", max_time, "\t", min_time, "\t", a_count,
  452. "\t", account['gh_id'])
  453. try:
  454. if avg_rate == 0:
  455. continue
  456. update_sql = f"""
  457. INSERT INTO long_articles_read_rate
  458. (account_name, gh_id, position, read_rate_avg, remark, articles_count, earliest_publish_time, latest_publish_time, dt_version, is_delete)
  459. values
  460. (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
  461. """
  462. lam.update(
  463. sql=update_sql,
  464. params=(
  465. account['account_name'],
  466. account['gh_id'],
  467. index,
  468. avg_rate,
  469. "1022去掉粉丝为 0的计数",
  470. a_count,
  471. timestamp_to_str(min_time),
  472. timestamp_to_str(max_time),
  473. 1022,
  474. 0
  475. )
  476. )
  477. except Exception as e:
  478. print(e)
  479. if __name__ == '__main__':
  480. main()