|
@@ -1,296 +0,0 @@
|
|
|
-"""
|
|
|
-@author: luojunhui
|
|
|
-"""
|
|
|
-import json
|
|
|
-
|
|
|
-from pandas import DataFrame
|
|
|
-from datetime import datetime
|
|
|
-from applications import longArticlesMySQL
|
|
|
-
|
|
|
-lam = longArticlesMySQL()  # shared longArticlesMySQL instance; articleLevelUp.getBaseData issues its SELECT through lam.select()
|
|
|
-
|
|
|
-
|
|
|
class articleLevelUp(object):
    """
    Article promotion ("level-up") analysis.

    The class holds hard-coded distribution statistics for four metrics and
    uses selected percentiles from them as thresholds for picking articles
    that qualify for a better slot ("位置").  All methods are classmethods;
    no instance state is used.

    NOTE(review): the three stat maps have the same shape as the dictionary
    printed by ``analysisDF`` and were presumably captured from earlier runs
    of it (positions 3-8, position 2 and position 1) -- confirm before
    refreshing the numbers.
    """

    # Labels applied, in order, to the columns selected in getBaseData.
    # NOTE(review): the mapping is purely positional (e.g. SQL `read_rate`
    # becomes "阅读均值倍数", `first_read_rate` becomes "小程序打开率") --
    # confirm it matches the table's semantics.
    columns = [
        "位置",
        "粉丝量",
        "阅读量",
        "平均阅读量",
        "头条阅读量",
        "头条平均阅读量",
        "阅读均值倍数",
        "阅读率",
        "小程序打开率",
        "T+0裂变率",
        "标题",
        "链接"
    ]
    # Thresholds used by upLevel38To2 (rows whose position is 3..8).
    statMapThreeToEight = {
        "阅读均值倍数": {
            "mean": 1.1388723507368606,
            "max": 62.50000000000001,
            "min": 0.0,
            "median": 0.8890469416785206,
            "75%": 1.2617516081147946,
            "80%": 1.37797320398902,
            "90%": 1.8733429945338946,
            "95%": 2.6455874825730517,
            "99%": 6.252251764489181
        },
        "阅读率": {
            "mean": 0.0006051220910642054,
            "max": 0.06252537555826228,
            "min": 0.0,
            "median": 0.0002241206067691894,
            "75%": 0.0005117154674215644,
            "80%": 0.0006449975188817015,
            "90%": 0.001255232384471895,
            "95%": 0.002233845658277497,
            "99%": 0.00633843067255787
        },
        "小程序打开率": {
            "mean": 0.062085135696479415,
            "max": 1.0,
            "min": 0.0,
            "median": 0.045454545454545456,
            "75%": 0.08695652173913043,
            "80%": 0.1,
            "90%": 0.14285714285714285,
            "95%": 0.18518518518518517,
            "99%": 0.310463054187192
        },
        "T+0裂变率": {
            "mean": 0.35277482885383377,
            "max": 181.0,
            "min": 0.0,
            "median": 0.0,
            "75%": 0.0,
            "80%": 0.09090909090909091,
            "90%": 0.6666666666666666,
            "95%": 1.5,
            "99%": 6.0
        }
    }
    # Thresholds used by upLevel2To1 (rows whose position is 2).
    statMapTwoToOne = {
        "阅读均值倍数": {
            "mean": 1.0242728432910957,
            "max": 4.921632060507756,
            "min": 0.04236315118498048,
            "median": 0.9604958720021857,
            "75%": 1.237352622811623,
            "80%": 1.3131587863024974,
            "90%": 1.5778563945144477,
            "95%": 1.8312064951656155,
            "99%": 2.5125234834603165
        },
        "阅读率": {
            "mean": 0.0073535037464145655,
            "max": 0.05265662356955502,
            "min": 0.00020895172629276676,
            "median": 0.005941952332154309,
            "75%": 0.009324205525316574,
            "80%": 0.010420614811741105,
            "90%": 0.013728137204835086,
            "95%": 0.01704242661483454,
            "99%": 0.02622215995438508
        },
        "小程序打开率": {
            "mean": 0.14893695109764848,
            "max": 2.5,
            "min": 0.0,
            "median": 0.1360318513603185,
            "75%": 0.1875,
            "80%": 0.20230028849345147,
            "90%": 0.25449906489537877,
            "95%": 0.3051369784478383,
            "99%": 0.4016107123469446
        },
        "T+0裂变率": {
            "mean": 0.6465295965706923,
            "max": 12.804878048780488,
            "min": 0.0,
            "median": 0.48770491803278687,
            "75%": 0.8011363636363636,
            "80%": 0.9144722345551121,
            "90%": 1.317362236032163,
            "95%": 1.792137476827772,
            "99%": 3.277849462365585
        }
    }
    # Not read by any method in this class; kept as reference data.
    firstLevelMap = {
        "阅读均值倍数": {
            "mean": 1.0469541000103093,
            "max": 25.719380724649426,
            "min": 0.037429819089207735,
            "median": 0.9521466355025219,
            "75%": 1.2800839124458492,
            "80%": 1.370275508982941,
            "90%": 1.674800845262867,
            "95%": 1.995613204168999,
            "99%": 2.9869225601165135
        },
        "阅读率": {
            "mean": 0.016311355353310464,
            "max": 0.7427434456928839,
            "min": 0.0006011082360982278,
            "median": 0.01255841121495327,
            "75%": 0.020080845617803843,
            "80%": 0.022950649260452458,
            "90%": 0.03136776141996209,
            "95%": 0.0398727631704118,
            "99%": 0.05986584275411923
        },
        "小程序打开率": {
            "mean": 0.20655535828501095,
            "max": 0.8,
            "min": 0.0,
            "median": 0.19921326215228996,
            "75%": 0.25838983436476154,
            "80%": 0.27586206896551724,
            "90%": 0.32290043225754594,
            "95%": 0.3709317026683608,
            "99%": 0.4685840031614304
        },
        "T+0裂变率": {
            "mean": 0.6660929834568661,
            "max": 46.0,
            "min": 0.0,
            "median": 0.5434782608695652,
            "75%": 0.7940509083886685,
            "80%": 0.8776439089692103,
            "90%": 1.159075752014066,
            "95%": 1.62348848368522,
            "99%": 2.785400696864109
        }
    }

    # Metric columns summarised by analysisDF, in output order.
    _metricColumns = ("阅读均值倍数", "阅读率", "小程序打开率", "T+0裂变率")
    # (label, quantile) pairs reported for every metric, in output order.
    _quantileLevels = (
        ("75%", 0.75),
        ("80%", 0.8),
        ("90%", 0.9),
        ("95%", 0.95),
        ("99%", 0.99),
    )

    @staticmethod
    def _firstPresent(row, *keys):
        """Return ``row[key]`` for the first of *keys* contained in *row*."""
        for key in keys:
            if key in row:
                return row[key]
        # Same exception type a plain ``row[key]`` lookup would raise.
        raise KeyError(keys[0])

    @classmethod
    def readRateDebias(cls, row):
        """
        De-bias the read-average multiple of a non-headline article.

        For a row whose "位置" is not 1 the article's "阅读量" is divided by
        its own average reads scaled by ``max(1.0, headline reads / headline
        average reads)``.  A row at position 1 returns its stored
        "阅读均值倍数" unchanged.

        The two averages are looked up under the legacy keys
        "头条阅读均值" / "阅读均值" first and, when those are absent, under
        the labels declared in ``columns`` ("头条平均阅读量" /
        "平均阅读量"), so rows produced by ``getBaseData`` work as well.

        :param row: mapping-like row (dict or pandas Series)
        :return: the de-biased multiple
        :raises KeyError: when a required key is missing from *row*
        """
        if row["位置"] == 1:
            return row["阅读均值倍数"]
        views = row["阅读量"]
        head_views = row["头条阅读量"]
        head_avg = cls._firstPresent(row, "头条阅读均值", "头条平均阅读量")
        own_avg = cls._firstPresent(row, "阅读均值", "平均阅读量")
        # Only scale the baseline up, never down: the headline ratio is
        # floored at 1.0.
        return views / (max(1.0, head_views / head_avg) * own_avg)

    @classmethod
    def getBaseData(cls):
        """
        Load the base article statistics.

        Runs a SELECT on ``datastat_sort_strategy`` through the module-level
        ``lam`` client, labels the result with ``columns``, orders it by
        "阅读均值倍数" descending and keeps only rows whose "粉丝量" exceeds
        10000.

        :return: DataFrame with a fresh 0..n-1 index
        """
        # No placeholders are interpolated, so a plain string is sufficient.
        sql = """
            SELECT
                position, fans, view_count, avg_view_count, first_view_count, first_avg_view_count, read_rate, read_fans_rate, first_read_rate, fission0_first_rate, title, link
            FROM
                datastat_sort_strategy;
        """
        response = lam.select(sql)
        frame = DataFrame(response, columns=cls.columns)
        frame = frame.sort_values(by=["阅读均值倍数"], ascending=[False])
        return frame[frame["粉丝量"] > 10000].reset_index(drop=True)

    @classmethod
    def _summarize(cls, series):
        """Return mean/max/min/median and the configured quantiles of *series*."""
        # The descending sort mirrors the earlier implementation so the
        # order in which values are aggregated stays the same.
        ordered = series.sort_values(ascending=False)
        summary = {
            "mean": ordered.mean(),
            "max": ordered.max(),
            "min": ordered.min(),
            "median": ordered.median(),
        }
        for label, level in cls._quantileLevels:
            summary[label] = ordered.quantile(level)
        return summary

    @classmethod
    def analysisDF(cls, indexList):
        """
        Print distribution statistics of the metric columns.

        Restricts the base data to rows whose "位置" is in *indexList*,
        prints the number of such rows, then prints a JSON document with
        mean, max, min, median and the 75/80/90/95/99 % quantiles of each of
        the four metric columns.

        :param indexList: positions to include
        :return: None
        """
        frame = cls.getBaseData()
        frame = frame[frame["位置"].isin(indexList)]
        print(len(frame))
        detail = {
            metric: cls._summarize(frame[metric])
            for metric in cls._metricColumns
        }
        print(json.dumps(detail, ensure_ascii=False, indent=4))

    @classmethod
    def _promotionCandidates(cls, positions, statMap, fissionLevel, readTimesLevel):
        """Return base-data rows at *positions* strictly above both *statMap* thresholds."""
        frame = cls.getBaseData()
        frame = frame[frame["位置"].isin(positions)]
        fission_ok = frame["T+0裂变率"] > statMap["T+0裂变率"][fissionLevel]
        read_times_ok = frame["阅读均值倍数"] > statMap["阅读均值倍数"][readTimesLevel]
        return frame[fission_ok & read_times_ok]

    @classmethod
    def upLevel38To2(cls):
        """
        Select articles at positions 3-8 that clear the promotion thresholds.

        A row qualifies when its "T+0裂变率" is above the 95 % value and its
        "阅读均值倍数" is above the 80 % value of ``statMapThreeToEight``.

        :return: DataFrame of qualifying rows
        """
        return cls._promotionCandidates(
            [3, 4, 5, 6, 7, 8], cls.statMapThreeToEight, "95%", "80%"
        )

    @classmethod
    def upLevel2To1(cls):
        """
        Select articles at position 2 that clear the promotion thresholds.

        A row qualifies when both its "T+0裂变率" and its "阅读均值倍数" are
        above the 90 % values of ``statMapTwoToOne``.  The thresholds were
        previously read from ``statMapThreeToEight``, which describes a
        different set of positions.

        :return: DataFrame of qualifying rows
        """
        return cls._promotionCandidates(
            [2], cls.statMapTwoToOne, "90%", "90%"
        )
|
|
|
-
|
|
|
-
|
|
|
# Script section: print position-1 statistics, then list every position-2
# article that upLevel2To1 selects.
U = articleLevelUp()
U.analysisDF(indexList=[1])
f_d = U.upLevel2To1()
for line in zip(f_d["标题"], f_d["链接"]):
    # One title, one link, then a "\n" string -- each followed by a newline.
    print(line[0], line[1], "\n", sep="\n")
|