"""
@author: luojunhui
"""
import json
from datetime import datetime

from pandas import DataFrame

from applications import longArticlesMySQL

# Shared DB client; created at import time and used by articleLevelUp.getBaseData.
lam = longArticlesMySQL()


class articleLevelUp(object):
    """
    Article level-up (promotion) analysis.

    Loads per-article statistics from ``datastat_sort_strategy``, summarises
    four metric columns for a set of positions, and selects rows whose metrics
    exceed percentile thresholds stored in the class-level stat maps.
    """

    # Labels applied, in order, to the columns selected by getBaseData.
    columns = [
        "位置",
        "粉丝量",
        "阅读量",
        "平均阅读量",
        "头条阅读量",
        "头条平均阅读量",
        "阅读均值倍数",
        "阅读率",
        "小程序打开率",
        "T+0裂变率",
        "标题",
        "链接"
    ]

    # The metric columns summarised by analysisDF, in output order.
    _metricColumns = ("阅读均值倍数", "阅读率", "小程序打开率", "T+0裂变率")

    # Threshold table used by upLevel38To2. Same layout as the dict printed by
    # analysisDF; presumably a snapshot of that output for positions 3-8
    # -- TODO confirm how/when it was generated.
    statMapThreeToEight = {
        "阅读均值倍数": {
            "mean": 1.1388723507368606,
            "max": 62.50000000000001,
            "min": 0.0,
            "median": 0.8890469416785206,
            "75%": 1.2617516081147946,
            "80%": 1.37797320398902,
            "90%": 1.8733429945338946,
            "95%": 2.6455874825730517,
            "99%": 6.252251764489181
        },
        "阅读率": {
            "mean": 0.0006051220910642054,
            "max": 0.06252537555826228,
            "min": 0.0,
            "median": 0.0002241206067691894,
            "75%": 0.0005117154674215644,
            "80%": 0.0006449975188817015,
            "90%": 0.001255232384471895,
            "95%": 0.002233845658277497,
            "99%": 0.00633843067255787
        },
        "小程序打开率": {
            "mean": 0.062085135696479415,
            "max": 1.0,
            "min": 0.0,
            "median": 0.045454545454545456,
            "75%": 0.08695652173913043,
            "80%": 0.1,
            "90%": 0.14285714285714285,
            "95%": 0.18518518518518517,
            "99%": 0.310463054187192
        },
        "T+0裂变率": {
            "mean": 0.35277482885383377,
            "max": 181.0,
            "min": 0.0,
            "median": 0.0,
            "75%": 0.0,
            "80%": 0.09090909090909091,
            "90%": 0.6666666666666666,
            "95%": 1.5,
            "99%": 6.0
        }
    }

    # Threshold table used by upLevel2To1. Same layout as above; presumably a
    # snapshot for position 2 -- TODO confirm.
    statMapTwoToOne = {
        "阅读均值倍数": {
            "mean": 1.0242728432910957,
            "max": 4.921632060507756,
            "min": 0.04236315118498048,
            "median": 0.9604958720021857,
            "75%": 1.237352622811623,
            "80%": 1.3131587863024974,
            "90%": 1.5778563945144477,
            "95%": 1.8312064951656155,
            "99%": 2.5125234834603165
        },
        "阅读率": {
            "mean": 0.0073535037464145655,
            "max": 0.05265662356955502,
            "min": 0.00020895172629276676,
            "median": 0.005941952332154309,
            "75%": 0.009324205525316574,
            "80%": 0.010420614811741105,
            "90%": 0.013728137204835086,
            "95%": 0.01704242661483454,
            "99%": 0.02622215995438508
        },
        "小程序打开率": {
            "mean": 0.14893695109764848,
            "max": 2.5,
            "min": 0.0,
            "median": 0.1360318513603185,
            "75%": 0.1875,
            "80%": 0.20230028849345147,
            "90%": 0.25449906489537877,
            "95%": 0.3051369784478383,
            "99%": 0.4016107123469446
        },
        "T+0裂变率": {
            "mean": 0.6465295965706923,
            "max": 12.804878048780488,
            "min": 0.0,
            "median": 0.48770491803278687,
            "75%": 0.8011363636363636,
            "80%": 0.9144722345551121,
            "90%": 1.317362236032163,
            "95%": 1.792137476827772,
            "99%": 3.277849462365585
        }
    }

    # Same layout as the tables above. Not referenced anywhere else in this
    # file; presumably a snapshot for position 1 -- TODO confirm.
    firstLevelMap = {
        "阅读均值倍数": {
            "mean": 1.0469541000103093,
            "max": 25.719380724649426,
            "min": 0.037429819089207735,
            "median": 0.9521466355025219,
            "75%": 1.2800839124458492,
            "80%": 1.370275508982941,
            "90%": 1.674800845262867,
            "95%": 1.995613204168999,
            "99%": 2.9869225601165135
        },
        "阅读率": {
            "mean": 0.016311355353310464,
            "max": 0.7427434456928839,
            "min": 0.0006011082360982278,
            "median": 0.01255841121495327,
            "75%": 0.020080845617803843,
            "80%": 0.022950649260452458,
            "90%": 0.03136776141996209,
            "95%": 0.0398727631704118,
            "99%": 0.05986584275411923
        },
        "小程序打开率": {
            "mean": 0.20655535828501095,
            "max": 0.8,
            "min": 0.0,
            "median": 0.19921326215228996,
            "75%": 0.25838983436476154,
            "80%": 0.27586206896551724,
            "90%": 0.32290043225754594,
            "95%": 0.3709317026683608,
            "99%": 0.4685840031614304
        },
        "T+0裂变率": {
            "mean": 0.6660929834568661,
            "max": 46.0,
            "min": 0.0,
            "median": 0.5434782608695652,
            "75%": 0.7940509083886685,
            "80%": 0.8776439089692103,
            "90%": 1.159075752014066,
            "95%": 1.62348848368522,
            "99%": 2.785400696864109
        }
    }

    @staticmethod
    def _pickColumn(row, legacyKey, columnKey):
        """
        Return ``row[legacyKey]`` when that key exists, else ``row[columnKey]``.

        :param row: mapping-like row (dict or pandas Series)
        :param legacyKey: key name originally read by readRateDebias
        :param columnKey: equivalent label from ``columns``
        :return: the value stored under whichever key is present
        """
        return row[legacyKey] if legacyKey in row else row[columnKey]

    @classmethod
    def readRateDebias(cls, row):
        """
        De-bias the read-multiple of a non-headline article using the headline.

        For position 1 the stored "阅读均值倍数" is returned unchanged. For any
        other position the article's reads are divided by its average reads
        scaled by ``max(1.0, headline reads / headline average reads)``.

        The averages are read from "头条阅读均值" / "阅读均值" when the row has
        those keys (the names this method originally used), otherwise from
        "头条平均阅读量" / "平均阅读量", which are the labels ``getBaseData``
        actually produces. Without the fallback every DataFrame row raised
        KeyError.

        :param row: mapping-like row with the columns described above
        :return: de-biased read multiple
        """
        if row["位置"] == 1:
            return row["阅读均值倍数"]

        first_avg = cls._pickColumn(row, "头条阅读均值", "头条平均阅读量")
        own_avg = cls._pickColumn(row, "阅读均值", "平均阅读量")
        # NOTE(review): a zero average still divides by zero here, as before.
        headline_factor = max(1.0, row["头条阅读量"] / first_avg)
        return row["阅读量"] / (headline_factor * own_avg)

    @classmethod
    def getBaseData(cls):
        """
        Load all rows of ``datastat_sort_strategy`` into a DataFrame.

        The frame is labelled with ``columns``, sorted by "阅读均值倍数"
        descending, and restricted to rows whose "粉丝量" exceeds 10000.

        :return: filtered DataFrame with a fresh 0..n-1 index
        """
        sql = """
        SELECT
            position, fans, view_count, avg_view_count, first_view_count, first_avg_view_count, read_rate, read_fans_rate, first_read_rate, fission0_first_rate, title, link
        FROM
            datastat_sort_strategy;
        """
        response = lam.select(sql)
        df = DataFrame(response, columns=cls.columns)
        df = df.sort_values(by=["阅读均值倍数"], ascending=[False]).reset_index(drop=True)
        df = df[df["粉丝量"] > 10000].reset_index(drop=True)
        return df

    @staticmethod
    def _describeSeries(series):
        """
        Summarise one metric column.

        :param series: pandas Series of metric values
        :return: dict with mean, max, min, median and the 75/80/90/95/99th
                 percentiles, in that key order
        """
        # Sorted first, exactly as the original code did, so the floating
        # point results are bit-for-bit the same.
        ordered = series.sort_values(ascending=False)
        return {
            "mean": ordered.mean(),
            "max": ordered.max(),
            "min": ordered.min(),
            "median": ordered.median(),
            "75%": ordered.quantile(0.75),
            "80%": ordered.quantile(0.8),
            "90%": ordered.quantile(0.9),
            "95%": ordered.quantile(0.95),
            "99%": ordered.quantile(0.99)
        }

    @classmethod
    def analysisDF(cls, indexList):
        """
        Print distribution statistics for the rows at the given positions.

        Prints the number of matching rows, then a JSON document with one
        summary per metric column.

        :param indexList: iterable of "位置" values to keep
        :return: None
        """
        DF = cls.getBaseData()
        DF = DF[DF["位置"].isin(indexList)]
        print(len(DF))
        detail = {
            metric: cls._describeSeries(DF[metric])
            for metric in cls._metricColumns
        }
        print(json.dumps(detail, ensure_ascii=False, indent=4))

    @classmethod
    def _selectCandidates(cls, positions, statMap, fissionKey, readTimesKey):
        """
        Return rows at ``positions`` that beat both thresholds in ``statMap``.

        :param positions: list of "位置" values to consider
        :param statMap: threshold table (one of the class-level stat maps)
        :param fissionKey: percentile key for the "T+0裂变率" threshold
        :param readTimesKey: percentile key for the "阅读均值倍数" threshold
        :return: DataFrame of rows strictly above both thresholds
        """
        data = cls.getBaseData()
        data = data[data["位置"].isin(positions)]
        above_fission = data["T+0裂变率"] > statMap["T+0裂变率"][fissionKey]
        above_read_times = data["阅读均值倍数"] > statMap["阅读均值倍数"][readTimesKey]
        return data[above_fission & above_read_times]

    @classmethod
    def upLevel38To2(cls):
        """
        Select articles at positions 3-8 that qualify for promotion.

        A row qualifies when "T+0裂变率" exceeds the 95% entry and
        "阅读均值倍数" exceeds the 80% entry of ``statMapThreeToEight``.

        :return: DataFrame of qualifying rows
        """
        return cls._selectCandidates(
            positions=[3, 4, 5, 6, 7, 8],
            statMap=cls.statMapThreeToEight,
            fissionKey="95%",
            readTimesKey="80%",
        )

    @classmethod
    def upLevel2To1(cls):
        """
        Select articles at position 2 that qualify for promotion.

        A row qualifies when both "T+0裂变率" and "阅读均值倍数" exceed their
        90% entries in ``statMapTwoToOne``.

        The original code read these thresholds from ``statMapThreeToEight``,
        which looks like a copy-paste from upLevel38To2 (``statMapTwoToOne``
        was otherwise unused). NOTE(review): confirm with the owner that the
        position-2 table is the intended one.

        :return: DataFrame of qualifying rows
        """
        return cls._selectCandidates(
            positions=[2],
            statMap=cls.statMapTwoToOne,
            fissionKey="90%",
            readTimesKey="90%",
        )


# NOTE(review): the statements below run on import (they query the database
# and print). Left at module level to preserve existing behaviour; consider
# moving them under an ``if __name__ == "__main__":`` guard.
U = articleLevelUp()
U.analysisDF(indexList=[1])
f_d = U.upLevel2To1()
for title, link in zip(f_d['标题'], f_d['链接']):
    print(title)
    print(link)
    print("\n")