""" @author: luojunhui """ import json from pandas import DataFrame from datetime import datetime from applications import longArticlesMySQL lam = longArticlesMySQL() class articleLevelUp(object): """ 文章晋级 """ columns = [ "位置", "粉丝量", "阅读量", "平均阅读量", "头条阅读量", "头条平均阅读量", "阅读均值倍数", "阅读率", "小程序打开率", "T+0裂变率", "标题", "链接" ] statMapThreeToEight = { "阅读均值倍数": { "mean": 1.1388723507368606, "max": 62.50000000000001, "min": 0.0, "median": 0.8890469416785206, "75%": 1.2617516081147946, "80%": 1.37797320398902, "90%": 1.8733429945338946, "95%": 2.6455874825730517, "99%": 6.252251764489181 }, "阅读率": { "mean": 0.0006051220910642054, "max": 0.06252537555826228, "min": 0.0, "median": 0.0002241206067691894, "75%": 0.0005117154674215644, "80%": 0.0006449975188817015, "90%": 0.001255232384471895, "95%": 0.002233845658277497, "99%": 0.00633843067255787 }, "小程序打开率": { "mean": 0.062085135696479415, "max": 1.0, "min": 0.0, "median": 0.045454545454545456, "75%": 0.08695652173913043, "80%": 0.1, "90%": 0.14285714285714285, "95%": 0.18518518518518517, "99%": 0.310463054187192 }, "T+0裂变率": { "mean": 0.35277482885383377, "max": 181.0, "min": 0.0, "median": 0.0, "75%": 0.0, "80%": 0.09090909090909091, "90%": 0.6666666666666666, "95%": 1.5, "99%": 6.0 } } statMapTwoToOne = { "阅读均值倍数": { "mean": 1.0242728432910957, "max": 4.921632060507756, "min": 0.04236315118498048, "median": 0.9604958720021857, "75%": 1.237352622811623, "80%": 1.3131587863024974, "90%": 1.5778563945144477, "95%": 1.8312064951656155, "99%": 2.5125234834603165 }, "阅读率": { "mean": 0.0073535037464145655, "max": 0.05265662356955502, "min": 0.00020895172629276676, "median": 0.005941952332154309, "75%": 0.009324205525316574, "80%": 0.010420614811741105, "90%": 0.013728137204835086, "95%": 0.01704242661483454, "99%": 0.02622215995438508 }, "小程序打开率": { "mean": 0.14893695109764848, "max": 2.5, "min": 0.0, "median": 0.1360318513603185, "75%": 0.1875, "80%": 0.20230028849345147, "90%": 0.25449906489537877, "95%": 0.3051369784478383, "99%": 0.4016107123469446 }, "T+0裂变率": { "mean": 0.6465295965706923, "max": 12.804878048780488, "min": 0.0, "median": 0.48770491803278687, "75%": 0.8011363636363636, "80%": 0.9144722345551121, "90%": 1.317362236032163, "95%": 1.792137476827772, "99%": 3.277849462365585 } } firstLevelMap = { "阅读均值倍数": { "mean": 1.0469541000103093, "max": 25.719380724649426, "min": 0.037429819089207735, "median": 0.9521466355025219, "75%": 1.2800839124458492, "80%": 1.370275508982941, "90%": 1.674800845262867, "95%": 1.995613204168999, "99%": 2.9869225601165135 }, "阅读率": { "mean": 0.016311355353310464, "max": 0.7427434456928839, "min": 0.0006011082360982278, "median": 0.01255841121495327, "75%": 0.020080845617803843, "80%": 0.022950649260452458, "90%": 0.03136776141996209, "95%": 0.0398727631704118, "99%": 0.05986584275411923 }, "小程序打开率": { "mean": 0.20655535828501095, "max": 0.8, "min": 0.0, "median": 0.19921326215228996, "75%": 0.25838983436476154, "80%": 0.27586206896551724, "90%": 0.32290043225754594, "95%": 0.3709317026683608, "99%": 0.4685840031614304 }, "T+0裂变率": { "mean": 0.6660929834568661, "max": 46.0, "min": 0.0, "median": 0.5434782608695652, "75%": 0.7940509083886685, "80%": 0.8776439089692103, "90%": 1.159075752014066, "95%": 1.62348848368522, "99%": 2.785400696864109 } } @classmethod def readRateDebias(cls, row): """ 阅读均值倍数通过头条消偏 :param row: :return: """ if row["位置"] != 1: return row["阅读量"] / ( max(1.0, row["头条阅读量"] / row["头条阅读均值"]) * row["阅读均值"] ) else: return row["阅读均值倍数"] @classmethod def getBaseData(cls): """ :return: """ sql = f""" 
    @classmethod
    def getBaseData(cls):
        """
        Load per-article stats from datastat_sort_strategy and keep accounts with more than 10,000 followers
        :return:
        """
        sql = f"""
            SELECT position, fans, view_count, avg_view_count, first_view_count, first_avg_view_count,
                   read_rate, read_fans_rate, first_read_rate, fission0_first_rate, title, link
            FROM datastat_sort_strategy;
        """
        response = lam.select(sql)
        df = DataFrame(response, columns=cls.columns)
        df = df.sort_values(by=["阅读均值倍数"], ascending=[False]).reset_index(drop=True)
        df = df[df["粉丝量"] > 10000].reset_index(drop=True)
        return df

    @classmethod
    def analysisDF(cls, indexList):
        """
        Print the metric distribution (mean / extrema / quantiles) for the given positions
        :param indexList: positions to analyse, e.g. [1] or [3, 4, 5, 6, 7, 8]
        :return:
        """
        DF = cls.getBaseData()
        DF = DF[DF["位置"].isin(indexList)]
        print(len(DF))
        detail = {}
        for metric in ["阅读均值倍数", "阅读率", "小程序打开率", "T+0裂变率"]:
            series = DF[metric]
            detail[metric] = {
                "mean": series.mean(),
                "max": series.max(),
                "min": series.min(),
                "median": series.median(),
                "75%": series.quantile(0.75),
                "80%": series.quantile(0.8),
                "90%": series.quantile(0.9),
                "95%": series.quantile(0.95),
                "99%": series.quantile(0.99)
            }
        print(json.dumps(detail, ensure_ascii=False, indent=4))

    @classmethod
    def upLevel38To2(cls):
        """
        Promotion candidates: positions 3-8 -> position 2
        :return:
        """
        dataThreeToEight = cls.getBaseData()
        dataThreeToEight = dataThreeToEight[dataThreeToEight['位置'].isin([3, 4, 5, 6, 7, 8])]
        filter_data = dataThreeToEight[
            (dataThreeToEight['T+0裂变率'] > cls.statMapThreeToEight['T+0裂变率']['95%'])
            & (dataThreeToEight['阅读均值倍数'] > cls.statMapThreeToEight['阅读均值倍数']['80%'])
        ]
        return filter_data

    @classmethod
    def upLevel2To1(cls):
        """
        Promotion candidates: position 2 -> position 1
        :return:
        """
        dataTwo = cls.getBaseData()
        dataTwo = dataTwo[dataTwo['位置'].isin([2])]
        # thresholds are taken from the positions 3-8 distribution (statMapThreeToEight);
        # statMapTwoToOne holds the corresponding position-2 stats
        filter_data = dataTwo[
            (dataTwo['T+0裂变率'] > cls.statMapThreeToEight['T+0裂变率']['90%'])
            & (dataTwo['阅读均值倍数'] > cls.statMapThreeToEight['阅读均值倍数']['90%'])
        ]
        return filter_data


U = articleLevelUp()
U.analysisDF(indexList=[1])
f_d = U.upLevel2To1()
for title, link in zip(f_d['标题'], f_d['链接']):
    print(title)
    print(link)
    print("\n")
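
# A minimal sketch of applying readRateDebias row-wise to the base data; the
# output column name "阅读均值倍数_消偏" is a hypothetical label chosen here for
# illustration, not an existing field.
debiased = U.getBaseData()
# skip rows with zero averages to avoid division by zero inside readRateDebias
debiased = debiased[(debiased["平均阅读量"] > 0) & (debiased["头条平均阅读量"] > 0)]
debiased["阅读均值倍数_消偏"] = debiased.apply(articleLevelUp.readRateDebias, axis=1)
print(debiased[["位置", "阅读均值倍数", "阅读均值倍数_消偏"]].head())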