|
@@ -0,0 +1,234 @@
|
|
|
+"""
|
|
|
+@author: luojunhui
|
|
|
+"""
|
|
|
+import json
|
|
|
+
|
|
|
+from pandas import DataFrame
|
|
|
+from datetime import datetime
|
|
|
+from applications import longArticlesMySQL
|
|
|
+
|
|
|
+lam = longArticlesMySQL()  # shared module-level client; articleLevelUp.getBaseData runs its query through lam.select(sql)
|
|
|
+
|
|
|
+
|
|
|
class articleLevelUp(object):
    """
    Article promotion ("level up") analysis.

    Loads per-article statistics from the ``datastat_sort_strategy`` table
    and selects articles whose metrics exceed fixed percentile thresholds,
    so that they can be considered for a better publishing position.

    All methods are classmethods; the class holds no per-instance state.
    """

    # Labels assigned, in order, to the columns returned by the query in
    # getBaseData. The mapping is purely positional: position, fans, views,
    # average views, headline views, headline average views,
    # read-to-average multiple, read rate, mini-program open rate,
    # T+0 fission rate, title, link.
    columns = [
        "位置",
        "粉丝量",
        "阅读量",
        "平均阅读量",
        "头条阅读量",
        "头条平均阅读量",
        "阅读均值倍数",
        "阅读率",
        "小程序打开率",
        "T+0裂变率",
        "标题",
        "链接"
    ]

    # Metrics summarised by analysisDF and present in both stat maps below.
    _STAT_COLUMNS = ("阅读均值倍数", "阅读率", "小程序打开率", "T+0裂变率")

    # (label, quantile) pairs reported for every metric.
    _QUANTILES = (
        ("75%", 0.75),
        ("80%", 0.8),
        ("90%", 0.9),
        ("95%", 0.95),
        ("99%", 0.99),
    )

    # Hard-coded distribution statistics used as thresholds for articles in
    # positions 3-8. The layout matches what analysisDF produces
    # (presumably a snapshot of analysisDF([3, 4, 5, 6, 7, 8]) -- TODO confirm).
    statMapThreeToEight = {
        "阅读均值倍数": {
            "mean": 1.1388723507368606,
            "max": 62.50000000000001,
            "min": 0.0,
            "median": 0.8890469416785206,
            "75%": 1.2617516081147946,
            "80%": 1.37797320398902,
            "90%": 1.8733429945338946,
            "95%": 2.6455874825730517,
            "99%": 6.252251764489181
        },
        "阅读率": {
            "mean": 0.0006051220910642054,
            "max": 0.06252537555826228,
            "min": 0.0,
            "median": 0.0002241206067691894,
            "75%": 0.0005117154674215644,
            "80%": 0.0006449975188817015,
            "90%": 0.001255232384471895,
            "95%": 0.002233845658277497,
            "99%": 0.00633843067255787
        },
        "小程序打开率": {
            "mean": 0.062085135696479415,
            "max": 1.0,
            "min": 0.0,
            "median": 0.045454545454545456,
            "75%": 0.08695652173913043,
            "80%": 0.1,
            "90%": 0.14285714285714285,
            "95%": 0.18518518518518517,
            "99%": 0.310463054187192
        },
        "T+0裂变率": {
            "mean": 0.35277482885383377,
            "max": 181.0,
            "min": 0.0,
            "median": 0.0,
            "75%": 0.0,
            "80%": 0.09090909090909091,
            "90%": 0.6666666666666666,
            "95%": 1.5,
            "99%": 6.0
        }
    }

    # Hard-coded distribution statistics used as thresholds for articles in
    # position 2 (presumably a snapshot of analysisDF([2]) -- TODO confirm).
    statMapTwoToOne = {
        "阅读均值倍数": {
            "mean": 1.0242728432910957,
            "max": 4.921632060507756,
            "min": 0.04236315118498048,
            "median": 0.9604958720021857,
            "75%": 1.237352622811623,
            "80%": 1.3131587863024974,
            "90%": 1.5778563945144477,
            "95%": 1.8312064951656155,
            "99%": 2.5125234834603165
        },
        "阅读率": {
            "mean": 0.0073535037464145655,
            "max": 0.05265662356955502,
            "min": 0.00020895172629276676,
            "median": 0.005941952332154309,
            "75%": 0.009324205525316574,
            "80%": 0.010420614811741105,
            "90%": 0.013728137204835086,
            "95%": 0.01704242661483454,
            "99%": 0.02622215995438508
        },
        "小程序打开率": {
            "mean": 0.14893695109764848,
            "max": 2.5,
            "min": 0.0,
            "median": 0.1360318513603185,
            "75%": 0.1875,
            "80%": 0.20230028849345147,
            "90%": 0.25449906489537877,
            "95%": 0.3051369784478383,
            "99%": 0.4016107123469446
        },
        "T+0裂变率": {
            "mean": 0.6465295965706923,
            "max": 12.804878048780488,
            "min": 0.0,
            "median": 0.48770491803278687,
            "75%": 0.8011363636363636,
            "80%": 0.9144722345551121,
            "90%": 1.317362236032163,
            "95%": 1.792137476827772,
            "99%": 3.277849462365585
        }
    }

    @classmethod
    def getBaseData(cls):
        """
        Load every row of ``datastat_sort_strategy`` into a DataFrame.

        :return: DataFrame whose columns are labelled with ``cls.columns``
                 (positional mapping onto the selected SQL columns).
        """
        sql = """
            SELECT
                position, fans, view_count, avg_view_count, first_view_count, first_avg_view_count, read_rate, read_fans_rate, first_read_rate, fission0_first_rate, title, link
            FROM
                datastat_sort_strategy;
        """
        response = lam.select(sql)
        return DataFrame(response, columns=cls.columns)

    @classmethod
    def _summarize(cls, series):
        """
        Build the statistics dict (mean/max/min/median/quantiles) for one column.

        Values are converted to built-in ``float`` so the result is always
        JSON-serialisable, even when the column has an integer dtype.
        """
        summary = {
            "mean": float(series.mean()),
            "max": float(series.max()),
            "min": float(series.min()),
            "median": float(series.median()),
        }
        for label, q in cls._QUANTILES:
            summary[label] = float(series.quantile(q))
        return summary

    @classmethod
    def _select_candidates(cls, positions, thresholds, percentile):
        """
        Return rows at ``positions`` whose T+0 fission rate AND
        read-to-average multiple both exceed ``thresholds[...][percentile]``.
        """
        data = cls.getBaseData()
        data = data[data["位置"].isin(positions)]
        above_fission = data["T+0裂变率"] > thresholds["T+0裂变率"][percentile]
        above_read = data["阅读均值倍数"] > thresholds["阅读均值倍数"][percentile]
        return data[above_fission & above_read]

    @classmethod
    def analysisDF(cls, indexList):
        """
        Summarise the distribution of the key metrics for the given positions.

        Prints the number of matching rows, then the statistics as indented
        JSON, in the same layout as ``statMapThreeToEight`` /
        ``statMapTwoToOne``.

        :param indexList: iterable of position values to keep.
        :return: the statistics dict that was printed.
        """
        frame = cls.getBaseData()
        frame = frame[frame["位置"].isin(indexList)]
        print(len(frame))
        detail = {name: cls._summarize(frame[name]) for name in cls._STAT_COLUMNS}
        print(json.dumps(detail, ensure_ascii=False, indent=4))
        return detail

    @classmethod
    def upLevel38To2(cls):
        """
        Select articles in positions 3-8 that qualify for promotion.

        :return: DataFrame of rows above the 95th-percentile thresholds of
                 ``statMapThreeToEight`` for both T+0 fission rate and
                 read-to-average multiple.
        """
        return cls._select_candidates(
            [3, 4, 5, 6, 7, 8], cls.statMapThreeToEight, "95%"
        )

    @classmethod
    def upLevel2To1(cls):
        """
        Select articles in position 2 that qualify for promotion.

        :return: DataFrame of rows above the 90th-percentile thresholds of
                 ``statMapTwoToOne`` for both T+0 fission rate and
                 read-to-average multiple.
        """
        # Position-2 rows must be judged against the position-2 distribution
        # (statMapTwoToOne), not the position 3-8 one.
        return cls._select_candidates([2], cls.statMapTwoToOne, "90%")
|
|
|
+
|
|
|
+
|
|
|
# Script section (runs on import): fetch the position-2 articles selected by
# upLevel2To1 and print each one's title and link, followed by blank lines.
U = articleLevelUp()
f_d = U.upLevel2To1()
for line in zip(f_d['标题'], f_d['链接']):
    # One print call emitting title, link and "\n" on separate lines.
    print(line[0], line[1], "\n", sep="\n")
|