Jelajahi Sumber

计算阅读率均值
若没有获取到当天的粉丝数,则使用有粉丝数据的最近一天的粉丝数

luojunhui 11 bulan lalu
induk
melakukan
ff549e12ad
2 mengubah file dengan 42 tambahan dan 9 penghapusan
  1. 27 0
      applications/kimi_api.py
  2. 15 9
      cal_account_read_rate_avg_daily.py

+ 27 - 0
applications/kimi_api.py

@@ -0,0 +1,27 @@
+"""
+@author: luojunhui
+"""
+from openai import OpenAI
+
+
+def kimi_process(prompt, api_key=None):
+    """
+    Send one user prompt to the Moonshot (Kimi) chat API and return the reply.
+
+    :param prompt: text sent as the single user message
+    :param api_key: Moonshot key; defaults to env var MOONSHOT_API_KEY
+    :return: message content of the first completion choice
+    :raises ValueError: if no API key is passed or configured
+    """
+    import os  # local import: keeps the secret lookup self-contained
+
+    # Never hard-code the credential; read it from the caller or environment.
+    api_key = api_key or os.environ.get("MOONSHOT_API_KEY")
+    if not api_key:
+        raise ValueError("Moonshot API key missing: set MOONSHOT_API_KEY")
+    client = OpenAI(api_key=api_key, base_url="https://api.moonshot.cn/v1")
+    chat_completion = client.chat.completions.create(
+        messages=[{"role": "user", "content": prompt}],
+        model="moonshot-v1-8k",
+    )
+    return chat_completion.choices[0].message.content

+ 15 - 9
cal_account_read_rate_avg_daily.py

@@ -64,7 +64,7 @@ def str_to_timestamp(date_string) -> int:
     return int(timestamp)
 
 
-def get_account_fans_by_dt(db_client) -> dict:
+def get_account_fans_by_dt(db_client) -> tuple[dict, dict]:
     """
     获取每个账号发粉丝,通过日期来区分
     :return:
@@ -83,16 +83,21 @@ def get_account_fans_by_dt(db_client) -> dict:
         ORDER BY t1.date_str;
     """
     result = db_client.select(sql)
-    D = {}
+    # 分日期的粉丝数据
+    gh_id_fans_dt_dict = {}
+    # 不分日期的粉丝数据
+    gh_id_fans_dict = {}
     for line in result:
         dt = line[0]
         fans = line[1]
         gh_id = line[2]
-        if D.get(gh_id):
-            D[gh_id][dt] = fans
+        if gh_id_fans_dt_dict.get(gh_id):
+            gh_id_fans_dt_dict[gh_id][dt] = fans
         else:
-            D[gh_id] = {dt: fans}
-    return D
+            gh_id_fans_dt_dict[gh_id] = {dt: fans}
+        if fans:
+            gh_id_fans_dict[gh_id] = fans
+    return gh_id_fans_dt_dict, gh_id_fans_dict
 
 
 def get_publishing_accounts(db_client) -> list[dict]:
@@ -167,7 +172,7 @@ def cal_account_read_rate(gh_id_tuple) -> DataFrame:
     pq_db = PQMySQL()
     de_db = DeNetMysql()
     response = []
-    fans_dict_each_day = get_account_fans_by_dt(db_client=de_db)
+    fans_dict_each_day, fans_dict = get_account_fans_by_dt(db_client=de_db)
     account_article_detail = get_account_articles_detail(
         db_client=pq_db,
         gh_id_tuple=gh_id_tuple
@@ -176,6 +181,8 @@ def cal_account_read_rate(gh_id_tuple) -> DataFrame:
         gh_id = line['ghId']
         dt = timestamp_to_str(line['updateTime'])
         fans = fans_dict_each_day.get(gh_id, {}).get(dt, 0)
+        if fans == 0:
+            fans = fans_dict.get(gh_id, 0)
         line['fans'] = fans
         if fans:
             line['readRate'] = line['show_view_count'] / fans if fans else 0
@@ -198,9 +205,8 @@ def cal_avg_account_read_rate(df, gh_id, index, dt) -> tuple:
         & (df["updateTime"] <= max_time)
         & (df['ItemIndex'] == index)
         ]
-    # print("位置", index)
     finalDF = filter_outlier_data(filterDataFrame)
-    # finalDF = finalDF.sort_values(by=['updateTime'], ascending=False)
+    finalDF = finalDF.sort_values(by=['updateTime'], ascending=False)
     # if index == 1:
     #     for i in finalDF.values.tolist():
     #         print(datetime.fromtimestamp(i[2]).strftime('%Y-%m-%d'), i)