Browse Source

Merge branch '2024-12-13-cal_rate_improve' of luojunhui/LongArticlesJob into master

luojunhui cách đây 4 tháng
mục cha
commit
debca74e21

+ 8 - 2
cal_account_read_rate_avg_daily.py

@@ -2,6 +2,7 @@
 @author: luojunhui
 @author: luojunhui
 cal each account && position reading rate
 cal each account && position reading rate
 """
 """
+import json
 from tqdm import tqdm
 from tqdm import tqdm
 from pandas import DataFrame
 from pandas import DataFrame
 from argparse import ArgumentParser
 from argparse import ArgumentParser
@@ -9,8 +10,11 @@ from datetime import datetime
 
 
 from applications import DeNetMysql, PQMySQL, longArticlesMySQL, bot, Functions
 from applications import DeNetMysql, PQMySQL, longArticlesMySQL, bot, Functions
 from applications.const import updateAccountReadRateTaskConst
 from applications.const import updateAccountReadRateTaskConst
+from config import apolloConfig
 
 
 const = updateAccountReadRateTaskConst()
 const = updateAccountReadRateTaskConst()
+config = apolloConfig()
+unauthorized_account = json.loads(config.getConfigValue("unauthorized_gh_id_fans"))
 functions = Functions()
 functions = Functions()
 read_rate_table = "long_articles_read_rate"
 read_rate_table = "long_articles_read_rate"
 
 
@@ -88,7 +92,7 @@ def get_publishing_accounts(db_client) -> list[dict]:
     WHERE
     WHERE
         t1.plan_status = 1
         t1.plan_status = 1
         AND t3.channel = 5
         AND t3.channel = 5
-        AND t3.follower_count > 0
+        -- AND t3.follower_count > 0
         GROUP BY t3.id;
         GROUP BY t3.id;
     """
     """
     account_list = db_client.select(sql)
     account_list = db_client.select(sql)
@@ -145,6 +149,8 @@ def cal_account_read_rate(gh_id_tuple) -> DataFrame:
         gh_id = line['ghId']
         gh_id = line['ghId']
         dt = functions.timestamp_to_str(timestamp=line['publish_timestamp'], string_format='%Y-%m-%d')
         dt = functions.timestamp_to_str(timestamp=line['publish_timestamp'], string_format='%Y-%m-%d')
         fans = fans_dict_each_day.get(gh_id, {}).get(dt, 0)
         fans = fans_dict_each_day.get(gh_id, {}).get(dt, 0)
+        if not fans:
+            fans = int(unauthorized_account.get(gh_id, 0))
         line['fans'] = fans
         line['fans'] = fans
         if fans > 1000:
         if fans > 1000:
             line['readRate'] = line['show_view_count'] / fans if fans else 0
             line['readRate'] = line['show_view_count'] / fans if fans else 0
@@ -235,7 +241,7 @@ def update_single_day(dt, account_list, article_df, lam):
         string_format='%Y-%m-%d'
         string_format='%Y-%m-%d'
     )
     )
 
 
-    for account in tqdm(account_list):
+    for account in tqdm(account_list, desc=dt):
         for index in const.ARTICLE_INDEX_LIST:
         for index in const.ARTICLE_INDEX_LIST:
             read_rate_detail = cal_avg_account_read_rate(
             read_rate_detail = cal_avg_account_read_rate(
                 df=article_df,
                 df=article_df,

+ 12 - 6
updateAccountV3.py

@@ -10,6 +10,10 @@ from argparse import ArgumentParser
 
 
 from applications import PQMySQL, DeNetMysql, longArticlesMySQL
 from applications import PQMySQL, DeNetMysql, longArticlesMySQL
 from applications.const import updateAccountReadAvgTaskConst
 from applications.const import updateAccountReadAvgTaskConst
+from config import apolloConfig
+
+config = apolloConfig()
+unauthorized_account = json.loads(config.getConfigValue("unauthorized_gh_id_fans"))
 
 
 
 
 def get_account_fans_by_dt(db_client) -> dict:
 def get_account_fans_by_dt(db_client) -> dict:
@@ -125,13 +129,15 @@ class UpdateAccountInfoVersion3(object):
         fans_dict = get_account_fans_by_dt(db_client=self.de)
         fans_dict = get_account_fans_by_dt(db_client=self.de)
         account_list = self.get_publishing_accounts()
         account_list = self.get_publishing_accounts()
         rate_dict = self.get_account_position_read_rate(dt)
         rate_dict = self.get_account_position_read_rate(dt)
-        for account in tqdm(account_list):
-            business_type = self.const.TOULIU if account[
-                                                     'gh_id'] in self.const.TOULIU_ACCOUNTS else self.const.ARTICLES_DAILY
-            fans = fans_dict.get(account['gh_id'], {}).get(dt, 0)
+        for account in tqdm(account_list, desc=dt):
+            gh_id = account["gh_id"]
+            business_type = self.const.TOULIU if gh_id in self.const.TOULIU_ACCOUNTS else self.const.ARTICLES_DAILY
+            fans = fans_dict.get(gh_id, {}).get(dt, 0)
+            if not fans:
+                fans = int(unauthorized_account.get(gh_id, 0))
             if fans:
             if fans:
                 for index in range(1, 9):
                 for index in range(1, 9):
-                    gh_id_position = "{}_{}".format(account['gh_id'], index)
+                    gh_id_position = "{}_{}".format(gh_id, index)
                     if rate_dict.get(gh_id_position):
                     if rate_dict.get(gh_id_position):
                         rate = rate_dict[gh_id_position]
                         rate = rate_dict[gh_id_position]
                         read_avg = fans * rate
                         read_avg = fans * rate
@@ -146,7 +152,7 @@ class UpdateAccountInfoVersion3(object):
                             self.pq.update(
                             self.pq.update(
                                 sql=insert_sql,
                                 sql=insert_sql,
                                 params=(
                                 params=(
-                                    account['gh_id'],
+                                    gh_id,
                                     index,
                                     index,
                                     dt,
                                     dt,
                                     account['account_name'],
                                     account['account_name'],