فهرست منبع

Update alg_growth_gh_reply_video_v1: reverse sending order

StrayWarrior 7 ماه پیش
والد
کامیت
caa03ede09
1 فایل تغییر یافته به همراه 9 افزوده شده و 5 حذف شده
  1. 9 5
      alg_growth_gh_reply_video_v1.py

+ 9 - 5
alg_growth_gh_reply_video_v1.py

@@ -31,6 +31,7 @@ GH_REPLY_STATS_TABLE = 'alg_growth_gh_reply_video_stats'
 ODPS_RANK_RESULT_TABLE = 'alg_gh_autoreply_video_rank_data'
 RDS_RANK_RESULT_TABLE = 'alg_gh_autoreply_video_rank_data'
 STATS_PERIOD_DAYS = 3
+SEND_N = 2
 
 def check_data_partition(project, table, data_dt, data_hr=None):
     """检查数据是否准备好"""
@@ -85,7 +86,7 @@ def rank_for_layer1(run_dt, run_hour, project, table):
     # TODO: 修改权重计算策略
     sample_weights = df['rov']
 
-    sampled_df = df.sample(n=2, weights=sample_weights)
+    sampled_df = df.sample(n=SEND_N, weights=sample_weights)
     sampled_df['sort'] = range(1, len(sampled_df) + 1)
     sampled_df['strategy_key'] = EXPLORE1_GROUP_NAME
     sampled_df['dt_version'] = dt_version
@@ -113,7 +114,7 @@ def rank_for_layer2(run_dt, run_hour, project, table):
     sampled_dfs = []
     # 处理default逻辑(default-explore2)
     default_stats_df = stats_df.query('gh_id == "default"')
-    sampled_df = default_stats_df.sample(n=2, weights=default_stats_df['score'])
+    sampled_df = default_stats_df.sample(n=SEND_N, weights=default_stats_df['score'])
     sampled_df['sort'] = range(1, len(sampled_df) + 1)
     sampled_dfs.append(sampled_df)
 
@@ -123,10 +124,10 @@ def rank_for_layer2(run_dt, run_hour, project, table):
     # TODO: 个数不足时的兜底逻辑
     for gh_id in GH_IDS:
         sub_df = df.query(f'gh_id == "{gh_id}"')
-        sampled_df = sub_df.sample(n=2, weights=sub_df['score'])
+        sampled_df = sub_df.sample(n=SEND_N, weights=sub_df['score'])
         sampled_df['sort'] = range(1, len(sampled_df) + 1)
         sampled_dfs.append(sampled_df)
-        if len(sampled_df) != 2:
+        if len(sampled_df) != SEND_N:
             raise
 
     extend_df = pd.concat(sampled_dfs)
@@ -170,7 +171,7 @@ def rank_for_base(run_dt, run_hour, project, stats_table, rank_table):
         top_n = group_sorted.head(n)
         top_n['sort'] = range(1, n + 1)
         return top_n
-    ranked_df = grouped_stats_df.groupby('gh_id').apply(set_top_n)
+    ranked_df = grouped_stats_df.groupby('gh_id').apply(set_top_n, SEND_N)
     ranked_df = ranked_df.reset_index(drop=True)
     #ranked_df['sort'] = grouped_stats_df.groupby('gh_id')['score'].rank(ascending=False)
     ranked_df['strategy_key'] = BASE_GROUP_NAME
@@ -199,6 +200,9 @@ def build_and_transfer_data(run_dt, run_hour, project):
     final_df = final_df.to_pandas()
     final_df = final_df[['strategy_key', 'dt_version', 'gh_id', 'sort', 'video_id', 'title', 'cover_url']]
 
+    # reverse sending order
+    final_df['sort'] = SEND_N + 1 - final_df['sort']
+
     # save to ODPS
     t = odps_instance.get_table(ODPS_RANK_RESULT_TABLE)
     part_spec_dict = {'dt': run_dt, 'hour': run_hour, 'ctime': dt_version}