浏览代码

Update alg_growth_gh_reply_video_v1: use send_count

StrayWarrior 7 月之前
父节点
当前提交
ceffadda9f
共有 1 个文件被更改,包括 7 次插入和 8 次删除
  1. 7 8
      alg_growth_gh_reply_video_v1.py

+ 7 - 8
alg_growth_gh_reply_video_v1.py

@@ -80,7 +80,7 @@ def process_reply_stats(project, table, period, run_dt):
 
     merged_df = pd.concat([df, default_stats_df]).reset_index(drop=True)
 
-    merged_df['score'] = merged_df['day0_return'] / (merged_df['first_visit_uv'] + 1000)
+    merged_df['score'] = merged_df['day0_return'] / (merged_df['send_count'] + 1000)
     return merged_df
 
 
@@ -93,9 +93,9 @@ def rank_for_layer1(run_dt, run_hour, project, table):
     np.random.seed(int(dt_version)+1)
 
     # TODO: 修改权重计算策略
-    sample_weights = df['rov']
+    df['score'] = df['rov']
 
-    sampled_df = df.sample(n=SEND_N, weights=sample_weights)
+    sampled_df = df.sample(n=SEND_N, weights=df['score'])
     sampled_df['sort'] = range(1, len(sampled_df) + 1)
     sampled_df['strategy_key'] = EXPLORE1_GROUP_NAME
     sampled_df['dt_version'] = dt_version
@@ -105,7 +105,7 @@ def rank_for_layer1(run_dt, run_hour, project, table):
     gh_name_df['_tmpkey'] = 1
     extend_df = sampled_df.merge(gh_name_df, on='_tmpkey').drop('_tmpkey', axis=1)
 
-    result_df = extend_df[['strategy_key', 'dt_version', 'gh_id', 'sort', 'video_id']]
+    result_df = extend_df[['strategy_key', 'dt_version', 'gh_id', 'sort', 'video_id', 'score']]
     return result_df
 
 def rank_for_layer2(run_dt, run_hour, project, stats_table, rank_table):
@@ -129,7 +129,6 @@ def rank_for_layer2(run_dt, run_hour, project, stats_table, rank_table):
 
     # 基础过滤for账号
     df = stats_df.query('day0_return > 100')
-    # TODO: fetch send_count
 
     # fallback to base if necessary
     base_strategy_df = get_last_strategy_result(
@@ -150,7 +149,7 @@ def rank_for_layer2(run_dt, run_hour, project, stats_table, rank_table):
     extend_df = pd.concat(sampled_dfs)
     extend_df['strategy_key'] = EXPLORE2_GROUP_NAME
     extend_df['dt_version'] = dt_version
-    result_df = extend_df[['strategy_key', 'dt_version', 'gh_id', 'sort', 'video_id']]
+    result_df = extend_df[['strategy_key', 'dt_version', 'gh_id', 'sort', 'video_id', 'score']]
     return result_df
 
 def rank_for_base(run_dt, run_hour, project, stats_table, rank_table):
@@ -190,7 +189,7 @@ def rank_for_base(run_dt, run_hour, project, stats_table, rank_table):
     #ranked_df['sort'] = grouped_stats_df.groupby('gh_id')['score'].rank(ascending=False)
     ranked_df['strategy_key'] = BASE_GROUP_NAME
     ranked_df['dt_version'] = dt_version
-    ranked_df = ranked_df[['strategy_key', 'dt_version', 'gh_id', 'sort', 'video_id']]
+    ranked_df = ranked_df[['strategy_key', 'dt_version', 'gh_id', 'sort', 'video_id', 'score']]
     return ranked_df
 
 
@@ -222,7 +221,7 @@ def build_and_transfer_data(run_dt, run_hour, project):
     final_df = odps_ranked_df.join(video_df, on=('video_id', 'id'))
 
     final_df = final_df.to_pandas()
-    final_df = final_df[['strategy_key', 'dt_version', 'gh_id', 'sort', 'video_id', 'title', 'cover_url']]
+    final_df = final_df[['strategy_key', 'dt_version', 'gh_id', 'sort', 'video_id', 'title', 'cover_url', 'score']]
 
     # reverse sending order
     final_df['sort'] = SEND_N + 1 - final_df['sort']