|
@@ -80,7 +80,7 @@ def process_reply_stats(project, table, period, run_dt):
|
|
|
|
|
|
merged_df = pd.concat([df, default_stats_df]).reset_index(drop=True)
|
|
|
|
|
|
- merged_df['score'] = merged_df['day0_return'] / (merged_df['first_visit_uv'] + 1000)
|
|
|
+ merged_df['score'] = merged_df['day0_return'] / (merged_df['send_count'] + 1000)
|
|
|
return merged_df
|
|
|
|
|
|
|
|
@@ -93,9 +93,9 @@ def rank_for_layer1(run_dt, run_hour, project, table):
|
|
|
np.random.seed(int(dt_version)+1)
|
|
|
|
|
|
# TODO: 修改权重计算策略
|
|
|
- sample_weights = df['rov']
|
|
|
+ df['score'] = df['rov']
|
|
|
|
|
|
- sampled_df = df.sample(n=SEND_N, weights=sample_weights)
|
|
|
+ sampled_df = df.sample(n=SEND_N, weights=df['score'])
|
|
|
sampled_df['sort'] = range(1, len(sampled_df) + 1)
|
|
|
sampled_df['strategy_key'] = EXPLORE1_GROUP_NAME
|
|
|
sampled_df['dt_version'] = dt_version
|
|
@@ -105,7 +105,7 @@ def rank_for_layer1(run_dt, run_hour, project, table):
|
|
|
gh_name_df['_tmpkey'] = 1
|
|
|
extend_df = sampled_df.merge(gh_name_df, on='_tmpkey').drop('_tmpkey', axis=1)
|
|
|
|
|
|
- result_df = extend_df[['strategy_key', 'dt_version', 'gh_id', 'sort', 'video_id']]
|
|
|
+ result_df = extend_df[['strategy_key', 'dt_version', 'gh_id', 'sort', 'video_id', 'score']]
|
|
|
return result_df
|
|
|
|
|
|
def rank_for_layer2(run_dt, run_hour, project, stats_table, rank_table):
|
|
@@ -129,7 +129,6 @@ def rank_for_layer2(run_dt, run_hour, project, stats_table, rank_table):
|
|
|
|
|
|
# 基础过滤for账号
|
|
|
df = stats_df.query('day0_return > 100')
|
|
|
- # TODO: fetch send_count
|
|
|
|
|
|
# fallback to base if necessary
|
|
|
base_strategy_df = get_last_strategy_result(
|
|
@@ -150,7 +149,7 @@ def rank_for_layer2(run_dt, run_hour, project, stats_table, rank_table):
|
|
|
extend_df = pd.concat(sampled_dfs)
|
|
|
extend_df['strategy_key'] = EXPLORE2_GROUP_NAME
|
|
|
extend_df['dt_version'] = dt_version
|
|
|
- result_df = extend_df[['strategy_key', 'dt_version', 'gh_id', 'sort', 'video_id']]
|
|
|
+ result_df = extend_df[['strategy_key', 'dt_version', 'gh_id', 'sort', 'video_id', 'score']]
|
|
|
return result_df
|
|
|
|
|
|
def rank_for_base(run_dt, run_hour, project, stats_table, rank_table):
|
|
@@ -190,7 +189,7 @@ def rank_for_base(run_dt, run_hour, project, stats_table, rank_table):
|
|
|
#ranked_df['sort'] = grouped_stats_df.groupby('gh_id')['score'].rank(ascending=False)
|
|
|
ranked_df['strategy_key'] = BASE_GROUP_NAME
|
|
|
ranked_df['dt_version'] = dt_version
|
|
|
- ranked_df = ranked_df[['strategy_key', 'dt_version', 'gh_id', 'sort', 'video_id']]
|
|
|
+ ranked_df = ranked_df[['strategy_key', 'dt_version', 'gh_id', 'sort', 'video_id', 'score']]
|
|
|
return ranked_df
|
|
|
|
|
|
|
|
@@ -222,7 +221,7 @@ def build_and_transfer_data(run_dt, run_hour, project):
|
|
|
final_df = odps_ranked_df.join(video_df, on=('video_id', 'id'))
|
|
|
|
|
|
final_df = final_df.to_pandas()
|
|
|
- final_df = final_df[['strategy_key', 'dt_version', 'gh_id', 'sort', 'video_id', 'title', 'cover_url']]
|
|
|
+ final_df = final_df[['strategy_key', 'dt_version', 'gh_id', 'sort', 'video_id', 'title', 'cover_url', 'score']]
|
|
|
|
|
|
# reverse sending order
|
|
|
final_df['sort'] = SEND_N + 1 - final_df['sort']
|