@@ -168,7 +168,7 @@ def rank_for_layer2(run_dt, run_hour, project, stats_table, rank_table):
         sampled_dfs.append(sampled_df)

     # basic filtering for accounts
-    df = stats_df.query('day0_return > 100')
+    df = stats_df.query('send_count > 200 and score > 0')

     # fallback to base if necessary
     base_strategy_df = get_last_strategy_result(
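Note on the rank_for_layer2 change: the per-account base filter no longer keys on day-0 return (day0_return > 100) but on a minimum send volume combined with a strictly positive score. A minimal sketch of how the new query behaves, assuming stats_df carries send_count and score columns (the column names come from the diff; the toy values below are made up):

import pandas as pd

# Toy stand-in for stats_df; only the columns used by the filter are included.
stats_df = pd.DataFrame({
    'gh_id': ['g1', 'g1', 'g2'],
    'video_id': [101, 102, 103],
    'send_count': [500, 150, 300],
    'score': [0.2, 0.9, 0.0],
})

# New filter: require a minimum send volume and a strictly positive score.
df = stats_df.query('send_count > 200 and score > 0')
# Only (g1, 101) survives: (g1, 102) fails send_count, (g2, 103) fails score.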
@@ -215,10 +215,11 @@ def rank_for_base(run_dt, run_hour, project, stats_table, rank_table, stg_key):

     stats_with_strategy_df = stats_df \
         .merge(
-            base_strategy_df,
-            on=['gh_id', 'video_id'],
-            how='left') \
-        .query('strategy_key.notna() or score > 0.1')
+            base_strategy_df,
+            on=['gh_id', 'video_id'],
+            how='outer') \
+        .query('strategy_key.notna() or (send_count > 500 and score > 0.05)') \
+        .fillna({'score': 0.0})

     # merge default and per-account data
     grouped_stats_df = pd.concat([default_stats_df, stats_with_strategy_df]).reset_index()
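Note on the rank_for_base change: switching the merge from a left join to an outer join keeps videos that appear only in the last strategy result, the relaxed query also admits high-volume videos with a modest score, and fillna gives rows that came only from the strategy side a score of 0.0. A minimal sketch of the combined behavior on toy frames (column names taken from the diff, values made up; engine='python' is passed so the .notna() call inside query is evaluated by the Python engine rather than numexpr):

import pandas as pd

# Toy stand-ins; the real frames come from the stats table and get_last_strategy_result.
stats_df = pd.DataFrame({
    'gh_id': ['g1', 'g1', 'g2'],
    'video_id': [101, 102, 103],
    'send_count': [800, 100, 600],
    'score': [0.04, 0.30, 0.20],
})
base_strategy_df = pd.DataFrame({
    'gh_id': ['g1', 'g3'],
    'video_id': [102, 104],
    'strategy_key': ['base', 'base'],
})

# Outer join keeps (g3, 104) even though it has no fresh stats.
merged = stats_df.merge(base_strategy_df, on=['gh_id', 'video_id'], how='outer')

# Keep rows carried over from the last strategy, plus high-volume rows above the score floor.
kept = merged.query(
    'strategy_key.notna() or (send_count > 500 and score > 0.05)',
    engine='python',
).fillna({'score': 0.0})
# (g1, 102) and (g3, 104) stay via strategy_key, (g2, 103) via volume and score;
# (g1, 101) is dropped (score too low) and (g3, 104) gets its missing score filled with 0.0.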
@@ -310,9 +311,9 @@ def build_and_transfer_data(run_dt, run_hour, project, **kwargs):

     layer1_rank = rank_for_layer1(run_dt, run_hour, ODS_PROJECT, EXPLORE_POOL_TABLE, gh_df)
     layer2_rank = rank_for_layer2(run_dt, run_hour, ODS_PROJECT, GH_REPLY_STATS_TABLE, ODPS_RANK_RESULT_TABLE)
-    # base_rank = rank_for_base(run_dt, run_hour, ODS_PROJECT, GH_REPLY_STATS_TABLE, ODPS_RANK_RESULT_TABLE,BASE_GROUP_NAME)
+    base_rank = rank_for_base(run_dt, run_hour, ODS_PROJECT, GH_REPLY_STATS_TABLE, ODPS_RANK_RESULT_TABLE,BASE_GROUP_NAME)
     # layer2_rank = rank_for_base_designate(run_dt, run_hour, EXPLORE2_GROUP_NAME)
-    base_rank = rank_for_base_designate(run_dt, run_hour, BASE_GROUP_NAME)
+    # base_rank = rank_for_base_designate(run_dt, run_hour, BASE_GROUP_NAME)

     final_rank_df = pd.concat([layer1_rank, layer2_rank, base_rank]).reset_index(drop=True)
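Context for the unchanged final line (not part of the diff): pd.concat stacks the three per-layer rank frames with their original row indexes, so reset_index(drop=True) is what gives final_rank_df a clean 0..n-1 index without keeping the old index as an extra column. A minimal sketch with placeholder columns and values:

import pandas as pd

# Placeholder rank frames; the real ones come from rank_for_layer1, rank_for_layer2 and rank_for_base.
layer1_rank = pd.DataFrame({'gh_id': ['g1'], 'video_id': [101], 'strategy_key': ['layer1']})
layer2_rank = pd.DataFrame({'gh_id': ['g1'], 'video_id': [102], 'strategy_key': ['layer2']})
base_rank = pd.DataFrame({'gh_id': ['g2'], 'video_id': [103], 'strategy_key': ['base']})

# Plain concat would leave duplicate indexes (0, 0, 0);
# reset_index(drop=True) renumbers rows 0..n-1 and discards the old index instead of adding it as a column.
final_rank_df = pd.concat([layer1_rank, layer2_rank, base_rank]).reset_index(drop=True)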