|
@@ -109,10 +109,12 @@ def calc_calibration_score2(df: pd.DataFrame, calibration_df: pd.DataFrame) -> [
|
|
|
|
|
|
def predict_df_agg(df: pd.DataFrame) -> [pd.DataFrame]:
|
|
def predict_df_agg(df: pd.DataFrame) -> [pd.DataFrame]:
|
|
# 基础聚合操作
|
|
# 基础聚合操作
|
|
|
|
+ df['abs_error'] = abs(df['label'] - df['score'])
|
|
agg_operations = {
|
|
agg_operations = {
|
|
'view': ('cid', 'size'),
|
|
'view': ('cid', 'size'),
|
|
'conv': ('label', 'sum'),
|
|
'conv': ('label', 'sum'),
|
|
'score_avg': ('score', lambda x: round(x.mean(), 6)),
|
|
'score_avg': ('score', lambda x: round(x.mean(), 6)),
|
|
|
|
+ 'mae': ('abs_error', lambda x: round(x.mean(), 6)),
|
|
}
|
|
}
|
|
|
|
|
|
# 如果存在 score_2 列,则增加相关聚合
|
|
# 如果存在 score_2 列,则增加相关聚合
|
|
@@ -145,10 +147,10 @@ def _main(old_predict_path: str, new_predict_path: str, calibration_file: str, a
|
|
new_agg_df = predict_df_agg(new_df)
|
|
new_agg_df = predict_df_agg(new_df)
|
|
|
|
|
|
# 字段重命名,和列过滤
|
|
# 字段重命名,和列过滤
|
|
- old_agg_df.rename(columns={'score_avg': 'old_score_avg', 'score_2_avg': 'old_score_2_avg'}, inplace=True)
|
|
|
|
- new_agg_df.rename(columns={'score_avg': 'new_score_avg', 'score_2_avg': 'new_score_2_avg'}, inplace=True)
|
|
|
|
- old_group_df = old_agg_df[['cid', 'view', 'conv', 'true_ctcvr', 'old_score_avg', 'old_score_2_avg']]
|
|
|
|
- new_group_df = new_agg_df[['cid', 'new_score_avg', 'new_score_2_avg']]
|
|
|
|
|
|
+ old_agg_df.rename(columns={'score_avg': 'old_score_avg', 'score_2_avg': 'old_score_2_avg', 'mae': 'old_mae'}, inplace=True)
|
|
|
|
+ new_agg_df.rename(columns={'score_avg': 'new_score_avg', 'score_2_avg': 'new_score_2_avg', 'mae': 'new_mae'}, inplace=True)
|
|
|
|
+ old_group_df = old_agg_df[['cid', 'view', 'conv', 'true_ctcvr', 'old_score_avg', 'old_score_2_avg', 'old_mae']]
|
|
|
|
+ new_group_df = new_agg_df[['cid', 'new_score_avg', 'new_score_2_avg', 'new_mae']]
|
|
merged = pd.merge(old_group_df, new_group_df, on='cid', how='left')
|
|
merged = pd.merge(old_group_df, new_group_df, on='cid', how='left')
|
|
|
|
|
|
# 计算与真实ctcvr的差异值
|
|
# 计算与真实ctcvr的差异值
|
|
@@ -165,6 +167,7 @@ def _main(old_predict_path: str, new_predict_path: str, calibration_file: str, a
|
|
'cid', 'view', "conv", "true_ctcvr",
|
|
'cid', 'view', "conv", "true_ctcvr",
|
|
"old_score_avg", "new_score_avg", "(old-true)/true", "(new-true)/true",
|
|
"old_score_avg", "new_score_avg", "(old-true)/true", "(new-true)/true",
|
|
"old_score_2_avg", "new_score_2_avg", "(old2-true)/true", "(new2-true)/true",
|
|
"old_score_2_avg", "new_score_2_avg", "(old2-true)/true", "(new2-true)/true",
|
|
|
|
+ 'old_mae', 'new_mae'
|
|
]]
|
|
]]
|
|
|
|
|
|
# 根据文件名保存不同的格式
|
|
# 根据文件名保存不同的格式
|