Преглед на файлове

Update model_predict_analyse: add mae

StrayWarrior преди 3 месеца
родител
ревизия
2b5d66f2ac
променен е 1 файл, в който са добавени 7 реда и са изтрити 4 реда
  1. 7 4
      ad/model_predict_analyse.py

+ 7 - 4
ad/model_predict_analyse.py

@@ -109,10 +109,12 @@ def calc_calibration_score2(df: pd.DataFrame, calibration_df: pd.DataFrame) -> [
 
 def predict_df_agg(df: pd.DataFrame) -> [pd.DataFrame]:
     # 基础聚合操作
+    df['abs_error'] = abs(df['label'] - df['score'])
     agg_operations = {
         'view': ('cid', 'size'),
         'conv': ('label', 'sum'),
         'score_avg': ('score', lambda x: round(x.mean(), 6)),
+        'mae': ('abs_error', lambda x: round(x.mean(), 6)),
     }
 
     # 如果存在 score_2 列,则增加相关聚合
@@ -145,10 +147,10 @@ def _main(old_predict_path: str, new_predict_path: str, calibration_file: str, a
     new_agg_df = predict_df_agg(new_df)
 
     # 字段重命名,和列过滤
-    old_agg_df.rename(columns={'score_avg': 'old_score_avg', 'score_2_avg': 'old_score_2_avg'}, inplace=True)
-    new_agg_df.rename(columns={'score_avg': 'new_score_avg', 'score_2_avg': 'new_score_2_avg'}, inplace=True)
-    old_group_df = old_agg_df[['cid', 'view', 'conv', 'true_ctcvr', 'old_score_avg', 'old_score_2_avg']]
-    new_group_df = new_agg_df[['cid', 'new_score_avg', 'new_score_2_avg']]
+    old_agg_df.rename(columns={'score_avg': 'old_score_avg', 'score_2_avg': 'old_score_2_avg', 'mae': 'old_mae'}, inplace=True)
+    new_agg_df.rename(columns={'score_avg': 'new_score_avg', 'score_2_avg': 'new_score_2_avg', 'mae': 'new_mae'}, inplace=True)
+    old_group_df = old_agg_df[['cid', 'view', 'conv', 'true_ctcvr', 'old_score_avg', 'old_score_2_avg', 'old_mae']]
+    new_group_df = new_agg_df[['cid', 'new_score_avg', 'new_score_2_avg', 'new_mae']]
     merged = pd.merge(old_group_df, new_group_df, on='cid', how='left')
 
     # 计算与真实ctcvr的差异值
@@ -165,6 +167,7 @@ def _main(old_predict_path: str, new_predict_path: str, calibration_file: str, a
         'cid', 'view', "conv", "true_ctcvr",
         "old_score_avg", "new_score_avg", "(old-true)/true", "(new-true)/true",
         "old_score_2_avg", "new_score_2_avg", "(old2-true)/true", "(new2-true)/true",
+        'old_mae', 'new_mae'
     ]]
 
     # 根据文件名保存不同的格式