7 months ago · fbb7451da4
--- a/model/model_predict_analyse_20241115.py
+++ b/model/model_predict_analyse_20241115.py
@@ -1,4 +1,3 @@
 
				-import argparse
			
 
				 import gzip
			
 
				 import os.path
			
 
				 
			
@@ -7,8 +6,8 @@ from hdfs import InsecureClient
 
				 
			
 
				 client = InsecureClient("http://master-1-1.c-7f31a3eea195cb73.cn-hangzhou.emr.aliyuncs.com:9870", user="spark")
			
 
				 
			
 
				-SEGMENT_BASE_PATH = os.environ.get("SEGMENT_BASE_PATH", "/dw/recommend/model/36_model_attachment/score_calibration_file")
			
 
				-PREDICT_CACHE_PATH = os.environ.get("PREDICT_CACHE_PATH", "/root/zhaohp/XGB/predict_cache")
			
 
				+SEGMENT_BASE_PATH = os.environ.get("SEGMENT_BASE_PATH", "/Users/zhao/Desktop/tzld/XGB/predict_cache")
			
 
				+PREDICT_CACHE_PATH = os.environ.get("PREDICT_CACHE_PATH", "/Users/zhao/Desktop/tzld/XGB/predict_cache")
			
 
				 
			
 
				 
			
 
				 def parse_predict_line(line: str) -> [bool, dict]:
			
@@ -85,7 +84,7 @@ def get_predict_calibration_file(df: pd.DataFrame, predict_basename: str) -> [pd
 
				     agg_df['diff_rate'] = (agg_df['score_avg'] / agg_df['true_ctcvr'] - 1).mask(agg_df['true_ctcvr'] == 0, 0).round(6)
			
 
				     condition = 'view > 1000 and diff_rate >= 0.2'
			
 
				     save_full_calibration_file(agg_df, f"{SEGMENT_BASE_PATH}/{predict_basename}.txt")
			
 
				-    calibration = agg_df.query(condition)
			
 
				+    calibration = agg_df[(agg_df['view'] > 1000) & ((agg_df['diff_rate'] >= 0.2) | (agg_df['diff_rate'] <= 0.2)) & agg_df['diff_rate'] != 0]
			
 
				     return calibration
			
 
				 
			
 
				 
			
@@ -177,16 +176,22 @@ def _main(old_predict_path: str, new_predict_path: str, calibration_file: str, a
 
				 
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				-    parser = argparse.ArgumentParser(description="model_predict_analyse_20241101.py")
			
 
				-    parser.add_argument("-op", "--old_predict_path", required=True, help="老模型评估结果")
			
 
				-    parser.add_argument("-np", "--new_predict_path", required=True, help="新模型评估结果")
			
 
				-    parser.add_argument("-af", "--analyse_file", required=True, help="最后计算结果的保存路径")
			
 
				-    parser.add_argument("-cf", "--calibration_file", required=True, help="线上使用的segment文件保存路径")
			
 
				-    args = parser.parse_args()
			
 
				-
			
 
				     _main(
			
 
				-        old_predict_path=args.old_predict_path,
			
 
				-        new_predict_path=args.new_predict_path,
			
 
				-        calibration_file=args.calibration_file,
			
 
				-        analyse_file=args.analyse_file
			
 
				+        old_predict_path="/Users/zhao/Desktop/tzld/XGB/predict_result/20241210_351_1000_1201_1207.txt",
			
 
				+        new_predict_path="/Users/zhao/Desktop/tzld/XGB/predict_result/20241210_351_1000_1203_1209.txt",
			
 
				+        calibration_file="/Users/zhao/Desktop/tzld/XGB/calibration_file/model_xgb_351_1000_v2_calibration.txt",
			
 
				+        analyse_file="/Users/zhao/Desktop/tzld/XGB/predict_cache/analyse_file.txt"
			
 
				     )
			
 
				+    # parser = argparse.ArgumentParser(description="model_predict_analyse_20241101.py")
			
 
				+    # parser.add_argument("-op", "--old_predict_path", required=True, help="老模型评估结果")
			
 
				+    # parser.add_argument("-np", "--new_predict_path", required=True, help="新模型评估结果")
			
 
				+    # parser.add_argument("-af", "--analyse_file", required=True, help="最后计算结果的保存路径")
			
 
				+    # parser.add_argument("-cf", "--calibration_file", required=True, help="线上使用的segment文件保存路径")
			
 
				+    # args = parser.parse_args()
			
 
				+    #
			
 
				+    # _main(
			
 
				+    #     old_predict_path=args.old_predict_path,
			
 
				+    #     new_predict_path=args.new_predict_path,
			
 
				+    #     calibration_file=args.calibration_file,
			
 
				+    #     analyse_file=args.analyse_file
			
 
				+    # )