|
@@ -99,15 +99,13 @@ def segment_calc_diff_rate_by_score(df: pd.DataFrame, segment_file_path: str, st
|
|
|
return merged_df, filtered_df
|
|
|
|
|
|
|
|
|
-def read_and_calibration_predict(predict_path: str, is_hdfs=True, step=100) -> [pd.DataFrame, pd.DataFrame]:
|
|
|
+def read_and_calibration_predict(predict_path: str, step=100) -> [pd.DataFrame, pd.DataFrame]:
|
|
|
"""
|
|
|
读取评估结果,并进行校准
|
|
|
"""
|
|
|
- if is_hdfs:
|
|
|
- # 文件路径处理
|
|
|
- predicts = read_predict_from_hdfs(predict_path)
|
|
|
- else:
|
|
|
- predicts = read_predict_from_local_txt(predict_path)
|
|
|
+ # 本地调试使用
|
|
|
+ # predicts = read_predict_from_local_txt(predict_path)
|
|
|
+ predicts = read_predict_from_hdfs(predict_path)
|
|
|
df = pd.DataFrame(predicts)
|
|
|
|
|
|
# 模型分分段计算与真实ctcvr的dff_rate
|
|
@@ -132,8 +130,8 @@ def read_and_calibration_predict(predict_path: str, is_hdfs=True, step=100) -> [
|
|
|
|
|
|
|
|
|
def _main(old_predict_path: str, new_predict_path: str, calibration_file: str, analyse_file: str):
|
|
|
- old_group_df, old_segment_df = read_and_calibration_predict(old_predict_path, is_hdfs=False)
|
|
|
- new_group_df, new_segment_df = read_and_calibration_predict(new_predict_path, is_hdfs=False)
|
|
|
+ old_group_df, old_segment_df = read_and_calibration_predict(old_predict_path)
|
|
|
+ new_group_df, new_segment_df = read_and_calibration_predict(new_predict_path)
|
|
|
|
|
|
# 分段文件保存, 此处保留的最后使用的分段文件,不是所有的分段
|
|
|
new_segment_df.to_csv(calibration_file, sep='\t', index=False, header=False)
|