|
@@ -85,7 +85,7 @@ def segment_calc_diff_rate_by_score(df: pd.DataFrame, segment_file_path: str, st
|
|
|
|
|
|
# 完整的分段文件保存
|
|
|
csv_data = group_df.to_csv(sep="\t", index=False)
|
|
|
- with client.write(segment_file_path, encoding='utf-8') as writer:
|
|
|
+ with client.write(segment_file_path, encoding='utf-8', overwrite=True) as writer:
|
|
|
writer.write(csv_data)
|
|
|
|
|
|
filtered_df = group_df[(abs(group_df['segment_diff_rate']) >= 0.2) & (group_df['segment_label_cnt'] >= 1000)]
|
|
@@ -110,7 +110,7 @@ def read_and_calibration_predict(predict_path: str, step=100) -> [pd.DataFrame,
|
|
|
predict_basename = os.path.basename(predict_path)
|
|
|
if predict_basename.endswith("/"):
|
|
|
predict_basename = predict_basename[:-1]
|
|
|
- df, segment_df = segment_calc_diff_rate_by_score(df, segment_file_path=f"{SEGMENT_BASE_PATH}/{predict_basename}", step=100)
|
|
|
+ df, segment_df = segment_calc_diff_rate_by_score(df, segment_file_path=f"{SEGMENT_BASE_PATH}/{predict_basename}.txt", step=100)
|
|
|
|
|
|
# 生成校准后的分数
|
|
|
df['score_2'] = df['score'] / (1 + df['segment_diff_rate'])
|