"""Calibrate model predictions with a power-law scaling fitted by L-BFGS-B.

Reads tab-separated (prediction, label) pairs from ``a.txt``, fits the
two-parameter transform ``(pred + a) ** b`` by minimizing MSE against the
labels, and reports MSE before and after calibration.
"""
import numpy as np
import pandas as pd  # noqa: F401  (kept from original; unused in this script)
from scipy.optimize import minimize
from sklearn.metrics import mean_squared_error

# 1. Load data.
# Each line of "a.txt" holds two tab-separated columns:
# predicted value (pred) and ground-truth value (label).
file_path = "a.txt"
y_pred = []
y_true = []
with open(file_path, 'r') as f:
    for line in f:
        fields = line.strip().split("\t")
        y_pred.append(float(fields[0]))
        y_true.append(float(fields[1]))
y_pred = np.array(y_pred)
y_true = np.array(y_true)


# 2. Scaling function.
def logistic_scaling(pred, a, b):
    """Power-law calibration: ``(pred + a) ** b``.

    NOTE: the name is historical — an earlier logit/sigmoid implementation
    was replaced by this power transform. The name is kept so existing
    callers keep working.

    :param pred: raw predictions (assumed in [0, 1])
    :param a: additive shift
    :param b: exponent
    :return: calibrated predictions
    """
    # NOTE(review): np.power yields NaN when pred + a < 0 and b is
    # non-integer; the optimizer may step into that region — confirm
    # the data keeps pred + a >= 0.
    return np.power(pred + a, b)


# 3. Optimization objective.
def objective(params, y_pred, y_true):
    """MSE between calibrated predictions and true labels.

    :param params: parameter pair (a, b) being optimized
    :param y_pred: raw predictions
    :param y_true: ground-truth values (continuous)
    :return: mean squared error of the calibrated predictions
    """
    a, b = params
    calibrated_pred = logistic_scaling(y_pred, a, b)
    return mean_squared_error(y_true, calibrated_pred)


# 4. Fit parameters, starting from a=0, b=1 (the identity transform).
initial_params = [0.0, 1.0]
result = minimize(objective, initial_params, args=(y_pred, y_true), method='L-BFGS-B')
optimized_a, optimized_b = result.x
print(f"Optimized a: {optimized_a}, Optimized b: {optimized_b}")

# 5. Apply the calibration model.
calibrated_preds = logistic_scaling(y_pred, optimized_a, optimized_b)

# 6. Evaluate: MSE before vs. after calibration.
original_mse = mean_squared_error(y_true, y_pred)
calibrated_mse = mean_squared_error(y_true, calibrated_preds)
print(f"Original MSE: {original_mse:.6f}")
print(f"Calibrated MSE: {calibrated_mse:.6f}")


def calibration_function(p, x0=0.07, k1=15, k2=15, p_max=0.13):
    """Smoothly pull ``p`` toward ``p_max`` around the threshold ``x0``.

    Blends ``p`` with ``p_max`` weighted by the product of two sigmoids
    centred at ``x0`` (steepness ``k1`` and ``k2``), giving a smooth
    transition instead of a hard cutoff.

    :param p: raw probability/score to correct
    :param x0: transition centre
    :param k1: steepness of the first sigmoid
    :param k2: steepness of the second sigmoid
    :param p_max: ceiling the correction blends toward
    :return: smoothly corrected value
    """
    # Sigmoid smoothing term centred at x0.
    sigmoid_part = 1 / (1 + np.exp(-k1 * (p - x0)))
    # Smooth blend between the linear (p) and corrected (p_max) regimes.
    calibrated = p + sigmoid_part / (1 + np.exp(-k2 * (p - x0))) * (p_max - p)
    return calibrated


print(calibration_function(0.00001))