# test.py
import sys

import numpy as np
import pandas as pd
from scipy.optimize import minimize
from sklearn.metrics import mean_squared_error
  5. # 1. 读取数据
  6. # 假设文件名为 "data.csv",有两列:预测值 (pred) 和真实值 (label)
  7. file_path = "a.txt"
  8. y_pred = []
  9. y_true = []
  10. with open(file_path, 'r') as f:
  11. for line in f.readlines():
  12. lines = line.strip().split("\t")
  13. y_pred.append(float(lines[0]))
  14. y_true.append(float(lines[1]))
  15. y_pred = np.array(y_pred)
  16. y_true = np.array(y_true)
  17. # 2. 定义对数-线性缩放函数
  18. def logistic_scaling(pred, a, b):
  19. """
  20. 对数-线性缩放函数
  21. :param pred: 原始预测值 (范围 0 到 1)
  22. :param a: 缩放参数
  23. :param b: 偏移参数
  24. :return: 校准后的预测值
  25. """
  26. # logit_pred = np.log(pred / (1 - pred)) # 计算 logit
  27. # calibrated_logit = a * logit_pred + b # 线性变换
  28. # return 1 / (1 + np.exp(-calibrated_logit)) # 通过 Sigmoid 归一化
  29. return np.power(pred + a, b)
  30. # 3. 定义优化目标函数
  31. def objective(params, y_pred, y_true):
  32. """
  33. 目标函数,用于优化 a 和 b
  34. :param params: 待优化的参数 (a, b)
  35. :param y_pred: 原始预测值
  36. :param y_true: 真实值 (连续值)
  37. :return: MSE 损失
  38. """
  39. a, b = params
  40. # 使用 Logistic Scaling 校准预测值
  41. calibrated_pred = logistic_scaling(y_pred, a, b)
  42. # 计算均方误差 (MSE)
  43. return mean_squared_error(y_true, calibrated_pred)
  44. # 4. 优化参数
  45. # 初始化参数 a=1, b=0
  46. initial_params = [0.0, 1.0]
  47. # 使用 scipy.optimize.minimize 进行优化
  48. result = minimize(objective, initial_params, args=(y_pred, y_true), method='L-BFGS-B')
  49. # 获取优化后的参数 a 和 b
  50. optimized_a, optimized_b = result.x
  51. print(f"Optimized a: {optimized_a}, Optimized b: {optimized_b}")
  52. # 5. 应用校准模型
  53. calibrated_preds = logistic_scaling(y_pred, optimized_a, optimized_b)
  54. # 7. 验证效果
  55. # 计算校准前后的 MSE
  56. original_mse = mean_squared_error(y_true, y_pred)
  57. calibrated_mse = mean_squared_error(y_true, calibrated_preds)
  58. print(f"Original MSE: {original_mse:.6f}")
  59. print(f"Calibrated MSE: {calibrated_mse:.6f}")
  60. def calibration_function(p, x0=0.07, k1=15, k2=15, p_max=0.13):
  61. # Sigmoid 平滑校正
  62. sigmoid_part = 1 / (1 + np.exp(-k1 * (p - x0)))
  63. # 线性与非线性的平滑过渡
  64. calibrated = p + sigmoid_part / (1 + np.exp(-k2 * (p - x0))) * (p_max - p)
  65. return calibrated
  66. print(calibration_function(0.00001))