q_plot_tool.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041
  1. #! /usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # vim:fenc=utf-8
  4. #
  5. # Copyright © 2025 StrayWarrior <i@straywarrior.com>
  6. import matplotlib.pyplot as plt
  7. import numpy as np
  8. import pandas as pd
  # Global matplotlib setting: make 'Songti SC' the only candidate font for the
  # sans-serif family. NOTE(review): presumably chosen so CJK text in plot
  # names renders correctly -- confirm the font exists on target machines.
  9. plt.rcParams['font.sans-serif'] = ['Songti SC']
  10. def draw_figures(df, plot_name, sample_rate=1, filename=None):
  11. num_bins = 20
  12. df['p_bin'], _ = pd.qcut(df['score'], q=num_bins, duplicates='drop', retbins=True)
  13. quantile_data = df.groupby('p_bin').agg(
  14. mean_p=('score', 'mean'),
  15. mean_y=('label', 'mean')
  16. ).reset_index()
  17. ctr = quantile_data['mean_y']
  18. actual_quantiles = ctr / (ctr + (1 - ctr) / sample_rate)
  19. pctr = quantile_data['mean_p']
  20. predicted_quantiles = pctr / (pctr + (1 - pctr) / sample_rate)
  21. plt.figure(figsize=(6, 6))
  22. plt.plot(predicted_quantiles, actual_quantiles, ms=3, ls='-', color='blue', label='old')
  23. plt.plot([0, 1], [0, 1], color='gray', linestyle='--', label='Ideal Line')
  24. axis_max = max(predicted_quantiles.tolist()[-1],
  25. actual_quantiles.tolist()[-1])
  26. plt.xlim(0, axis_max)
  27. plt.ylim(0, axis_max)
  28. plt.xlabel('Predicted pCTR')
  29. plt.ylabel('Actual CTR')
  30. plt.title('Q-Q Plot for pCTR Calibration %s' % (plot_name))
  31. plt.grid(True)
  32. if filename:
  33. plt.savefig(filename)
  34. else:
  35. plt.show()