1234567891011121314151617181920212223242526272829303132333435363738394041 |
- #! /usr/bin/env python
- # -*- coding: utf-8 -*-
- # vim:fenc=utf-8
- #
- # Copyright © 2025 StrayWarrior <i@straywarrior.com>
- import matplotlib.pyplot as plt
- import numpy as np
- import pandas as pd
# Make 'Songti SC' the only sans-serif candidate for every figure drawn after
# this point (presumably so Chinese text in labels renders -- confirm).
plt.rcParams.update({'font.sans-serif': ['Songti SC']})
def _undo_sampling(rate, sample_rate):
    """Apply ``rate / (rate + (1 - rate) / sample_rate)`` element-wise."""
    return rate / (rate + (1 - rate) / sample_rate)


def _bin_calibration(df, sample_rate, num_bins):
    """Return per-score-quantile (predicted, actual) rates, sample-rate corrected."""
    # Group by the qcut result directly instead of storing it as a column, so
    # the caller's DataFrame is left untouched.
    score_bins = pd.qcut(df['score'], q=num_bins, duplicates='drop')
    # observed=True keeps only bins that actually contain rows and avoids the
    # pandas deprecation warning for categorical group keys.
    per_bin = df.groupby(score_bins, observed=True).agg(
        mean_p=('score', 'mean'),
        mean_y=('label', 'mean'),
    ).reset_index(drop=True)
    predicted = _undo_sampling(per_bin['mean_p'], sample_rate)
    actual = _undo_sampling(per_bin['mean_y'], sample_rate)
    return predicted, actual


def draw_figures(df, plot_name, sample_rate=1, filename=None, num_bins=20):
    """Draw a predicted-vs-actual calibration curve over score quantile bins.

    Rows are bucketed into (up to) ``num_bins`` quantile bins of ``score``;
    for each bin the mean ``score`` and the mean ``label`` are computed, both
    are passed through ``r / (r + (1 - r) / sample_rate)``, and the resulting
    points are plotted against the diagonal.

    Args:
        df: DataFrame with a ``score`` column and a ``label`` column. It is
            not modified.
        plot_name: Text appended to the figure title.
        sample_rate: Positive factor used in the rate correction above
            (presumably the negative down-sampling rate of ``df`` -- confirm
            with the data pipeline). ``1`` leaves the rates unchanged.
        filename: If truthy, the figure is saved to this path and closed;
            otherwise it is shown with ``plt.show()``.
        num_bins: Number of quantile bins requested from ``pd.qcut``; bins
            with duplicate edges are dropped.

    Raises:
        ValueError: If ``sample_rate`` is not positive.
    """
    if sample_rate <= 0:
        # A zero or negative rate makes the correction divide by zero or flip sign.
        raise ValueError('sample_rate must be positive, got %r' % (sample_rate,))

    predicted_quantiles, actual_quantiles = _bin_calibration(
        df, sample_rate, num_bins)

    fig = plt.figure(figsize=(6, 6))
    # NOTE(review): no marker is set, so ms=3 likely has no visible effect, and
    # the labels are never displayed because this function draws no legend.
    plt.plot(predicted_quantiles, actual_quantiles, ms=3, ls='-', color='blue', label='old')
    plt.plot([0, 1], [0, 1], color='gray', linestyle='--', label='Ideal Line')

    # Use the largest value on either axis: the per-bin actual rate is not
    # guaranteed to increase with the score bin, so the last bin may not be
    # the maximum. Series.max() skips NaN.
    axis_max = max(predicted_quantiles.max(), actual_quantiles.max())
    if axis_max > 0:
        # Skipped for 0/NaN so matplotlib is not given a degenerate range.
        plt.xlim(0, axis_max)
        plt.ylim(0, axis_max)

    plt.xlabel('Predicted pCTR')
    plt.ylabel('Actual CTR')
    # One-element tuple so a tuple-valued plot_name is formatted, not unpacked.
    plt.title('Q-Q Plot for pCTR Calibration %s' % (plot_name,))
    plt.grid(True)

    if filename:
        plt.savefig(filename)
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated calls do not accumulate open figures.
        plt.close(fig)
    else:
        plt.show()
|