verify_decision.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. """
  2. 离线验证脚本 — auto_put_ad_mini V3
  3. 用本地 30 天 CSV 数据跑完整 V3 流程:
  4. 1. 加载创意级数据(outputs/raw/creative_*.csv)
  5. 2. 聚合创意→广告
  6. 3. 计算 动态ROI
  7. 4. 运行三维度决策引擎
  8. 5. 生成决策报告(CSV + XLSX)
  9. 用法:
  10. cd /Users/liulidong/project/agent/Agent
  11. .venv/bin/python3 examples/auto_put_ad_mini/verify_decision.py
  12. """
  13. import sys
  14. from datetime import datetime, timedelta
  15. from pathlib import Path
  16. sys.path.insert(0, str(Path(__file__).parent.parent.parent))
  17. import pandas as pd
  18. from examples.auto_put_ad_mini.config import (
  19. MIN_DAILY_COST,
  20. MIN_AD_AGE_DAYS,
  21. ROI_LOW_FACTOR,
  22. NO_SPEND_THRESHOLD,
  23. STABLE_SPEND_THRESHOLD,
  24. )
  25. from examples.auto_put_ad_mini.tools.roi_calculator import (
  26. _aggregate_creative_to_ad,
  27. _calculate_f7_dynamic_roi,
  28. _calculate_yesterday_roi,
  29. _calculate_7d_summary,
  30. _calculate_30d_summary,
  31. )
  32. from examples.auto_put_ad_mini.tools.ad_decision import (
  33. _extract_audience_tier,
  34. _calculate_ad_age_days,
  35. _run_decision_engine,
  36. _detect_decay_signals,
  37. )
  38. from examples.auto_put_ad_mini.tools.report_generator import (
  39. _write_xlsx_with_format,
  40. OUTPUT_COLUMNS,
  41. )
  42. base_dir = Path(__file__).parent
  43. raw_dir = base_dir / "outputs" / "raw"
  44. ad_status_dir = base_dir / "outputs" / "ad_status"
  45. reports_dir = base_dir / "outputs" / "reports"
  46. print("=" * 60)
  47. print("V3 离线验证 — 完整流程测试")
  48. print("=" * 60)
  49. # ① 加载 30 天创意级数据
  50. print("\n① 加载创意级数据...")
  51. creative_files = sorted(raw_dir.glob("creative_*.csv"))
  52. if not creative_files:
  53. print(f"错误: {raw_dir} 下没有找到 creative_*.csv 文件")
  54. print("请先运行: .venv/bin/python3 examples/auto_put_ad_mini/fetch_data.py --days 30")
  55. sys.exit(1)
  56. print(f" 找到 {len(creative_files)} 个文件")
  57. dfs = []
  58. for f in creative_files:
  59. df = pd.read_csv(f)
  60. # 从文件名提取日期
  61. date_str = f.stem.replace("creative_", "")
  62. df["date"] = date_str
  63. dfs.append(df)
  64. print(f" {f.name}: {len(df)} 行")
  65. df_creative = pd.concat(dfs, ignore_index=True)
  66. print(f" 合并后: {len(df_creative)} 行")
  67. # ② 聚合创意→广告
  68. print("\n② 聚合创意→广告...")
  69. df_ad = _aggregate_creative_to_ad(df_creative)
  70. print(f" 广告级数据: {len(df_ad)} 行(ad_id × date)")
  71. # ③ 计算 动态ROI
  72. print("\n③ 计算 动态ROI...")
  73. df_ad = _calculate_f7_dynamic_roi(df_ad, min_daily_cost=MIN_DAILY_COST)
  74. print(f" 已添加: T0裂变系数, arpu, 当日裂变收益率, 当日回流倍数, 裂变效率稳定因子, 动态ROI, 动态ROI_7日均值")
  75. # ④ 计算昨日 ROI(使用数据中最新的日期,而非系统昨日)
  76. yesterday = df_ad["date"].max()
  77. print(f"\n④ 计算昨日 ROI ({yesterday})...")
  78. df_ad = _calculate_yesterday_roi(df_ad, yesterday)
  79. # ⑤ 计算 7 日汇总
  80. print("\n⑤ 计算 7 日汇总...")
  81. df_7d = _calculate_7d_summary(df_ad, yesterday)
  82. print(f" 7 日汇总: {len(df_7d)} 个广告")
  83. # ⑥ 计算 30 日汇总
  84. print("\n⑥ 计算 30 日汇总...")
  85. df_30d = _calculate_30d_summary(df_ad)
  86. # ⑦ 检测衰退信号(出价变化 + 创意变化)
  87. print("\n⑦ 检测衰退信号...")
  88. ad_ids = df_ad["ad_id"].unique().tolist()
  89. df_decay = _detect_decay_signals(ad_ids, raw_dir, ad_status_dir, yesterday)
  90. print(f" 衰退信号检测: {len(df_decay)} 个广告")
  91. # ⑧ 合并所有指标
  92. print("\n⑧ 合并所有指标...")
  93. # 提取最新一天的广告属性(ad_name, create_time 等)
  94. # 注意:verify 脚本加载的是 raw creative CSV,不含 configured_status/bid_amount
  95. _attr_cols = ["ad_id", "ad_name", "account_id", "create_time", "configured_status", "bid_amount"]
  96. _attr_cols = [c for c in _attr_cols if c in df_ad.columns]
  97. latest_ad_attrs = df_ad[df_ad["date"] == yesterday][_attr_cols].drop_duplicates(subset=["ad_id"])
  98. df_final = latest_ad_attrs.merge(df_7d, on="ad_id", how="left")
  99. df_final = df_final.merge(df_30d, on="ad_id", how="left")
  100. df_final = df_final.merge(df_decay, on="ad_id", how="left")
  101. # 重命名 动态ROI_latest → 动态ROI,动态ROI_7日均值_latest → 动态ROI_7日均值
  102. if "动态ROI_latest" in df_final.columns:
  103. df_final.rename(columns={"动态ROI_latest": "动态ROI"}, inplace=True)
  104. if "动态ROI_7日均值_latest" in df_final.columns:
  105. df_final.rename(columns={"动态ROI_7日均值_latest": "动态ROI_7日均值"}, inplace=True)
  106. # 填充缺失值
  107. df_final["bid_increased_7d"] = df_final["bid_increased_7d"].fillna(False)
  108. df_final["creative_changed_7d"] = df_final["creative_changed_7d"].fillna(False)
  109. # 添加人群包和广告年龄
  110. df_final["audience_tier"] = df_final["ad_name"].apply(_extract_audience_tier)
  111. df_final["ad_age_days"] = df_final["create_time"].apply(_calculate_ad_age_days)
  112. print(f" 最终数据: {len(df_final)} 个广告, {len(df_final.columns)} 列")
  113. # ⑨ 运行决策引擎
  114. print("\n⑨ 运行三维度决策引擎...")
  115. f_roi_mean_all = df_final["动态ROI_7日均值"].mean()
  116. print(f" 全体 动态ROI_7日均值 均值: {f_roi_mean_all:.4f}")
  117. context = {
  118. "动态ROI_mean_all": f_roi_mean_all,
  119. "min_ad_age_days": MIN_AD_AGE_DAYS,
  120. "min_daily_cost": MIN_DAILY_COST,
  121. "roi_low_factor": ROI_LOW_FACTOR,
  122. "no_spend_threshold": NO_SPEND_THRESHOLD,
  123. "stable_spend_threshold": STABLE_SPEND_THRESHOLD,
  124. }
  125. df_decision = _run_decision_engine(df_final, context)
  126. print(f" 决策完成: {len(df_decision)} 个广告")
  127. # ⑩ 统计
  128. print("\n⑩ 决策统计:")
  129. action_counts = df_decision["action"].value_counts()
  130. for action, count in action_counts.items():
  131. print(f" {action}: {count} 个")
  132. dimension_counts = df_decision["dimension"].value_counts()
  133. print("\n 命中维度:")
  134. for dim, count in dimension_counts.items():
  135. print(f" {dim}: {count} 个")
  136. # ⑪ 生成报告
  137. print("\n⑪ 生成报告...")
  138. reports_dir.mkdir(parents=True, exist_ok=True)
  139. # 选择输出列
  140. cols = [c for c in OUTPUT_COLUMNS if c in df_decision.columns]
  141. df_out = df_decision[cols].copy()
  142. # 排序:关停在前
  143. df_out["_sort"] = (df_out["action"] == "pause").astype(int) * -1
  144. df_out = df_out.sort_values(["_sort", "cost_7d_total"], ascending=[True, False])
  145. df_out.drop(columns=["_sort"], inplace=True)
  146. # CSV
  147. csv_path = reports_dir / f"decision_{yesterday}_verify.csv"
  148. df_out.to_csv(csv_path, index=False, encoding="utf-8-sig")
  149. print(f" CSV: {csv_path}")
  150. # XLSX
  151. xlsx_path = reports_dir / f"decision_{yesterday}_verify.xlsx"
  152. _write_xlsx_with_format(df_out, xlsx_path)
  153. print(f" XLSX: {xlsx_path}")
  154. print("\n=" * 60)
  155. print("验证完成!")
  156. print("=" * 60)
  157. # 显示前 10 个关停广告
  158. pause_ads = df_out[df_out["action"] == "pause"].head(10)
  159. if not pause_ads.empty:
  160. print("\n前 10 个关停广告:")
  161. for _, row in pause_ads.iterrows():
  162. print(f" {int(row['ad_id'])}: {row['ad_name'][:30]}")
  163. print(f" 维度: {row['dimension']}, 理由: {row['reason']}")
  164. print(f" 动态ROI: {row.get('动态ROI', 'N/A')}, 7日消耗: {row.get('cost_7d_avg', 0):.2f}元")
  165. print()