verify_decision.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. """
  2. 离线验证脚本 — auto_put_ad_mini V3
  3. 用本地 30 天 CSV 数据跑完整 V3 流程:
  4. 1. 加载创意级数据(outputs/raw/creative_*.csv)
  5. 2. 聚合创意→广告
  6. 3. 计算 动态ROI
  7. 4. 运行三维度决策引擎
  8. 5. 生成决策报告(CSV + XLSX)
  9. 用法:
  10. cd /Users/liulidong/project/agent/Agent
  11. .venv/bin/python3 examples/auto_put_ad_mini/verify_decision.py
  12. """
  13. import sys
  14. from datetime import datetime, timedelta
  15. from pathlib import Path
  16. sys.path.insert(0, str(Path(__file__).parent.parent.parent))
  17. import pandas as pd
  18. from examples.auto_put_ad_mini.config import (
  19. MIN_DAILY_COST,
  20. MIN_AD_AGE_DAYS,
  21. ROI_LOW_FACTOR,
  22. NO_SPEND_THRESHOLD,
  23. STABLE_SPEND_THRESHOLD,
  24. )
  25. from examples.auto_put_ad_mini.tools.roi_calculator import (
  26. _aggregate_creative_to_ad,
  27. _calculate_f7_dynamic_roi,
  28. _calculate_yesterday_roi,
  29. _calculate_7d_summary,
  30. _calculate_30d_summary,
  31. )
  32. from examples.auto_put_ad_mini.tools.ad_decision import (
  33. _extract_audience_tier,
  34. _calculate_ad_age_days,
  35. _detect_decay_signals,
  36. )
  37. from examples.auto_put_ad_mini.tools.report_generator import (
  38. _write_xlsx_with_format,
  39. OUTPUT_COLUMNS,
  40. )
  41. base_dir = Path(__file__).parent
  42. raw_dir = base_dir / "outputs" / "raw"
  43. ad_status_dir = base_dir / "outputs" / "ad_status"
  44. reports_dir = base_dir / "outputs" / "reports"
  45. print("=" * 60)
  46. print("V3 离线验证 — 完整流程测试")
  47. print("=" * 60)
  48. # ① 加载 30 天创意级数据
  49. print("\n① 加载创意级数据...")
  50. creative_files = sorted(raw_dir.glob("creative_*.csv"))
  51. if not creative_files:
  52. print(f"错误: {raw_dir} 下没有找到 creative_*.csv 文件")
  53. print("请先运行: .venv/bin/python3 examples/auto_put_ad_mini/fetch_data.py --days 30")
  54. sys.exit(1)
  55. print(f" 找到 {len(creative_files)} 个文件")
  56. dfs = []
  57. for f in creative_files:
  58. df = pd.read_csv(f)
  59. # 从文件名提取日期
  60. date_str = f.stem.replace("creative_", "")
  61. df["date"] = date_str
  62. dfs.append(df)
  63. print(f" {f.name}: {len(df)} 行")
  64. df_creative = pd.concat(dfs, ignore_index=True)
  65. print(f" 合并后: {len(df_creative)} 行")
  66. # ② 聚合创意→广告
  67. print("\n② 聚合创意→广告...")
  68. df_ad = _aggregate_creative_to_ad(df_creative)
  69. print(f" 广告级数据: {len(df_ad)} 行(ad_id × date)")
  70. # ③ 计算 动态ROI
  71. print("\n③ 计算 动态ROI...")
  72. df_ad = _calculate_f7_dynamic_roi(df_ad, min_daily_cost=MIN_DAILY_COST)
  73. print(f" 已添加: T0裂变系数, arpu, 当日裂变收益率, 当日回流倍数, 裂变效率稳定因子, 动态ROI, 动态ROI_7日均值")
  74. # ④ 计算昨日 ROI(使用数据中最新的日期,而非系统昨日)
  75. yesterday = df_ad["date"].max()
  76. print(f"\n④ 计算昨日 ROI ({yesterday})...")
  77. df_ad = _calculate_yesterday_roi(df_ad, yesterday)
  78. # ⑤ 计算 7 日汇总
  79. print("\n⑤ 计算 7 日汇总...")
  80. df_7d = _calculate_7d_summary(df_ad, yesterday)
  81. print(f" 7 日汇总: {len(df_7d)} 个广告")
  82. # ⑥ 计算 30 日汇总
  83. print("\n⑥ 计算 30 日汇总...")
  84. df_30d = _calculate_30d_summary(df_ad)
  85. # ⑦ 检测衰退信号(出价变化 + 创意变化)
  86. print("\n⑦ 检测衰退信号...")
  87. ad_ids = df_ad["ad_id"].unique().tolist()
  88. df_decay = _detect_decay_signals(ad_ids, raw_dir, ad_status_dir, yesterday)
  89. print(f" 衰退信号检测: {len(df_decay)} 个广告")
  90. # ⑧ 合并所有指标
  91. print("\n⑧ 合并所有指标...")
  92. # 提取最新一天的广告属性(ad_name, create_time 等)
  93. # 注意:verify 脚本加载的是 raw creative CSV,不含 configured_status/bid_amount
  94. _attr_cols = ["ad_id", "ad_name", "account_id", "create_time", "configured_status", "bid_amount"]
  95. _attr_cols = [c for c in _attr_cols if c in df_ad.columns]
  96. latest_ad_attrs = df_ad[df_ad["date"] == yesterday][_attr_cols].drop_duplicates(subset=["ad_id"])
  97. df_final = latest_ad_attrs.merge(df_7d, on="ad_id", how="left")
  98. df_final = df_final.merge(df_30d, on="ad_id", how="left")
  99. df_final = df_final.merge(df_decay, on="ad_id", how="left")
  100. # 重命名 动态ROI_latest → 动态ROI,动态ROI_7日均值_latest → 动态ROI_7日均值
  101. if "动态ROI_latest" in df_final.columns:
  102. df_final.rename(columns={"动态ROI_latest": "动态ROI"}, inplace=True)
  103. if "动态ROI_7日均值_latest" in df_final.columns:
  104. df_final.rename(columns={"动态ROI_7日均值_latest": "动态ROI_7日均值"}, inplace=True)
  105. # 填充缺失值
  106. df_final["bid_increased_7d"] = df_final["bid_increased_7d"].fillna(False)
  107. df_final["creative_changed_7d"] = df_final["creative_changed_7d"].fillna(False)
  108. # 添加人群包和广告年龄
  109. df_final["audience_tier"] = df_final["ad_name"].apply(_extract_audience_tier)
  110. df_final["ad_age_days"] = df_final["create_time"].apply(_calculate_ad_age_days)
  111. print(f" 最终数据: {len(df_final)} 个广告, {len(df_final.columns)} 列")
  112. # ⑨ 数据统计(规则引擎已废弃,仅输出指标统计)
  113. print("\n⑨ 数据统计...")
  114. f_roi_mean_all = df_final["动态ROI_7日均值"].mean()
  115. print(f" 全体 动态ROI_7日均值 均值: {f_roi_mean_all:.4f}")
  116. print(f" 关停线(均值×{ROI_LOW_FACTOR}): {f_roi_mean_all * ROI_LOW_FACTOR:.4f}")
  117. print(f" 广告总数: {len(df_final)}")
  118. # ⑩ 生成 metrics CSV(供智能引擎使用)
  119. print("\n⑩ 生成 metrics CSV...")
  120. reports_dir.mkdir(parents=True, exist_ok=True)
  121. cols = [c for c in OUTPUT_COLUMNS if c in df_final.columns]
  122. df_out = df_final[cols].copy()
  123. csv_path = reports_dir / f"metrics_{yesterday}_verify.csv"
  124. df_out.to_csv(csv_path, index=False, encoding="utf-8-sig")
  125. print(f" CSV: {csv_path}")
  126. # XLSX
  127. xlsx_path = reports_dir / f"metrics_{yesterday}_verify.xlsx"
  128. _write_xlsx_with_format(df_out, xlsx_path)
  129. print(f" XLSX: {xlsx_path}")
  130. print("\n=" * 60)
  131. print("验证完成!(注:规则引擎已废弃,决策请使用智能引擎 execute_once.py)")
  132. print("=" * 60)