| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203 |
- """
- 离线验证脚本 — auto_put_ad_mini V3
- 用本地 30 天 CSV 数据跑完整 V3 流程:
- 1. 加载创意级数据(outputs/raw/creative_*.csv)
- 2. 聚合创意→广告
- 3. 计算 动态ROI
- 4. 运行三维度决策引擎
- 5. 生成决策报告(CSV + XLSX)
- 用法:
- cd /Users/liulidong/project/agent/Agent
- .venv/bin/python3 examples/auto_put_ad_mini/verify_decision.py
- """
- import sys
- from datetime import datetime, timedelta
- from pathlib import Path
- sys.path.insert(0, str(Path(__file__).parent.parent.parent))
- import pandas as pd
- from examples.auto_put_ad_mini.config import (
- MIN_DAILY_COST,
- MIN_AD_AGE_DAYS,
- ROI_LOW_FACTOR,
- NO_SPEND_THRESHOLD,
- STABLE_SPEND_THRESHOLD,
- )
- from examples.auto_put_ad_mini.tools.roi_calculator import (
- _aggregate_creative_to_ad,
- _calculate_f7_dynamic_roi,
- _calculate_yesterday_roi,
- _calculate_7d_summary,
- _calculate_30d_summary,
- )
- from examples.auto_put_ad_mini.tools.ad_decision import (
- _extract_audience_tier,
- _calculate_ad_age_days,
- _run_decision_engine,
- _detect_decay_signals,
- )
- from examples.auto_put_ad_mini.tools.report_generator import (
- _write_xlsx_with_format,
- OUTPUT_COLUMNS,
- )
- base_dir = Path(__file__).parent
- raw_dir = base_dir / "outputs" / "raw"
- ad_status_dir = base_dir / "outputs" / "ad_status"
- reports_dir = base_dir / "outputs" / "reports"
- print("=" * 60)
- print("V3 离线验证 — 完整流程测试")
- print("=" * 60)
- # ① 加载 30 天创意级数据
- print("\n① 加载创意级数据...")
- creative_files = sorted(raw_dir.glob("creative_*.csv"))
- if not creative_files:
- print(f"错误: {raw_dir} 下没有找到 creative_*.csv 文件")
- print("请先运行: .venv/bin/python3 examples/auto_put_ad_mini/fetch_data.py --days 30")
- sys.exit(1)
- print(f" 找到 {len(creative_files)} 个文件")
- dfs = []
- for f in creative_files:
- df = pd.read_csv(f)
- # 从文件名提取日期
- date_str = f.stem.replace("creative_", "")
- df["date"] = date_str
- dfs.append(df)
- print(f" {f.name}: {len(df)} 行")
- df_creative = pd.concat(dfs, ignore_index=True)
- print(f" 合并后: {len(df_creative)} 行")
- # ② 聚合创意→广告
- print("\n② 聚合创意→广告...")
- df_ad = _aggregate_creative_to_ad(df_creative)
- print(f" 广告级数据: {len(df_ad)} 行(ad_id × date)")
- # ③ 计算 动态ROI
- print("\n③ 计算 动态ROI...")
- df_ad = _calculate_f7_dynamic_roi(df_ad, min_daily_cost=MIN_DAILY_COST)
- print(f" 已添加: T0裂变系数, arpu, 当日裂变收益率, 当日回流倍数, 裂变效率稳定因子, 动态ROI, 动态ROI_7日均值")
- # ④ 计算昨日 ROI(使用数据中最新的日期,而非系统昨日)
- yesterday = df_ad["date"].max()
- print(f"\n④ 计算昨日 ROI ({yesterday})...")
- df_ad = _calculate_yesterday_roi(df_ad, yesterday)
- # ⑤ 计算 7 日汇总
- print("\n⑤ 计算 7 日汇总...")
- df_7d = _calculate_7d_summary(df_ad, yesterday)
- print(f" 7 日汇总: {len(df_7d)} 个广告")
- # ⑥ 计算 30 日汇总
- print("\n⑥ 计算 30 日汇总...")
- df_30d = _calculate_30d_summary(df_ad)
- # ⑦ 检测衰退信号(出价变化 + 创意变化)
- print("\n⑦ 检测衰退信号...")
- ad_ids = df_ad["ad_id"].unique().tolist()
- df_decay = _detect_decay_signals(ad_ids, raw_dir, ad_status_dir, yesterday)
- print(f" 衰退信号检测: {len(df_decay)} 个广告")
- # ⑧ 合并所有指标
- print("\n⑧ 合并所有指标...")
- # 提取最新一天的广告属性(ad_name, create_time 等)
- # 注意:verify 脚本加载的是 raw creative CSV,不含 configured_status/bid_amount
- _attr_cols = ["ad_id", "ad_name", "account_id", "create_time", "configured_status", "bid_amount"]
- _attr_cols = [c for c in _attr_cols if c in df_ad.columns]
- latest_ad_attrs = df_ad[df_ad["date"] == yesterday][_attr_cols].drop_duplicates(subset=["ad_id"])
- df_final = latest_ad_attrs.merge(df_7d, on="ad_id", how="left")
- df_final = df_final.merge(df_30d, on="ad_id", how="left")
- df_final = df_final.merge(df_decay, on="ad_id", how="left")
- # 重命名 动态ROI_latest → 动态ROI,动态ROI_7日均值_latest → 动态ROI_7日均值
- if "动态ROI_latest" in df_final.columns:
- df_final.rename(columns={"动态ROI_latest": "动态ROI"}, inplace=True)
- if "动态ROI_7日均值_latest" in df_final.columns:
- df_final.rename(columns={"动态ROI_7日均值_latest": "动态ROI_7日均值"}, inplace=True)
- # 填充缺失值
- df_final["bid_increased_7d"] = df_final["bid_increased_7d"].fillna(False)
- df_final["creative_changed_7d"] = df_final["creative_changed_7d"].fillna(False)
- # 添加人群包和广告年龄
- df_final["audience_tier"] = df_final["ad_name"].apply(_extract_audience_tier)
- df_final["ad_age_days"] = df_final["create_time"].apply(_calculate_ad_age_days)
- print(f" 最终数据: {len(df_final)} 个广告, {len(df_final.columns)} 列")
- # ⑨ 运行决策引擎
- print("\n⑨ 运行三维度决策引擎...")
- f_roi_mean_all = df_final["动态ROI_7日均值"].mean()
- print(f" 全体 动态ROI_7日均值 均值: {f_roi_mean_all:.4f}")
- context = {
- "动态ROI_mean_all": f_roi_mean_all,
- "min_ad_age_days": MIN_AD_AGE_DAYS,
- "min_daily_cost": MIN_DAILY_COST,
- "roi_low_factor": ROI_LOW_FACTOR,
- "no_spend_threshold": NO_SPEND_THRESHOLD,
- "stable_spend_threshold": STABLE_SPEND_THRESHOLD,
- }
- df_decision = _run_decision_engine(df_final, context)
- print(f" 决策完成: {len(df_decision)} 个广告")
- # ⑩ 统计
- print("\n⑩ 决策统计:")
- action_counts = df_decision["action"].value_counts()
- for action, count in action_counts.items():
- print(f" {action}: {count} 个")
- dimension_counts = df_decision["dimension"].value_counts()
- print("\n 命中维度:")
- for dim, count in dimension_counts.items():
- print(f" {dim}: {count} 个")
- # ⑪ 生成报告
- print("\n⑪ 生成报告...")
- reports_dir.mkdir(parents=True, exist_ok=True)
- # 选择输出列
- cols = [c for c in OUTPUT_COLUMNS if c in df_decision.columns]
- df_out = df_decision[cols].copy()
- # 排序:关停在前
- df_out["_sort"] = (df_out["action"] == "pause").astype(int) * -1
- df_out = df_out.sort_values(["_sort", "cost_7d_total"], ascending=[True, False])
- df_out.drop(columns=["_sort"], inplace=True)
- # CSV
- csv_path = reports_dir / f"decision_{yesterday}_verify.csv"
- df_out.to_csv(csv_path, index=False, encoding="utf-8-sig")
- print(f" CSV: {csv_path}")
- # XLSX
- xlsx_path = reports_dir / f"decision_{yesterday}_verify.xlsx"
- _write_xlsx_with_format(df_out, xlsx_path)
- print(f" XLSX: {xlsx_path}")
- print("\n=" * 60)
- print("验证完成!")
- print("=" * 60)
- # 显示前 10 个关停广告
- pause_ads = df_out[df_out["action"] == "pause"].head(10)
- if not pause_ads.empty:
- print("\n前 10 个关停广告:")
- for _, row in pause_ads.iterrows():
- print(f" {int(row['ad_id'])}: {row['ad_name'][:30]}")
- print(f" 维度: {row['dimension']}, 理由: {row['reason']}")
- print(f" 动态ROI: {row.get('动态ROI', 'N/A')}, 7日消耗: {row.get('cost_7d_avg', 0):.2f}元")
- print()
|