roi_calculator.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586
  1. """
  2. ROI 计算工具 — auto_put_ad_mini V3
  3. 职责:
  4. 1. 创意级数据 → 广告级聚合(GROUP BY ad_id + date, SUM)
  5. 2. 计算 动态ROI(7 天滚动窗口)
  6. 3. 计算昨日 ROI(简单 ROI)
  7. 4. 输出广告级指标表(供决策引擎使用)
  8. 核心公式:
  9. T0裂变系数 = SUM(fission0_count) / SUM(open_count)
  10. arpu = SUM(total_revenue) / SUM(total_return_count)
  11. 当日裂变收益率 = SUM(fission0_count) * arpu / SUM(cost)
  12. 当日回流倍数 = SUM(total_return_count) / SUM(open_count)
  13. 回流倍数_7日均值 = mean(当日回流倍数) over 7天
  14. T0裂变系数_7日均值 = mean(T0裂变系数) over 7天
  15. 裂变效率稳定因子 = 回流倍数_7日均值 / T0裂变系数_7日均值
  16. 动态ROI = 当日裂变收益率(当天) * 裂变效率稳定因子
  17. 动态ROI_7日均值 = mean(动态ROI) over 7天 ← 决策参考值
  18. 前置条件:
  19. - 日消耗 < 100 元的天数不参与 ROI 计算
  20. - 至少需要 3 天有效数据才能计算 ROI 均值
  21. """
  22. import logging
  23. import sys
  24. from datetime import datetime, timedelta
  25. from pathlib import Path
  26. from typing import Dict, List, Optional
  27. import pandas as pd
  28. import numpy as np
  29. from agent.tools import tool
  30. from agent.tools.models import ToolContext, ToolResult
  31. logger = logging.getLogger(__name__)
  32. # 添加当前目录到路径以导入其他工具模块
  33. _TOOLS_DIR = Path(__file__).resolve().parent
  34. if str(_TOOLS_DIR) not in sys.path:
  35. sys.path.insert(0, str(_TOOLS_DIR))
  36. _MINI_DIR = Path(__file__).resolve().parent.parent
  37. _RAW_DIR = _MINI_DIR / "outputs" / "raw"
  38. _AD_STATUS_DIR = _MINI_DIR / "outputs" / "ad_status"
  39. _MERGED_DIR = _MINI_DIR / "outputs" / "merged"
  40. # 延迟导入 _extract_audience_tier(避免循环导入)
  41. def _get_extract_audience_tier():
  42. """延迟导入人群包提取函数"""
  43. try:
  44. from ad_decision import _extract_audience_tier
  45. return _extract_audience_tier
  46. except ImportError:
  47. logger.warning("无法导入 _extract_audience_tier,使用默认实现")
  48. # 提供一个简单的默认实现
  49. def default_extract(ad_name: str) -> str:
  50. if not ad_name:
  51. return "default"
  52. # 简化版:直接匹配常见模式
  53. ad_name_lower = str(ad_name).lower()
  54. if "r500" in ad_name_lower or "r_500" in ad_name_lower:
  55. return "R500"
  56. elif "r330+" in ad_name_lower or "回流330+" in ad_name_lower:
  57. return "R330+"
  58. elif "r330" in ad_name_lower or "r_330" in ad_name_lower or "回流330" in ad_name_lower:
  59. return "R330"
  60. elif "r180" in ad_name_lower or "r_180" in ad_name_lower or "回流180" in ad_name_lower:
  61. return "R180"
  62. elif "r100" in ad_name_lower or "r_100" in ad_name_lower or "回流100" in ad_name_lower:
  63. return "R100"
  64. elif "r50" in ad_name_lower or "r_50" in ad_name_lower or "回流50" in ad_name_lower:
  65. return "R50"
  66. elif "r10" in ad_name_lower or "r_10" in ad_name_lower:
  67. return "R10"
  68. elif "r2" in ad_name_lower or "r_2" in ad_name_lower:
  69. return "R2"
  70. return "default"
  71. return default_extract
  72. # ===== 创意 → 广告聚合 =====
  73. def _aggregate_creative_to_ad(df: pd.DataFrame) -> pd.DataFrame:
  74. """
  75. 创意级数据聚合到广告级(按 ad_id + date)。
  76. 输入:创意级 DataFrame(多日数据,包含 creative_id)
  77. 输出:广告级 DataFrame(每个 ad_id 每天一行)
  78. 聚合规则:
  79. - 数值字段:SUM(cost, revenue, open_count, fission0_count, total_return_count, view_count, etc.)
  80. - 广告属性:取 FIRST(ad_name, account_id, create_time, bid_amount, configured_status)
  81. - 创意计数:COUNT(DISTINCT creative_id)
  82. """
  83. if df.empty:
  84. return pd.DataFrame()
  85. # 添加 date 列(从文件名或 create_time 推断,这里假设已有 bizdate 列)
  86. # 如果没有,需要从外部传入或从文件名解析
  87. # 将 bizdate 映射为 date 列(bizdate 格式: 20260412)
  88. if "bizdate" in df.columns:
  89. df = df.copy()
  90. df["date"] = df["bizdate"].astype(str)
  91. elif "date" not in df.columns:
  92. logger.warning("DataFrame 缺少 bizdate/date 列,无法按日聚合")
  93. return pd.DataFrame()
  94. # 列名映射:CSV 实际列名 → 内部标准名
  95. COLUMN_RENAME = {
  96. "首层小程序打开数": "open_count",
  97. "裂变0层回流数": "fission0_count",
  98. "裂变层回流数": "fission_count",
  99. "裂变1层回流数": "fission1_count",
  100. "总回流人数": "total_return_count",
  101. "总收入": "total_revenue",
  102. "ad_status": "configured_status",
  103. }
  104. # 只重命名存在的列
  105. rename_map = {k: v for k, v in COLUMN_RENAME.items() if k in df.columns}
  106. df = df.rename(columns=rename_map)
  107. agg_dict = {
  108. # 广告属性(取第一个值)
  109. "account_id": "first",
  110. "ad_name": "first",
  111. "create_time": "first",
  112. "configured_status": "first",
  113. "bid_amount": "first",
  114. "广告优化目标": "first",
  115. "package_name": "first", # 人群包名称(如 R50*泛知识*生活科普)
  116. "人群包人数": "first",
  117. # 数值指标(SUM — 聚合后再计算派生比值,不能直接平均)
  118. "cost": "sum",
  119. "view_count": "sum",
  120. "key_page_view_count": "sum",
  121. "key_page_uv": "sum",
  122. "valid_click_count": "sum",
  123. "conversions_count": "sum",
  124. "open_count": "sum", # 首层小程序打开数
  125. "fission0_count": "sum", # 裂变0层回流数
  126. "fission_count": "sum", # 裂变层回流数(总)
  127. "fission1_count": "sum", # 裂变1层回流数
  128. "total_return_count": "sum", # 总回流人数
  129. "total_revenue": "sum", # 总收入
  130. # 创意计数
  131. "creative_id": "nunique",
  132. }
  133. # 过滤掉不存在的列
  134. agg_dict = {k: v for k, v in agg_dict.items() if k in df.columns}
  135. grouped = df.groupby(["ad_id", "date"], as_index=False).agg(agg_dict)
  136. grouped.rename(columns={"creative_id": "creative_count"}, inplace=True)
  137. return grouped
  138. # ===== 动态ROI 计算 =====
  139. def _calculate_f7_dynamic_roi(
  140. ad_df: pd.DataFrame,
  141. min_daily_cost: float = 100.0
  142. ) -> pd.DataFrame:
  143. """
  144. 计算 动态ROI(每个广告每天一个值)。
  145. 输入:广告级 DataFrame(ad_id + date + 聚合指标)
  146. 输出:添加以下列的 DataFrame
  147. - T0裂变系数, arpu(每天基础指标)
  148. - 当日裂变收益率, 当日回流倍数(每天派生指标)
  149. - T0裂变系数_7日均值, 回流倍数_7日均值(7 天滚动均值)
  150. - 裂变效率稳定因子(= 回流倍数_7日均值 / T0裂变系数_7日均值)
  151. - 动态ROI(= 当日裂变收益率 × 裂变效率稳定因子,单日值)
  152. - 动态ROI_7日均值(决策参考值)
  153. 前置条件:
  154. - 日消耗 < min_daily_cost 的天数:T0裂变系数/arpu/a/b 设为 NaN,不参与 7 日均值计算
  155. """
  156. if ad_df.empty:
  157. return ad_df
  158. # 确保按 ad_id + date 排序
  159. ad_df = ad_df.sort_values(["ad_id", "date"]).reset_index(drop=True)
  160. # 计算每天的基础指标
  161. ad_df["T0裂变系数"] = np.where(
  162. (ad_df["open_count"] > 0) & (ad_df["cost"] >= min_daily_cost),
  163. ad_df["fission0_count"] / ad_df["open_count"],
  164. np.nan
  165. )
  166. ad_df["arpu"] = np.where(
  167. (ad_df["total_return_count"] > 0) & (ad_df["cost"] >= min_daily_cost),
  168. ad_df["total_revenue"] / ad_df["total_return_count"],
  169. np.nan
  170. )
  171. # 当日裂变收益率 = T0裂变数 × arpu / cost
  172. ad_df["当日裂变收益率"] = np.where(
  173. (ad_df["cost"] > 0) & (ad_df["cost"] >= min_daily_cost),
  174. ad_df["fission0_count"] * ad_df["arpu"] / ad_df["cost"],
  175. np.nan
  176. )
  177. # 当日回流倍数 = 总回流人数 / 首层打开数
  178. ad_df["当日回流倍数"] = np.where(
  179. (ad_df["open_count"] > 0) & (ad_df["cost"] >= min_daily_cost),
  180. ad_df["total_return_count"] / ad_df["open_count"],
  181. np.nan
  182. )
  183. # 7 天滚动均值(按 ad_id 分组,至少 3 天数据即可计算)
  184. ad_df["T0裂变系数_7日均值"] = (
  185. ad_df.groupby("ad_id")["T0裂变系数"]
  186. .transform(lambda x: x.rolling(window=7, min_periods=3).mean())
  187. )
  188. ad_df["回流倍数_7日均值"] = (
  189. ad_df.groupby("ad_id")["当日回流倍数"]
  190. .transform(lambda x: x.rolling(window=7, min_periods=3).mean())
  191. )
  192. # 裂变效率稳定因子 = 回流倍数_7日均值 / T0裂变系数_7日均值
  193. ad_df["裂变效率稳定因子"] = np.where(
  194. ad_df["T0裂变系数_7日均值"] > 0,
  195. ad_df["回流倍数_7日均值"] / ad_df["T0裂变系数_7日均值"],
  196. np.nan
  197. )
  198. # 动态ROI = 当日裂变收益率 × 裂变效率稳定因子
  199. ad_df["动态ROI"] = ad_df["当日裂变收益率"] * ad_df["裂变效率稳定因子"]
  200. # 动态ROI_7日均值(决策参考值)
  201. ad_df["动态ROI_7日均值"] = (
  202. ad_df.groupby("ad_id")["动态ROI"]
  203. .transform(lambda x: x.rolling(window=7, min_periods=3).mean())
  204. )
  205. # ===== 新增:计算有效ROI数据天数(非NaN的天数)=====
  206. ad_df["roi_valid_days"] = (
  207. ad_df.groupby("ad_id")["动态ROI"]
  208. .transform(lambda x: x.notna().sum())
  209. )
  210. return ad_df
  211. # ===== 昨日 ROI 计算 =====
  212. def _calculate_yesterday_roi(ad_df: pd.DataFrame, yesterday: str) -> pd.DataFrame:
  213. """
  214. 计算昨日 ROI(简单 ROI = revenue / cost)。
  215. 输入:广告级 DataFrame
  216. 输出:添加 yesterday_roi 列
  217. """
  218. if ad_df.empty:
  219. return ad_df
  220. # 筛选昨日数据
  221. yesterday_df = ad_df[ad_df["date"] == yesterday].copy()
  222. yesterday_df["yesterday_roi"] = np.where(
  223. yesterday_df["cost"] > 0,
  224. yesterday_df["total_revenue"] / yesterday_df["cost"],
  225. np.nan
  226. )
  227. # 同时保留昨日消耗(用于关停消耗门槛判断,投手经验2.4)
  228. yesterday_df["yesterday_cost"] = yesterday_df["cost"]
  229. # 合并回原 DataFrame(保留昨日的 ROI + 昨日消耗)
  230. ad_df = ad_df.merge(
  231. yesterday_df[["ad_id", "yesterday_roi", "yesterday_cost"]],
  232. on="ad_id",
  233. how="left"
  234. )
  235. return ad_df
  236. # ===== 7 日汇总指标 =====
  237. def _calculate_7d_summary(ad_df: pd.DataFrame, end_date: str) -> pd.DataFrame:
  238. """
  239. 计算最近 7 天汇总指标(供决策引擎使用)。
  240. 输出列:
  241. - cost_7d_total, cost_7d_avg
  242. - revenue_7d_total
  243. - 动态ROI_latest(单日值,最新一天)
  244. - 动态ROI_7日均值_latest(决策参考值,最新一天的7日均值)
  245. """
  246. if ad_df.empty:
  247. return pd.DataFrame()
  248. end_dt = datetime.strptime(end_date, "%Y%m%d")
  249. start_dt = end_dt - timedelta(days=6)
  250. start_date = start_dt.strftime("%Y%m%d")
  251. # 筛选 7 天数据
  252. df_7d = ad_df[
  253. (ad_df["date"] >= start_date) & (ad_df["date"] <= end_date)
  254. ].copy()
  255. # 按 ad_id 聚合
  256. agg_cols = {"cost": "sum", "total_revenue": "sum"}
  257. # 7日累计 click/view(供人群包基线计算 CTR)
  258. if "valid_click_count" in df_7d.columns:
  259. agg_cols["valid_click_count"] = "sum"
  260. if "view_count" in df_7d.columns:
  261. agg_cols["view_count"] = "sum"
  262. summary = df_7d.groupby("ad_id", as_index=False).agg(agg_cols)
  263. summary.rename(columns={
  264. "cost": "cost_7d_total",
  265. "total_revenue": "revenue_7d_total",
  266. }, inplace=True)
  267. summary["cost_7d_avg"] = summary["cost_7d_total"] / 7
  268. # 获取最新一天的 动态ROI + T0裂变系数_7日均值
  269. latest_cols_7d = ["ad_id", "动态ROI", "动态ROI_7日均值"]
  270. if "T0裂变系数_7日均值" in ad_df.columns:
  271. latest_cols_7d.append("T0裂变系数_7日均值")
  272. latest_df = ad_df[ad_df["date"] == end_date][[c for c in latest_cols_7d if c in ad_df.columns]].copy()
  273. latest_df.rename(columns={
  274. "动态ROI": "动态ROI_latest",
  275. "动态ROI_7日均值": "动态ROI_7日均值_latest"
  276. }, inplace=True)
  277. summary = summary.merge(latest_df, on="ad_id", how="left")
  278. return summary
  279. # ===== 30 日汇总指标 =====
  280. def _calculate_30d_summary(ad_df: pd.DataFrame) -> pd.DataFrame:
  281. """
  282. 计算 30 天汇总指标。
  283. 输出列:
  284. - cost_30d_total, cost_30d_avg
  285. - stable_spend_days_30d(消耗 > 100 元的天数)
  286. """
  287. if ad_df.empty:
  288. return pd.DataFrame()
  289. summary = ad_df.groupby("ad_id", as_index=False).agg({
  290. "cost": ["sum", "mean"],
  291. })
  292. summary.columns = ["ad_id", "cost_30d_total", "cost_30d_avg"]
  293. # 稳定消耗天数(cost > 100)
  294. stable_days = (
  295. ad_df[ad_df["cost"] > 100]
  296. .groupby("ad_id", as_index=False)
  297. .size()
  298. .rename(columns={"size": "stable_spend_days_30d"})
  299. )
  300. summary = summary.merge(stable_days, on="ad_id", how="left")
  301. summary["stable_spend_days_30d"] = summary["stable_spend_days_30d"].fillna(0).astype(int)
  302. return summary
  303. # ===== 工具:计算 ROI 指标 =====
  304. @tool(description="计算 动态ROI + 昨日 ROI + 7日/30日汇总指标")
  305. async def calculate_roi_metrics(
  306. ctx: ToolContext = None,
  307. end_date: str = "yesterday",
  308. min_daily_cost: float = 100.0
  309. ) -> ToolResult:
  310. """
  311. 计算广告级 ROI 指标(V3)。
  312. 工作流:
  313. 1. 加载 30 天创意级 CSV
  314. 2. 聚合到广告级(ad_id + date)
  315. 3. 计算 动态ROI
  316. 4. 计算昨日 ROI
  317. 5. 计算 7 日/30 日汇总
  318. 6. 输出广告级指标表
  319. Args:
  320. end_date: 结束日期(YYYYMMDD 或 "yesterday")
  321. min_daily_cost: 日消耗阈值(元),低于此值的天数不参与 ROI 计算
  322. Returns:
  323. ToolResult 包含广告级指标 DataFrame
  324. """
  325. try:
  326. # 解析日期
  327. if end_date == "yesterday":
  328. end_dt = datetime.now() - timedelta(days=1)
  329. else:
  330. end_dt = datetime.strptime(end_date.replace("-", ""), "%Y%m%d")
  331. end_date_str = end_dt.strftime("%Y%m%d")
  332. start_dt = end_dt - timedelta(days=29)
  333. # 加载 30 天 merged 数据(已包含创意+广告状态)
  334. merged_dfs = []
  335. for i in range(30):
  336. date = (start_dt + timedelta(days=i)).strftime("%Y%m%d")
  337. csv_path = _MERGED_DIR / f"merged_{date}.csv"
  338. if not csv_path.exists():
  339. logger.warning("merged 数据缺失: %s", date)
  340. continue
  341. df = pd.read_csv(csv_path, dtype={"ad_id": str, "account_id": str})
  342. # bizdate 列已存在,无需手动添加 date
  343. merged_dfs.append(df)
  344. if not merged_dfs:
  345. return ToolResult(
  346. title="ROI 计算失败",
  347. output=f"未找到任何 merged 数据 CSV({_MERGED_DIR})"
  348. )
  349. creative_df = pd.concat(merged_dfs, ignore_index=True)
  350. logger.info("加载 merged 数据: %d 行(%d 天)", len(creative_df), len(merged_dfs))
  351. # Step 1: 聚合到广告级
  352. ad_df = _aggregate_creative_to_ad(creative_df)
  353. logger.info("聚合到广告级: %d 行", len(ad_df))
  354. # Step 2: 计算 动态ROI
  355. ad_df = _calculate_f7_dynamic_roi(ad_df, min_daily_cost)
  356. # Step 3: 计算昨日 ROI
  357. ad_df = _calculate_yesterday_roi(ad_df, end_date_str)
  358. # Step 4: 计算 7 日汇总
  359. summary_7d = _calculate_7d_summary(ad_df, end_date_str)
  360. # Step 5: 计算 30 日汇总
  361. summary_30d = _calculate_30d_summary(ad_df)
  362. # Step 6: 合并所有指标(取最新一天的广告属性)
  363. latest_cols = [
  364. "ad_id", "account_id", "ad_name", "create_time",
  365. "configured_status", "bid_amount", "creative_count",
  366. "package_name", # 人群包名称(如 R50*泛知识*生活科普)
  367. "yesterday_roi", "yesterday_cost", # 昨日ROI+昨日消耗(投手经验2.4关停门槛)
  368. ]
  369. # 只取存在的列(yesterday_cost 可能在 _calculate_yesterday_roi 中添加)
  370. latest_cols = [c for c in latest_cols if c in ad_df.columns]
  371. latest_ad = ad_df[ad_df["date"] == end_date_str][latest_cols].copy()
  372. result_df = latest_ad.merge(summary_7d, on="ad_id", how="left")
  373. result_df = result_df.merge(summary_30d, on="ad_id", how="left")
  374. # 计算广告年龄(天)
  375. result_df["ad_age_days"] = (
  376. (end_dt - pd.to_datetime(result_df["create_time"])).dt.days
  377. )
  378. # ===== 人群包字段 =====
  379. # audience_tier: 使用 package_name 原始值(如 R50*泛知识*生活科普)
  380. # r_tier: 从 ad_name 提取的 R 层级(如 R50),作为辅助分组
  381. extract_tier = _get_extract_audience_tier()
  382. result_df["r_tier"] = result_df["ad_name"].apply(extract_tier)
  383. # 优先使用 package_name 作为 audience_tier,缺失时用 r_tier 兜底
  384. if "package_name" in result_df.columns:
  385. result_df["audience_tier"] = result_df["package_name"].fillna("").replace("", pd.NA)
  386. result_df["audience_tier"] = result_df["audience_tier"].fillna(result_df["r_tier"])
  387. else:
  388. result_df["audience_tier"] = result_df["r_tier"]
  389. # 获取 roi_valid_days(从 ad_df 最新一天的数据)
  390. latest_roi_valid = ad_df[ad_df["date"] == end_date_str][["ad_id", "roi_valid_days"]].copy()
  391. result_df = result_df.merge(latest_roi_valid, on="ad_id", how="left")
  392. result_df["roi_valid_days"] = result_df["roi_valid_days"].fillna(0).astype(int)
  393. # 重命名输出列,统一供决策引擎使用
  394. # 动态ROI_latest → 动态ROI(单日值,反映当日效率)
  395. # 动态ROI_7日均值_latest → 动态ROI_7日均值(决策参考值)
  396. if "动态ROI_latest" in result_df.columns:
  397. result_df.rename(columns={"动态ROI_latest": "动态ROI"}, inplace=True)
  398. if "动态ROI_7日均值_latest" in result_df.columns:
  399. result_df.rename(columns={"动态ROI_7日均值_latest": "动态ROI_7日均值"}, inplace=True)
  400. # 计算全体 动态ROI_7日均值 的均值(供决策引擎做相对比较)
  401. f7_7d_mean_all = result_df["动态ROI_7日均值"].mean() if "动态ROI_7日均值" in result_df.columns else float("nan")
  402. # 保存指标 CSV(供 get_ads_for_review 读取)
  403. metrics_dir = _MINI_DIR / "outputs"
  404. metrics_dir.mkdir(parents=True, exist_ok=True)
  405. metrics_csv = metrics_dir / f"metrics_{end_date_str}.csv"
  406. result_df.to_csv(metrics_csv, index=False, encoding="utf-8-sig")
  407. logger.info("指标 CSV 已保存: %s", metrics_csv)
  408. # 同时保存为 metrics_temp.csv(最新指标,供下游工具默认路径使用)
  409. metrics_temp = metrics_dir / "metrics_temp.csv"
  410. result_df.to_csv(metrics_temp, index=False, encoding="utf-8-sig")
  411. logger.info("指标临时文件已更新: %s", metrics_temp)
  412. # ===== 自动生成 portfolio_summary(人群包基线)=====
  413. # 这是决策引擎的硬依赖,直接在 ROI 计算完成后生成
  414. portfolio_tier_count = 0
  415. try:
  416. from tools.portfolio_metrics import _describe_group, _compute_daily_tier_snapshot, _compute_market_signal
  417. portfolio_dir = _MINI_DIR / "outputs" / "portfolio_summary"
  418. portfolio_dir.mkdir(parents=True, exist_ok=True)
  419. portfolio_file = portfolio_dir / f"portfolio_summary_{end_date_str}.json"
  420. by_tier = {}
  421. if "audience_tier" in result_df.columns:
  422. for tier, group in result_df.groupby("audience_tier"):
  423. by_tier[str(tier)] = _describe_group(group)
  424. by_tier_goal = {}
  425. goal_col = "广告优化目标" if "广告优化目标" in result_df.columns else None
  426. if "audience_tier" in result_df.columns and goal_col:
  427. for (tier, goal), group in result_df.groupby(["audience_tier", goal_col]):
  428. by_tier_goal[f"{tier}_{goal}"] = _describe_group(group)
  429. global_stats = _describe_group(result_df)
  430. by_date = _compute_daily_tier_snapshot(end_dt, days=7)
  431. market_signal = _compute_market_signal(by_date)
  432. import json as _json
  433. summary = {
  434. "end_date": end_date_str,
  435. "source_csv": str(metrics_csv),
  436. "by_audience_tier": by_tier,
  437. "by_tier_goal": by_tier_goal,
  438. "global": global_stats,
  439. "by_date": by_date,
  440. "market_signal": market_signal,
  441. }
  442. portfolio_file.write_text(
  443. _json.dumps(summary, ensure_ascii=False, indent=2),
  444. encoding="utf-8",
  445. )
  446. portfolio_tier_count = len(by_tier)
  447. logger.info("人群包基线已生成: %s(%d 个人群包)", portfolio_file, portfolio_tier_count)
  448. except Exception as e:
  449. logger.warning("人群包基线生成失败(不影响主流程): %s", e)
  450. output_lines = [
  451. f"ROI 计算完成(截至 {end_date_str})",
  452. f"广告总数: {len(result_df)}",
  453. f"动态ROI_7日均值 全体均值: {f7_7d_mean_all:.4f}(决策参考)",
  454. "",
  455. f"指标 CSV: {metrics_csv}",
  456. "",
  457. "指标列:",
  458. " - 动态ROI(单日值,反映当日效率)",
  459. " - 动态ROI_7日均值(决策参考值)",
  460. " - yesterday_roi(昨日简单 ROI)",
  461. " - cost_7d_total, cost_7d_avg, revenue_7d_total",
  462. " - cost_30d_total, cost_30d_avg, stable_spend_days_30d",
  463. " - ad_age_days, creative_count",
  464. " - audience_tier(人群包名称,如 R50*泛知识*生活科普,用于同类对比)",
  465. " - r_tier(R层级,如 R50,辅助分组)",
  466. " - roi_valid_days(有效ROI数据天数,用于置信度评估)",
  467. ]
  468. return ToolResult(
  469. title=f"ROI 计算完成({len(result_df)} 个广告)",
  470. output="\n".join(output_lines),
  471. metadata={
  472. "metrics_csv": str(metrics_csv),
  473. "动态ROI_7日均值_mean_all": f7_7d_mean_all,
  474. "end_date": end_date_str,
  475. "min_daily_cost": min_daily_cost,
  476. }
  477. )
  478. except Exception as e:
  479. logger.error("ROI 计算失败: %s", e, exc_info=True)
  480. return ToolResult(
  481. title="ROI 计算失败",
  482. output=f"错误: {e}"
  483. )