report_generator.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. """
  2. 报告生成工具 — auto_put_ad_mini V3
  3. 职责:
  4. - 读取决策引擎输出
  5. - 生成 CSV(程序用)+ XLSX(人工查阅,带条件格式)
  6. - 输出到 outputs/reports/
  7. """
  8. import logging
  9. from datetime import datetime, timedelta
  10. from pathlib import Path
  11. from typing import Optional
  12. import pandas as pd
  13. from agent.tools import tool
  14. from agent.tools.models import ToolContext, ToolResult
  15. logger = logging.getLogger(__name__)
  16. _MINI_DIR = Path(__file__).resolve().parent.parent
  17. _REPORTS_DIR = _MINI_DIR / "outputs" / "reports"
  18. # 最终输出列顺序(审批表格:简洁版,去掉技术性列)
  19. OUTPUT_COLUMNS = [
  20. # 核心标识(优先显示)
  21. "account_id", "ad_id", "cost_7d_avg",
  22. # 基础信息
  23. "ad_name", "audience_tier", "create_time", "ad_age_days", "bid_amount",
  24. # 昨日表现
  25. "yesterday_cost", "yesterday_revenue", "yesterday_roi",
  26. # 7日汇总
  27. "cost_7d_total", "revenue_7d_total",
  28. # f_7日动态ROI(仅结果值,不显示组成)
  29. "f_7日动态ROI",
  30. # 30日上下文
  31. "cost_30d_total", "cost_30d_avg",
  32. "stable_spend_days_30d", "creative_count",
  33. # 决策
  34. "action", "dimension", "reason",
  35. "recommended_change_pct", "current_bid", "recommended_bid",
  36. # 参考
  37. "f_7日动态ROI_mean_all",
  38. ]
  39. # 中文列名映射
  40. CN_COLUMNS = {
  41. "approval_date": "日期",
  42. "account_id": "账户ID",
  43. "ad_id": "广告ID",
  44. "cost_7d_avg": "广告消耗(7日日均/元)",
  45. "ad_name": "广告名称",
  46. "audience_tier": "人群包",
  47. "create_time": "创建时间",
  48. "ad_age_days": "投放天数",
  49. "configured_status": "广告状态",
  50. "bid_amount": "出价(元)",
  51. "yesterday_cost": "昨日消耗(元)",
  52. "yesterday_revenue": "昨日收入(元)",
  53. "yesterday_roi": "昨日ROI",
  54. "cost_7d_total": "7日总消耗(元)",
  55. "revenue_7d_total": "7日总收入(元)",
  56. "T0裂变系数_latest": "T0裂变系数(最新)",
  57. "arpu_latest": "ARPU(最新)",
  58. "a_latest": "a值(最新)",
  59. "b_7d_mean": "b值(7日均值)",
  60. "T0裂变系数_7d_mean": "T0裂变系数(7日均值)",
  61. "e_factor": "e因子",
  62. "f_7日动态ROI": "7日均值动态ROI",
  63. "动态ROI_7日均值": "7日均值动态ROI",
  64. "cost_30d_total": "30日总消耗(元)",
  65. "cost_30d_avg": "30日日均消耗(元)",
  66. "stable_spend_days_30d": "稳定消耗天数(30日)",
  67. "creative_count": "创意数",
  68. "action": "决策动作",
  69. "dimension": "命中维度",
  70. "reason": "决策理由",
  71. "recommended_change_pct": "建议调幅(%)",
  72. "current_bid": "当前出价(元)",
  73. "recommended_bid": "建议出价(元)",
  74. "guardrail_reason": "护栏说明",
  75. "execution_status": "执行状态",
  76. "f_7日动态ROI_mean_all": "全体动态ROI均值",
  77. "source": "数据来源",
  78. }
  79. # 动作中文映射
  80. ACTION_CN_MAP = {
  81. "pause": "关停",
  82. "bid_down": "降价",
  83. "bid_up": "提价",
  84. "hold": "保持",
  85. }
  86. def _write_xlsx_with_format(df: pd.DataFrame, path: Path) -> None:
  87. """生成带条件格式的 XLSX 文件。"""
  88. try:
  89. import openpyxl
  90. from openpyxl.styles import Font, Alignment, PatternFill
  91. from openpyxl.utils import get_column_letter
  92. except ImportError:
  93. logger.warning("openpyxl 未安装,跳过 XLSX 生成")
  94. return
  95. # 动作中文化
  96. if "action" in df.columns:
  97. df["action"] = df["action"].map(ACTION_CN_MAP).fillna(df["action"])
  98. # 中文列名
  99. df_cn = df.rename(columns=CN_COLUMNS)
  100. df_cn.to_excel(path, index=False, engine="openpyxl")
  101. wb = openpyxl.load_workbook(path)
  102. ws = wb.active
  103. # 表头样式(加粗 + 灰色背景)
  104. header_font = Font(bold=True, size=10)
  105. header_fill = PatternFill(start_color="D3D3D3", end_color="D3D3D3", fill_type="solid") # 灰色背景
  106. center_alignment = Alignment(horizontal="center", vertical="center")
  107. for cell in ws[1]:
  108. cell.font = header_font
  109. cell.fill = header_fill
  110. cell.alignment = center_alignment
  111. # 所有数据单元格居中对齐
  112. for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
  113. for cell in row:
  114. cell.alignment = center_alignment
  115. # 自动列宽(增加30%)
  116. for col_idx in range(1, ws.max_column + 1):
  117. col_letter = get_column_letter(col_idx)
  118. max_len = max(
  119. len(str(ws.cell(row=r, column=col_idx).value or ""))
  120. for r in range(1, min(ws.max_row + 1, 50))
  121. )
  122. # 列宽增加30%:原公式 max_len + 4,现改为 (max_len + 4) * 1.3
  123. ws.column_dimensions[col_letter].width = min((max_len + 4) * 1.3, 40)
  124. # 决策动作列条件格式化
  125. action_col_idx = None
  126. for idx, cell in enumerate(ws[1], 1):
  127. if cell.value == "决策动作":
  128. action_col_idx = idx
  129. # E1 单元格(决策动作表头)黄色高亮
  130. yellow_header_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
  131. cell.fill = yellow_header_fill
  132. break
  133. if action_col_idx:
  134. # 颜色定义
  135. yellow_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid") # 黄色(整列默认)
  136. green_fill = PatternFill(start_color="90EE90", end_color="90EE90", fill_type="solid") # 浅绿色
  137. red_fill = PatternFill(start_color="FF6B6B", end_color="FF6B6B", fill_type="solid") # 红色
  138. orange_fill = PatternFill(start_color="FFB84D", end_color="FFB84D", fill_type="solid") # 橘黄色
  139. # 为决策动作列的所有数据单元格设置颜色
  140. for row_idx in range(2, ws.max_row + 1):
  141. cell = ws.cell(row=row_idx, column=action_col_idx)
  142. value = str(cell.value).strip() if cell.value else ""
  143. # 根据值设置颜色
  144. if value == "保持":
  145. cell.fill = green_fill
  146. elif value == "关停":
  147. cell.fill = red_fill
  148. elif value == "降价":
  149. cell.fill = orange_fill
  150. else:
  151. # 其他值(如"提价")使用黄色
  152. cell.fill = yellow_fill
  153. # 启用自动筛选(首行)
  154. ws.auto_filter.ref = ws.dimensions
  155. # 冻结窗格:锁定第一行和前五列(冻结到F2单元格)
  156. ws.freeze_panes = "F2"
  157. wb.save(path)
  158. logger.info("XLSX 已生成: %s", path)
  159. @tool(description="生成决策报告(CSV + XLSX 带条件格式)")
  160. async def generate_report(
  161. ctx: ToolContext,
  162. decision_csv: str = "",
  163. end_date: str = "yesterday",
  164. ) -> ToolResult:
  165. """
  166. 生成决策报告。
  167. Args:
  168. decision_csv: 决策结果 CSV 路径(analyze_ads 输出)
  169. end_date: 结束日期
  170. Returns:
  171. ToolResult,包含报告路径
  172. """
  173. try:
  174. if not decision_csv:
  175. decision_csv = str(_MINI_DIR / "outputs" / "decision_temp.csv")
  176. df = pd.read_csv(decision_csv)
  177. if df.empty:
  178. return ToolResult(title="报告生成", output="决策数据为空")
  179. if end_date == "yesterday":
  180. end_date = (datetime.now() - timedelta(days=1)).strftime("%Y%m%d")
  181. _REPORTS_DIR.mkdir(parents=True, exist_ok=True)
  182. # 选择输出列(存在的列)
  183. cols = [c for c in OUTPUT_COLUMNS if c in df.columns]
  184. df_out = df[cols].copy()
  185. # 排序:有消耗的在前,无消耗的在后,同组内按消耗降序
  186. sort_cols = []
  187. ascending_flags = []
  188. if "cost_7d_avg" in df_out.columns:
  189. # 无消耗(cost_7d_avg<10元)放最后
  190. df_out["_has_spend"] = (df_out["cost_7d_avg"] >= 10.0).astype(int)
  191. sort_cols.append("_has_spend")
  192. ascending_flags.append(False) # 1在前,0在后
  193. if "cost_7d_total" in df_out.columns:
  194. sort_cols.append("cost_7d_total")
  195. ascending_flags.append(False)
  196. if sort_cols:
  197. df_out = df_out.sort_values(sort_cols, ascending=ascending_flags)
  198. if "_has_spend" in df_out.columns:
  199. df_out.drop(columns=["_has_spend"], inplace=True)
  200. # CSV
  201. csv_path = _REPORTS_DIR / f"decision_{end_date}.csv"
  202. df_out.to_csv(csv_path, index=False, encoding="utf-8-sig")
  203. logger.info("CSV 已生成: %s", csv_path)
  204. # XLSX
  205. xlsx_path = _REPORTS_DIR / f"decision_{end_date}.xlsx"
  206. _write_xlsx_with_format(df_out, xlsx_path)
  207. pause_count = (df_out["action"] == "pause").sum() if "action" in df_out.columns else 0
  208. total_count = len(df_out)
  209. return ToolResult(
  210. title=f"报告已生成({total_count}个广告)",
  211. output=f"报告已生成:\n CSV: {csv_path}\n XLSX: {xlsx_path}\n\n共 {total_count} 个广告,其中 {pause_count} 个建议关停",
  212. metadata={
  213. "csv_path": str(csv_path),
  214. "xlsx_path": str(xlsx_path),
  215. "total_ads": total_count,
  216. "pause_ads": pause_count,
  217. },
  218. )
  219. except Exception as e:
  220. logger.error("报告生成失败: %s", e, exc_info=True)
  221. return ToolResult(title="报告生成失败", output=str(e))
  222. # ═══════════════════════════════════════════
  223. # 双引擎对比工具
  224. # ═══════════════════════════════════════════
  225. @tool(description="对比规则引擎与智能引擎的决策差异")
  226. async def compare_decisions(
  227. ctx: ToolContext,
  228. end_date: str = "yesterday",
  229. ) -> ToolResult:
  230. """
  231. 加载同日期的规则引擎和智能引擎决策结果,输出对比报告。
  232. 对比标签:
  233. - agree:两个引擎一致(都 pause 或都 hold)
  234. - rule_only_pause:只有规则引擎关停
  235. - llm_only_pause:只有智能引擎关停
  236. - disagree:决策不同(其他情况)
  237. Args:
  238. end_date: 结束日期(YYYYMMDD 或 "yesterday")
  239. """
  240. try:
  241. if end_date == "yesterday":
  242. end_date = (datetime.now() - timedelta(days=1)).strftime("%Y%m%d")
  243. rule_path = _REPORTS_DIR / f"decision_{end_date}.csv"
  244. llm_path = _REPORTS_DIR / f"llm_decisions_{end_date}.csv"
  245. if not rule_path.exists():
  246. return ToolResult(
  247. title="compare_decisions",
  248. output=f"规则引擎结果不存在: {rule_path}",
  249. )
  250. if not llm_path.exists():
  251. return ToolResult(
  252. title="compare_decisions",
  253. output=f"智能引擎结果不存在: {llm_path}",
  254. )
  255. df_rule = pd.read_csv(rule_path)[["ad_id", "action", "dimension", "reason"]].copy()
  256. df_rule.columns = ["ad_id", "rule_action", "rule_dimension", "rule_reason"]
  257. df_llm = pd.read_csv(llm_path)[["ad_id", "action", "dimension", "reason", "confidence"]].copy()
  258. df_llm.columns = ["ad_id", "llm_action", "llm_dimension", "llm_reason", "llm_confidence"]
  259. # 外连接合并
  260. df = pd.merge(df_rule, df_llm, on="ad_id", how="outer")
  261. # 填充缺失
  262. df["rule_action"] = df["rule_action"].fillna("no_data")
  263. df["llm_action"] = df["llm_action"].fillna("no_data")
  264. # 打标签
  265. def _label(row):
  266. r = row["rule_action"]
  267. l = row["llm_action"]
  268. if r == l:
  269. return "agree"
  270. if r == "pause" and l != "pause":
  271. return "rule_only_pause"
  272. if l == "pause" and r != "pause":
  273. return "llm_only_pause"
  274. return "disagree"
  275. df["comparison"] = df.apply(_label, axis=1)
  276. # 保存对比报告
  277. _REPORTS_DIR.mkdir(parents=True, exist_ok=True)
  278. out_path = _REPORTS_DIR / f"comparison_{end_date}.csv"
  279. df.to_csv(out_path, index=False, encoding="utf-8-sig")
  280. # 统计摘要
  281. total = len(df)
  282. counts = df["comparison"].value_counts().to_dict()
  283. agree = counts.get("agree", 0)
  284. rule_only = counts.get("rule_only_pause", 0)
  285. llm_only = counts.get("llm_only_pause", 0)
  286. disagree = counts.get("disagree", 0)
  287. rule_pause = (df["rule_action"] == "pause").sum()
  288. llm_pause = (df["llm_action"] == "pause").sum()
  289. agree_rate = round(agree / total * 100, 1) if total > 0 else 0.0
  290. summary = (
  291. f"双引擎对比报告: {out_path}\n\n"
  292. f"总广告数: {total}\n"
  293. f"一致率: {agree_rate}%({agree}/{total})\n\n"
  294. f"规则引擎关停: {rule_pause} 个\n"
  295. f"智能引擎关停: {llm_pause} 个\n\n"
  296. f"分类明细:\n"
  297. f" agree(两者一致): {agree}\n"
  298. f" rule_only_pause(仅规则关停): {rule_only}\n"
  299. f" llm_only_pause(仅智能关停): {llm_only}\n"
  300. f" disagree(其他差异): {disagree}"
  301. )
  302. return ToolResult(
  303. title=f"双引擎对比(一致率 {agree_rate}%)",
  304. output=summary,
  305. metadata={
  306. "csv_path": str(out_path),
  307. "total": total,
  308. "agree_rate": agree_rate,
  309. "rule_pause": int(rule_pause),
  310. "llm_pause": int(llm_pause),
  311. "agree": agree,
  312. "rule_only_pause": rule_only,
  313. "llm_only_pause": llm_only,
  314. "disagree": disagree,
  315. "end_date": end_date,
  316. },
  317. )
  318. except Exception as e:
  319. logger.error("compare_decisions 失败: %s", e, exc_info=True)
  320. return ToolResult(title="compare_decisions 失败", output=str(e))