report_generator.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. """
  2. 报告生成工具 — auto_put_ad_mini V3
  3. 职责:
  4. - 读取决策引擎输出
  5. - 生成 CSV(程序用)+ XLSX(人工查阅,带条件格式)
  6. - 输出到 outputs/reports/
  7. """
  8. import logging
  9. from datetime import datetime, timedelta
  10. from pathlib import Path
  11. from typing import Optional
  12. import pandas as pd
  13. from agent.tools import tool
  14. from agent.tools.models import ToolContext, ToolResult
  15. logger = logging.getLogger(__name__)
  16. _MINI_DIR = Path(__file__).resolve().parent.parent
  17. _REPORTS_DIR = _MINI_DIR / "outputs" / "reports"
  18. # 最终输出列顺序
  19. OUTPUT_COLUMNS = [
  20. "ad_id", "account_id", "ad_name", "audience_tier",
  21. "create_time", "ad_age_days", "configured_status", "bid_amount",
  22. # 昨日表现
  23. "yesterday_cost", "yesterday_revenue", "yesterday_roi",
  24. # 7日汇总
  25. "cost_7d_total", "cost_7d_avg", "revenue_7d_total",
  26. # f_7日动态ROI 组成
  27. "T0裂变系数_latest", "arpu_latest", "a_latest",
  28. "b_7d_mean", "T0裂变系数_7d_mean", "e_factor",
  29. "f_7日动态ROI",
  30. # 30日上下文
  31. "cost_30d_total", "cost_30d_avg",
  32. "stable_spend_days_30d", "creative_count",
  33. # 决策
  34. "action", "dimension", "reason",
  35. "recommended_change_pct", "current_bid", "recommended_bid",
  36. # 护栏 & 执行
  37. "guardrail_status", "guardrail_reason", "final_action", "final_bid",
  38. "execution_status",
  39. # 参考
  40. "f_7日动态ROI_mean_all",
  41. ]
  42. # 中文列名映射
  43. CN_COLUMNS = {
  44. "ad_id": "广告ID",
  45. "account_id": "账户ID",
  46. "ad_name": "广告名称",
  47. "audience_tier": "人群包",
  48. "create_time": "创建时间",
  49. "ad_age_days": "投放天数",
  50. "configured_status": "广告状态",
  51. "bid_amount": "出价(元)",
  52. "yesterday_cost": "昨日消耗(元)",
  53. "yesterday_revenue": "昨日收入(元)",
  54. "yesterday_roi": "昨日ROI",
  55. "cost_7d_total": "7日总消耗(元)",
  56. "cost_7d_avg": "7日日均消耗(元)",
  57. "revenue_7d_total": "7日总收入(元)",
  58. "T0裂变系数_latest": "T0裂变系数(最新)",
  59. "arpu_latest": "ARPU(最新)",
  60. "a_latest": "a值(最新)",
  61. "b_7d_mean": "b值(7日均值)",
  62. "T0裂变系数_7d_mean": "T0裂变系数(7日均值)",
  63. "e_factor": "e因子",
  64. "f_7日动态ROI": "f_7日动态ROI",
  65. "cost_30d_total": "30日总消耗(元)",
  66. "cost_30d_avg": "30日日均消耗(元)",
  67. "stable_spend_days_30d": "稳定消耗天数(30日)",
  68. "creative_count": "创意数",
  69. "action": "决策动作",
  70. "dimension": "命中维度",
  71. "reason": "决策理由",
  72. "recommended_change_pct": "建议调幅(%)",
  73. "current_bid": "当前出价(元)",
  74. "recommended_bid": "建议出价(元)",
  75. "guardrail_status": "护栏状态",
  76. "guardrail_reason": "护栏说明",
  77. "final_action": "最终动作",
  78. "final_bid": "最终出价(元)",
  79. "execution_status": "执行状态",
  80. "f_7日动态ROI_mean_all": "全体动态ROI均值",
  81. }
  82. def _write_xlsx_with_format(df: pd.DataFrame, path: Path) -> None:
  83. """生成带条件格式的 XLSX 文件。"""
  84. try:
  85. import openpyxl
  86. from openpyxl.styles import Font, PatternFill, Alignment
  87. from openpyxl.utils import get_column_letter
  88. except ImportError:
  89. logger.warning("openpyxl 未安装,跳过 XLSX 生成")
  90. return
  91. # 中文列名
  92. df_cn = df.rename(columns=CN_COLUMNS)
  93. df_cn.to_excel(path, index=False, engine="openpyxl")
  94. wb = openpyxl.load_workbook(path)
  95. ws = wb.active
  96. # 表头样式
  97. header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
  98. header_font = Font(color="FFFFFF", bold=True, size=10)
  99. for cell in ws[1]:
  100. cell.fill = header_fill
  101. cell.font = header_font
  102. cell.alignment = Alignment(horizontal="center")
  103. # 条件格式:不同动作不同颜色
  104. red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")
  105. red_font = Font(color="9C0006")
  106. yellow_fill = PatternFill(start_color="FFEB9C", end_color="FFEB9C", fill_type="solid")
  107. yellow_font = Font(color="9C6500")
  108. green_fill = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
  109. green_font = Font(color="006100")
  110. action_col_idx = None
  111. for idx, cell in enumerate(ws[1], 1):
  112. if cell.value == "决策动作":
  113. action_col_idx = idx
  114. break
  115. if action_col_idx:
  116. for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
  117. action_val = row[action_col_idx - 1].value
  118. if action_val == "pause":
  119. for cell in row:
  120. cell.fill = red_fill
  121. cell.font = red_font
  122. elif action_val == "bid_down":
  123. for cell in row:
  124. cell.fill = yellow_fill
  125. cell.font = yellow_font
  126. elif action_val == "bid_up":
  127. for cell in row:
  128. cell.fill = green_fill
  129. cell.font = green_font
  130. # 自动列宽
  131. for col_idx in range(1, ws.max_column + 1):
  132. col_letter = get_column_letter(col_idx)
  133. max_len = max(
  134. len(str(ws.cell(row=r, column=col_idx).value or ""))
  135. for r in range(1, min(ws.max_row + 1, 50))
  136. )
  137. ws.column_dimensions[col_letter].width = min(max_len + 4, 30)
  138. wb.save(path)
  139. logger.info("XLSX 已生成: %s", path)
  140. @tool(description="生成决策报告(CSV + XLSX 带条件格式)")
  141. async def generate_report(
  142. ctx: ToolContext,
  143. decision_csv: str = "",
  144. end_date: str = "yesterday",
  145. ) -> ToolResult:
  146. """
  147. 生成决策报告。
  148. Args:
  149. decision_csv: 决策结果 CSV 路径(analyze_ads 输出)
  150. end_date: 结束日期
  151. Returns:
  152. ToolResult,包含报告路径
  153. """
  154. try:
  155. if not decision_csv:
  156. decision_csv = str(_MINI_DIR / "outputs" / "decision_temp.csv")
  157. df = pd.read_csv(decision_csv)
  158. if df.empty:
  159. return ToolResult(title="报告生成", output="决策数据为空")
  160. if end_date == "yesterday":
  161. end_date = (datetime.now() - timedelta(days=1)).strftime("%Y%m%d")
  162. _REPORTS_DIR.mkdir(parents=True, exist_ok=True)
  163. # 选择输出列(存在的列)
  164. cols = [c for c in OUTPUT_COLUMNS if c in df.columns]
  165. df_out = df[cols].copy()
  166. # 排序:关停在前,按消耗降序
  167. sort_cols = []
  168. ascending_flags = []
  169. if "action" in df_out.columns:
  170. df_out["_sort_action"] = (df_out["action"] == "pause").astype(int) * -1
  171. sort_cols.append("_sort_action")
  172. ascending_flags.append(True)
  173. if "cost_7d_total" in df_out.columns:
  174. sort_cols.append("cost_7d_total")
  175. ascending_flags.append(False)
  176. if sort_cols:
  177. df_out = df_out.sort_values(sort_cols, ascending=ascending_flags)
  178. if "_sort_action" in df_out.columns:
  179. df_out.drop(columns=["_sort_action"], inplace=True)
  180. # CSV
  181. csv_path = _REPORTS_DIR / f"decision_{end_date}.csv"
  182. df_out.to_csv(csv_path, index=False, encoding="utf-8-sig")
  183. logger.info("CSV 已生成: %s", csv_path)
  184. # XLSX
  185. xlsx_path = _REPORTS_DIR / f"decision_{end_date}.xlsx"
  186. _write_xlsx_with_format(df_out, xlsx_path)
  187. pause_count = (df_out["action"] == "pause").sum() if "action" in df_out.columns else 0
  188. total_count = len(df_out)
  189. return ToolResult(
  190. title=f"报告已生成({total_count}个广告)",
  191. output=f"报告已生成:\n CSV: {csv_path}\n XLSX: {xlsx_path}\n\n共 {total_count} 个广告,其中 {pause_count} 个建议关停",
  192. metadata={
  193. "csv_path": str(csv_path),
  194. "xlsx_path": str(xlsx_path),
  195. "total_ads": total_count,
  196. "pause_ads": pause_count,
  197. },
  198. )
  199. except Exception as e:
  200. logger.error("报告生成失败: %s", e, exc_info=True)
  201. return ToolResult(title="报告生成失败", output=str(e))
  202. # ═══════════════════════════════════════════
  203. # 双引擎对比工具
  204. # ═══════════════════════════════════════════
  205. @tool(description="对比规则引擎与智能引擎的决策差异")
  206. async def compare_decisions(
  207. ctx: ToolContext,
  208. end_date: str = "yesterday",
  209. ) -> ToolResult:
  210. """
  211. 加载同日期的规则引擎和智能引擎决策结果,输出对比报告。
  212. 对比标签:
  213. - agree:两个引擎一致(都 pause 或都 hold)
  214. - rule_only_pause:只有规则引擎关停
  215. - llm_only_pause:只有智能引擎关停
  216. - disagree:决策不同(其他情况)
  217. Args:
  218. end_date: 结束日期(YYYYMMDD 或 "yesterday")
  219. """
  220. try:
  221. if end_date == "yesterday":
  222. end_date = (datetime.now() - timedelta(days=1)).strftime("%Y%m%d")
  223. rule_path = _REPORTS_DIR / f"decision_{end_date}.csv"
  224. llm_path = _REPORTS_DIR / f"llm_decisions_{end_date}.csv"
  225. if not rule_path.exists():
  226. return ToolResult(
  227. title="compare_decisions",
  228. output=f"规则引擎结果不存在: {rule_path}",
  229. )
  230. if not llm_path.exists():
  231. return ToolResult(
  232. title="compare_decisions",
  233. output=f"智能引擎结果不存在: {llm_path}",
  234. )
  235. df_rule = pd.read_csv(rule_path)[["ad_id", "action", "dimension", "reason"]].copy()
  236. df_rule.columns = ["ad_id", "rule_action", "rule_dimension", "rule_reason"]
  237. df_llm = pd.read_csv(llm_path)[["ad_id", "action", "dimension", "reason", "confidence"]].copy()
  238. df_llm.columns = ["ad_id", "llm_action", "llm_dimension", "llm_reason", "llm_confidence"]
  239. # 外连接合并
  240. df = pd.merge(df_rule, df_llm, on="ad_id", how="outer")
  241. # 填充缺失
  242. df["rule_action"] = df["rule_action"].fillna("no_data")
  243. df["llm_action"] = df["llm_action"].fillna("no_data")
  244. # 打标签
  245. def _label(row):
  246. r = row["rule_action"]
  247. l = row["llm_action"]
  248. if r == l:
  249. return "agree"
  250. if r == "pause" and l != "pause":
  251. return "rule_only_pause"
  252. if l == "pause" and r != "pause":
  253. return "llm_only_pause"
  254. return "disagree"
  255. df["comparison"] = df.apply(_label, axis=1)
  256. # 保存对比报告
  257. _REPORTS_DIR.mkdir(parents=True, exist_ok=True)
  258. out_path = _REPORTS_DIR / f"comparison_{end_date}.csv"
  259. df.to_csv(out_path, index=False, encoding="utf-8-sig")
  260. # 统计摘要
  261. total = len(df)
  262. counts = df["comparison"].value_counts().to_dict()
  263. agree = counts.get("agree", 0)
  264. rule_only = counts.get("rule_only_pause", 0)
  265. llm_only = counts.get("llm_only_pause", 0)
  266. disagree = counts.get("disagree", 0)
  267. rule_pause = (df["rule_action"] == "pause").sum()
  268. llm_pause = (df["llm_action"] == "pause").sum()
  269. agree_rate = round(agree / total * 100, 1) if total > 0 else 0.0
  270. summary = (
  271. f"双引擎对比报告: {out_path}\n\n"
  272. f"总广告数: {total}\n"
  273. f"一致率: {agree_rate}%({agree}/{total})\n\n"
  274. f"规则引擎关停: {rule_pause} 个\n"
  275. f"智能引擎关停: {llm_pause} 个\n\n"
  276. f"分类明细:\n"
  277. f" agree(两者一致): {agree}\n"
  278. f" rule_only_pause(仅规则关停): {rule_only}\n"
  279. f" llm_only_pause(仅智能关停): {llm_only}\n"
  280. f" disagree(其他差异): {disagree}"
  281. )
  282. return ToolResult(
  283. title=f"双引擎对比(一致率 {agree_rate}%)",
  284. output=summary,
  285. metadata={
  286. "csv_path": str(out_path),
  287. "total": total,
  288. "agree_rate": agree_rate,
  289. "rule_pause": int(rule_pause),
  290. "llm_pause": int(llm_pause),
  291. "agree": agree,
  292. "rule_only_pause": rule_only,
  293. "llm_only_pause": llm_only,
  294. "disagree": disagree,
  295. "end_date": end_date,
  296. },
  297. )
  298. except Exception as e:
  299. logger.error("compare_decisions 失败: %s", e, exc_info=True)
  300. return ToolResult(title="compare_decisions 失败", output=str(e))