# test_single_ad.py (7.4 KB)
  1. """
  2. 单广告决策测试 — 控制变量实验
  3. 只给 LLM 1 条广告数据(93249577421),看它是否还会:
  4. 1. 幻觉关停线 2.66(实际 2.524)
  5. 2. 忽略裂变 +35% 正向信号而决策 pause
  6. """
  7. import os, sys, json
  8. from pathlib import Path
  9. # 代理 + 项目根
  10. os.environ.setdefault("HTTP_PROXY", "http://127.0.0.1:29758")
  11. os.environ.setdefault("HTTPS_PROXY", "http://127.0.0.1:29758")
  12. PROJECT_ROOT = Path("/Users/liulidong/project/agent/Agent")
  13. sys.path.insert(0, str(PROJECT_ROOT))
  14. from dotenv import load_dotenv
  15. load_dotenv(PROJECT_ROOT / ".env")
  16. import requests
  17. # ═══════════════════════════════════════════
  18. # 1. 构造这条广告的 ad_dict(与 get_ads_for_review 输出一致)
  19. # ═══════════════════════════════════════════
  20. CHANNEL_ROI_P50 = 3.3654
  21. ad_dict = {
  22. "ad_id": 93249577421,
  23. "ad_name": "4.8-朋友圈+公众号-关键页-R50-惊奇-60+不限性别",
  24. "动态ROI_7日均值": 2.1964,
  25. "cost_7d_avg": 5702.44,
  26. "cost_7d_total": 39917.09,
  27. "ad_age_days": 10,
  28. "bid_increased_7d": True,
  29. "creative_changed_7d": True,
  30. "stable_spend_days_30d": 9,
  31. "bid_amount": 0.39,
  32. "roi_zone": "bid_down_zone", # ROI 在降价区间(裂变判断交给 LLM)
  33. "fission_vs_tier": "high", # 裂变率高于同类均值 10%+
  34. "bid_up_candidate": False,
  35. "scale_up_candidate": False,
  36. "ad_fission": 1.61,
  37. "ad_ctr": 0.032,
  38. "yesterday_cost": 5702.44,
  39. "audience_tier": "R50*泛惊奇*奇观技艺",
  40. "roi_valid_days": 9,
  41. # 同类对比
  42. "tier_fission_mean": 1.19,
  43. "tier_fission_p50": 1.15,
  44. "tier_ctr_mean": 0.028,
  45. "tier_bid_mean": 0.35,
  46. "bid_up_target_min": 0.3675,
  47. "bid_up_target_max": 0.385,
  48. # 阈值线
  49. "pause_line_min": round(CHANNEL_ROI_P50 * 0.70, 4),
  50. "pause_line_max": round(CHANNEL_ROI_P50 * 0.75, 4),
  51. "bid_down_line_min": round(CHANNEL_ROI_P50 * 0.85, 4),
  52. "bid_down_line_max": round(CHANNEL_ROI_P50 * 0.90, 4),
  53. "bid_up_line_min": round(CHANNEL_ROI_P50 * 1.05, 4),
  54. "bid_up_line_max": round(CHANNEL_ROI_P50 * 1.10, 4),
  55. # 年龄
  56. "age_segment": "mature",
  57. "age_protection_level": "正常调控(成熟期)",
  58. "allow_bid_down": True,
  59. "allow_bid_up": True,
  60. "max_bid_down_pct": 0.05,
  61. "high_burn_alert": True,
  62. # 调幅
  63. "bid_change_min_pct": 0.03,
  64. "bid_change_max_pct": 0.10,
  65. }
  66. # ═══════════════════════════════════════════
  67. # 2. 读取 system prompt + skills
  68. # ═══════════════════════════════════════════
  69. MINI_DIR = PROJECT_ROOT / "examples" / "auto_put_ad_mini"
  70. system_prompt = (MINI_DIR / "prompts" / "system.prompt").read_text()
  71. skills_text = ""
  72. skills_dir = MINI_DIR / "skills"
  73. for skill_name in ["ad_domain", "platform_rules", "decision_strategy", "posterior_wisdom"]:
  74. fpath = skills_dir / f"{skill_name.replace('-', '_')}.md"
  75. if not fpath.exists():
  76. fpath = skills_dir / f"{skill_name.replace('_', '-')}.md"
  77. if fpath.exists():
  78. skills_text += f"\n\n--- SKILL: {skill_name} ---\n" + fpath.read_text()
  79. # ═══════════════════════════════════════════
  80. # 3. 构造 LLM 请求
  81. # ═══════════════════════════════════════════
  82. tool_result = json.dumps({
  83. "summary": {
  84. "total": 1,
  85. "need_review_ads_total": 1,
  86. "tier_groups": 1,
  87. },
  88. "distribution": {
  89. "channel_roi_p50": CHANNEL_ROI_P50,
  90. "p25": 1.8,
  91. "p50": CHANNEL_ROI_P50,
  92. "p75": 5.2,
  93. "p90": 8.1,
  94. },
  95. "thresholds_used": {
  96. "ROI_LOW_FACTOR": 0.75,
  97. "BID_DOWN_ROI_FACTOR": 0.90,
  98. "BID_UP_ROI_FACTOR": 1.05,
  99. "channel_roi_p50": CHANNEL_ROI_P50,
  100. "pause_line": round(CHANNEL_ROI_P50 * 0.75, 4),
  101. "bid_down_line": round(CHANNEL_ROI_P50 * 0.90, 4),
  102. "bid_up_line": round(CHANNEL_ROI_P50 * 1.05, 4),
  103. },
  104. # 按 tier 分批结构(方案 1:tier 分批)
  105. "tier_batches": [
  106. {
  107. "audience_tier": ad_dict["audience_tier"],
  108. "count": 1,
  109. "ads": [ad_dict],
  110. }
  111. ],
  112. }, ensure_ascii=False, indent=2)
  113. user_msg = f"""请对以下 1 条待评估广告做出决策。
  114. 数据如下:
  115. {tool_result}
  116. 请以 JSON 格式返回决策列表,每条包含:
  117. - ad_id
  118. - action: pause / bid_down / bid_up / scale_up / creative_adjust / observe / hold
  119. - dimension: 简短描述
  120. - reason: 包含 ROI值、对比基准、偏离百分比、裂变率对比、行动建议
  121. - recommended_change_pct: 数字(降价为负,提价为正,其他为0)
  122. """
  123. messages = [
  124. {"role": "system", "content": system_prompt + "\n\n" + skills_text},
  125. {"role": "user", "content": user_msg},
  126. ]
  127. # ═══════════════════════════════════════════
  128. # 4. 调用 OpenRouter API
  129. # ═══════════════════════════════════════════
  130. api_key = os.environ.get("OPEN_ROUTER_API_KEY") or os.environ.get("OPENROUTER_API_KEY")
  131. if not api_key:
  132. print("❌ 找不到 OPEN_ROUTER_API_KEY")
  133. sys.exit(1)
  134. print("=" * 60)
  135. print(f" 模型: anthropic/claude-sonnet-4.5")
  136. print(f" 广告数: 1 条(93249577421)")
  137. print(f" 关停线(正确值): {round(CHANNEL_ROI_P50 * 0.75, 4)}")
  138. print(f" 广告动态ROI: 2.1964")
  139. print(f" 裂变率 vs 同类: 1.61 vs 1.19 (+35%)")
  140. print("=" * 60)
  141. print("\n调用 LLM 中...\n")
  142. resp = requests.post(
  143. "https://openrouter.ai/api/v1/chat/completions",
  144. headers={
  145. "Authorization": f"Bearer {api_key}",
  146. "Content-Type": "application/json",
  147. },
  148. json={
  149. "model": "anthropic/claude-sonnet-4.5",
  150. "temperature": 0.3,
  151. "max_tokens": 4000,
  152. "messages": messages,
  153. },
  154. timeout=120,
  155. )
  156. if resp.status_code != 200:
  157. print(f"❌ API 错误: {resp.status_code}")
  158. print(resp.text[:500])
  159. sys.exit(1)
  160. result = resp.json()
  161. content = result["choices"][0]["message"]["content"]
  162. print("=" * 60)
  163. print(" LLM 输出(单条广告)")
  164. print("=" * 60)
  165. print(content)
  166. print()
  167. # ═══════════════════════════════════════════
  168. # 5. 快速验证
  169. # ═══════════════════════════════════════════
  170. print("=" * 60)
  171. print(" 验证")
  172. print("=" * 60)
  173. if "2.66" in content:
  174. print(" ❌ 仍然幻觉关停线 2.66")
  175. elif "2.524" in content or "2.52" in content:
  176. print(" ✅ 关停线数字正确(2.524)")
  177. else:
  178. print(" ⚠️ 关停线数字未出现在输出中")
  179. if '"pause"' in content.lower() or "'pause'" in content.lower():
  180. print(" ❌ 仍然决策 pause(裂变+35% 被忽略)")
  181. elif '"bid_down"' in content.lower():
  182. print(" ✅ 决策 bid_down(裂变好 + ROI 低 → 降价而非关停)")
  183. elif '"observe"' in content.lower():
  184. print(" ✅ 决策 observe(近期已调整,等稳定)")
  185. elif '"hold"' in content.lower():
  186. print(" ⚠️ 决策 hold(保守但可接受)")
  187. else:
  188. print(" ⚠️ action 不在预期范围内")