""" 单广告决策测试 — 控制变量实验 只给 LLM 1 条广告数据(93249577421),看它是否还会: 1. 幻觉关停线 2.66(实际 2.524) 2. 忽略裂变 +35% 正向信号而决策 pause """ import os, sys, json from pathlib import Path # 代理 + 项目根 os.environ.setdefault("HTTP_PROXY", "http://127.0.0.1:29758") os.environ.setdefault("HTTPS_PROXY", "http://127.0.0.1:29758") PROJECT_ROOT = Path("/Users/liulidong/project/agent/Agent") sys.path.insert(0, str(PROJECT_ROOT)) from dotenv import load_dotenv load_dotenv(PROJECT_ROOT / ".env") import requests # ═══════════════════════════════════════════ # 1. 构造这条广告的 ad_dict(与 get_ads_for_review 输出一致) # ═══════════════════════════════════════════ CHANNEL_ROI_P50 = 3.3654 ad_dict = { "ad_id": 93249577421, "ad_name": "4.8-朋友圈+公众号-关键页-R50-惊奇-60+不限性别", "动态ROI_7日均值": 2.1964, "cost_7d_avg": 5702.44, "cost_7d_total": 39917.09, "ad_age_days": 10, "bid_increased_7d": True, "creative_changed_7d": True, "stable_spend_days_30d": 9, "bid_amount": 0.39, "roi_zone": "bid_down_zone", # ROI 在降价区间(裂变判断交给 LLM) "fission_vs_tier": "high", # 裂变率高于同类均值 10%+ "bid_up_candidate": False, "scale_up_candidate": False, "ad_fission": 1.61, "ad_ctr": 0.032, "yesterday_cost": 5702.44, "audience_tier": "R50*泛惊奇*奇观技艺", "roi_valid_days": 9, # 同类对比 "tier_fission_mean": 1.19, "tier_fission_p50": 1.15, "tier_ctr_mean": 0.028, "tier_bid_mean": 0.35, "bid_up_target_min": 0.3675, "bid_up_target_max": 0.385, # 阈值线 "pause_line_min": round(CHANNEL_ROI_P50 * 0.70, 4), "pause_line_max": round(CHANNEL_ROI_P50 * 0.75, 4), "bid_down_line_min": round(CHANNEL_ROI_P50 * 0.85, 4), "bid_down_line_max": round(CHANNEL_ROI_P50 * 0.90, 4), "bid_up_line_min": round(CHANNEL_ROI_P50 * 1.05, 4), "bid_up_line_max": round(CHANNEL_ROI_P50 * 1.10, 4), # 年龄 "age_segment": "mature", "age_protection_level": "正常调控(成熟期)", "allow_bid_down": True, "allow_bid_up": True, "max_bid_down_pct": 0.05, "high_burn_alert": True, # 调幅 "bid_change_min_pct": 0.03, "bid_change_max_pct": 0.10, } # ═══════════════════════════════════════════ # 2. 读取 system prompt + skills # ═══════════════════════════════════════════ MINI_DIR = PROJECT_ROOT / "examples" / "auto_put_ad_mini" system_prompt = (MINI_DIR / "prompts" / "system.prompt").read_text() skills_text = "" skills_dir = MINI_DIR / "skills" for skill_name in ["ad_domain", "platform_rules", "decision_strategy", "posterior_wisdom"]: fpath = skills_dir / f"{skill_name.replace('-', '_')}.md" if not fpath.exists(): fpath = skills_dir / f"{skill_name.replace('_', '-')}.md" if fpath.exists(): skills_text += f"\n\n--- SKILL: {skill_name} ---\n" + fpath.read_text() # ═══════════════════════════════════════════ # 3. 构造 LLM 请求 # ═══════════════════════════════════════════ tool_result = json.dumps({ "summary": { "total": 1, "need_review_ads_total": 1, "tier_groups": 1, }, "distribution": { "channel_roi_p50": CHANNEL_ROI_P50, "p25": 1.8, "p50": CHANNEL_ROI_P50, "p75": 5.2, "p90": 8.1, }, "thresholds_used": { "ROI_LOW_FACTOR": 0.75, "BID_DOWN_ROI_FACTOR": 0.90, "BID_UP_ROI_FACTOR": 1.05, "channel_roi_p50": CHANNEL_ROI_P50, "pause_line": round(CHANNEL_ROI_P50 * 0.75, 4), "bid_down_line": round(CHANNEL_ROI_P50 * 0.90, 4), "bid_up_line": round(CHANNEL_ROI_P50 * 1.05, 4), }, # 按 tier 分批结构(方案 1:tier 分批) "tier_batches": [ { "audience_tier": ad_dict["audience_tier"], "count": 1, "ads": [ad_dict], } ], }, ensure_ascii=False, indent=2) user_msg = f"""请对以下 1 条待评估广告做出决策。 数据如下: {tool_result} 请以 JSON 格式返回决策列表,每条包含: - ad_id - action: pause / bid_down / bid_up / scale_up / creative_adjust / observe / hold - dimension: 简短描述 - reason: 包含 ROI值、对比基准、偏离百分比、裂变率对比、行动建议 - recommended_change_pct: 数字(降价为负,提价为正,其他为0) """ messages = [ {"role": "system", "content": system_prompt + "\n\n" + skills_text}, {"role": "user", "content": user_msg}, ] # ═══════════════════════════════════════════ # 4. 调用 OpenRouter API # ═══════════════════════════════════════════ api_key = os.environ.get("OPEN_ROUTER_API_KEY") or os.environ.get("OPENROUTER_API_KEY") if not api_key: print("❌ 找不到 OPEN_ROUTER_API_KEY") sys.exit(1) print("=" * 60) print(f" 模型: anthropic/claude-sonnet-4.5") print(f" 广告数: 1 条(93249577421)") print(f" 关停线(正确值): {round(CHANNEL_ROI_P50 * 0.75, 4)}") print(f" 广告动态ROI: 2.1964") print(f" 裂变率 vs 同类: 1.61 vs 1.19 (+35%)") print("=" * 60) print("\n调用 LLM 中...\n") resp = requests.post( "https://openrouter.ai/api/v1/chat/completions", headers={ "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", }, json={ "model": "anthropic/claude-sonnet-4.5", "temperature": 0.3, "max_tokens": 4000, "messages": messages, }, timeout=120, ) if resp.status_code != 200: print(f"❌ API 错误: {resp.status_code}") print(resp.text[:500]) sys.exit(1) result = resp.json() content = result["choices"][0]["message"]["content"] print("=" * 60) print(" LLM 输出(单条广告)") print("=" * 60) print(content) print() # ═══════════════════════════════════════════ # 5. 快速验证 # ═══════════════════════════════════════════ print("=" * 60) print(" 验证") print("=" * 60) if "2.66" in content: print(" ❌ 仍然幻觉关停线 2.66") elif "2.524" in content or "2.52" in content: print(" ✅ 关停线数字正确(2.524)") else: print(" ⚠️ 关停线数字未出现在输出中") if '"pause"' in content.lower() or "'pause'" in content.lower(): print(" ❌ 仍然决策 pause(裂变+35% 被忽略)") elif '"bid_down"' in content.lower(): print(" ✅ 决策 bid_down(裂变好 + ROI 低 → 降价而非关停)") elif '"observe"' in content.lower(): print(" ✅ 决策 observe(近期已调整,等稳定)") elif '"hold"' in content.lower(): print(" ⚠️ 决策 hold(保守但可接受)") else: print(" ⚠️ action 不在预期范围内")