"""
Single-ad decision test: a controlled-variable experiment.

Feeds the LLM exactly one ad record (93249577421) and checks whether it still:
  1. hallucinates a pause line of 2.66 (the actual value is 2.524);
  2. ignores the +35% fission signal and decides to pause.
"""
import json
import os
import sys
from pathlib import Path

# Proxy settings; setdefault keeps any value already present in the environment.
os.environ.setdefault("HTTP_PROXY", "http://127.0.0.1:29758")
os.environ.setdefault("HTTPS_PROXY", "http://127.0.0.1:29758")

# Project root: prepended to sys.path and used below to locate the .env file.
PROJECT_ROOT = Path("/Users/liulidong/project/agent/Agent")
sys.path.insert(0, str(PROJECT_ROOT))

# These imports intentionally come after the environment / sys.path setup above.
from dotenv import load_dotenv  # noqa: E402

load_dotenv(PROJECT_ROOT / ".env")

import requests  # noqa: E402
# ═══════════════════════════════════════════
# 1. Build the ad_dict for this ad (consistent with get_ads_for_review output)
# ═══════════════════════════════════════════
CHANNEL_ROI_P50 = 3.3654


def _roi_line(factor):
    """Return CHANNEL_ROI_P50 scaled by `factor`, rounded to 4 decimals."""
    return round(CHANNEL_ROI_P50 * factor, 4)


# Key order is preserved deliberately: the dict is serialized verbatim into
# the prompt, so reordering would change what the LLM sees.
ad_dict = {
    "ad_id": 93249577421,
    "ad_name": "4.8-朋友圈+公众号-关键页-R50-惊奇-60+不限性别",
    "动态ROI_7日均值": 2.1964,
    "cost_7d_avg": 5702.44,
    "cost_7d_total": 39917.09,
    "ad_age_days": 10,
    "bid_increased_7d": True,
    "creative_changed_7d": True,
    "stable_spend_days_30d": 9,
    "bid_amount": 0.39,
    "roi_zone": "bid_down_zone",  # ROI is in the bid-down zone (fission judgment is left to the LLM)
    "fission_vs_tier": "high",  # fission rate is 10%+ above the tier mean
    "bid_up_candidate": False,
    "scale_up_candidate": False,
    "ad_fission": 1.61,
    "ad_ctr": 0.032,
    "yesterday_cost": 5702.44,
    "audience_tier": "R50*泛惊奇*奇观技艺",
    "roi_valid_days": 9,
    # Comparison against the same tier
    "tier_fission_mean": 1.19,
    "tier_fission_p50": 1.15,
    "tier_ctr_mean": 0.028,
    "tier_bid_mean": 0.35,
    "bid_up_target_min": 0.3675,
    "bid_up_target_max": 0.385,
    # Threshold lines, each a fixed multiple of the channel ROI P50
    "pause_line_min": _roi_line(0.70),
    "pause_line_max": _roi_line(0.75),
    "bid_down_line_min": _roi_line(0.85),
    "bid_down_line_max": _roi_line(0.90),
    "bid_up_line_min": _roi_line(1.05),
    "bid_up_line_max": _roi_line(1.10),
    # Ad age
    "age_segment": "mature",
    "age_protection_level": "正常调控(成熟期)",
    "allow_bid_down": True,
    "allow_bid_up": True,
    "max_bid_down_pct": 0.05,
    "high_burn_alert": True,
    # Bid adjustment range
    "bid_change_min_pct": 0.03,
    "bid_change_max_pct": 0.10,
}
# ═══════════════════════════════════════════
# 2. Load the system prompt and the skill documents
# ═══════════════════════════════════════════
MINI_DIR = PROJECT_ROOT / "examples" / "auto_put_ad_mini"
# Explicit UTF-8: the prompt files contain Chinese text and must not depend on
# the platform's locale default encoding.
system_prompt = (MINI_DIR / "prompts" / "system.prompt").read_text(encoding="utf-8")

skills_dir = MINI_DIR / "skills"
skill_sections = []
for skill_name in ["ad_domain", "platform_rules", "decision_strategy", "posterior_wisdom"]:
    # Try the underscore spelling of the file name first, then the hyphenated one.
    candidates = (
        skills_dir / f"{skill_name.replace('-', '_')}.md",
        skills_dir / f"{skill_name.replace('_', '-')}.md",
    )
    fpath = next((path for path in candidates if path.exists()), None)
    if fpath is None:
        # Still best-effort (the skill is skipped), but say so: a silently
        # missing skill would change the prompt without anyone noticing.
        print(f" ⚠️ 未找到 skill 文件: {skill_name}", file=sys.stderr)
        continue
    skill_sections.append(
        f"\n\n--- SKILL: {skill_name} ---\n" + fpath.read_text(encoding="utf-8")
    )
skills_text = "".join(skill_sections)
# ═══════════════════════════════════════════
# 3. Build the LLM request
# ═══════════════════════════════════════════
_roi_factors = {
    "ROI_LOW_FACTOR": 0.75,
    "BID_DOWN_ROI_FACTOR": 0.90,
    "BID_UP_ROI_FACTOR": 1.05,
}

_review_summary = {
    "total": 1,
    "need_review_ads_total": 1,
    "tier_groups": 1,
}

_roi_distribution = {
    "channel_roi_p50": CHANNEL_ROI_P50,
    "p25": 1.8,
    "p50": CHANNEL_ROI_P50,
    "p75": 5.2,
    "p90": 8.1,
}

# The factors come first, then the absolute lines derived from them.
_thresholds_used = {
    **_roi_factors,
    "channel_roi_p50": CHANNEL_ROI_P50,
    "pause_line": round(CHANNEL_ROI_P50 * _roi_factors["ROI_LOW_FACTOR"], 4),
    "bid_down_line": round(CHANNEL_ROI_P50 * _roi_factors["BID_DOWN_ROI_FACTOR"], 4),
    "bid_up_line": round(CHANNEL_ROI_P50 * _roi_factors["BID_UP_ROI_FACTOR"], 4),
}

# Per-tier batch structure (option 1: batch by tier); here a single batch
# holding the single ad.
_tier_batches = [
    {
        "audience_tier": ad_dict["audience_tier"],
        "count": 1,
        "ads": [ad_dict],
    }
]

tool_result = json.dumps(
    {
        "summary": _review_summary,
        "distribution": _roi_distribution,
        "thresholds_used": _thresholds_used,
        "tier_batches": _tier_batches,
    },
    indent=2,
    ensure_ascii=False,
)

user_msg = f"""请对以下 1 条待评估广告做出决策。
数据如下:
{tool_result}
请以 JSON 格式返回决策列表,每条包含:
- ad_id
- action: pause / bid_down / bid_up / scale_up / creative_adjust / observe / hold
- dimension: 简短描述
- reason: 包含 ROI值、对比基准、偏离百分比、裂变率对比、行动建议
- recommended_change_pct: 数字(降价为负,提价为正,其他为0)
"""

# The system turn is the base prompt followed by the concatenated skill documents.
_system_turn = {"role": "system", "content": f"{system_prompt}\n\n{skills_text}"}
_user_turn = {"role": "user", "content": user_msg}
messages = [_system_turn, _user_turn]
# ═══════════════════════════════════════════
# 4. Call the OpenRouter API
# ═══════════════════════════════════════════
MODEL_NAME = "anthropic/claude-sonnet-4.5"

api_key = os.environ.get("OPEN_ROUTER_API_KEY") or os.environ.get("OPENROUTER_API_KEY")
if not api_key:
    print("❌ 找不到 OPEN_ROUTER_API_KEY")
    sys.exit(1)

# The banner values are read from ad_dict so they cannot drift from the data
# that is actually sent to the model.
fission_uplift = ad_dict["ad_fission"] / ad_dict["tier_fission_mean"] - 1
print("=" * 60)
print(f" 模型: {MODEL_NAME}")
print(f" 广告数: 1 条({ad_dict['ad_id']})")
print(f" 关停线(正确值): {ad_dict['pause_line_max']}")
print(f" 广告动态ROI: {ad_dict['动态ROI_7日均值']}")
print(
    f" 裂变率 vs 同类: {ad_dict['ad_fission']} vs {ad_dict['tier_fission_mean']}"
    f" ({fission_uplift:+.0%})"
)
print("=" * 60)
print("\n调用 LLM 中...\n")

try:
    resp = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        json={
            "model": MODEL_NAME,
            "temperature": 0.3,
            "max_tokens": 4000,
            "messages": messages,
        },
        timeout=120,
    )
except requests.RequestException as exc:
    # Connection, proxy and timeout failures: report and exit like the other
    # failure paths instead of dumping a traceback.
    print(f"❌ 请求失败: {exc}")
    sys.exit(1)

if resp.status_code != 200:
    print(f"❌ API 错误: {resp.status_code}")
    print(resp.text[:500])
    sys.exit(1)

# A 200 response can still lack a usable completion (non-JSON body, missing
# "choices", or null content), so validate before indexing into it.
try:
    result = resp.json()
    content = result["choices"][0]["message"]["content"]
except (ValueError, LookupError, TypeError):
    content = None
if not isinstance(content, str):
    print("❌ 响应格式异常: 未找到文本内容")
    print(resp.text[:500])
    sys.exit(1)

print("=" * 60)
print(" LLM 输出(单条广告)")
print("=" * 60)
print(content)
print()
# ═══════════════════════════════════════════
# 5. Quick verification
# ═══════════════════════════════════════════
import re  # used only by the checks in this section

print("=" * 60)
print(" 验证")
print("=" * 60)

# Pause-line check. The patterns require that the number is not the tail of a
# longer number (e.g. "12.66"), so unrelated figures in the output do not
# trigger a verdict. "2.52" as a prefix also covers "2.524" / "2.5240".
if re.search(r"(?<![\d.])2\.660*(?!\d)", content):
    print(" ❌ 仍然幻觉关停线 2.66")
elif re.search(r"(?<![\d.])2\.52", content):
    print(" ✅ 关停线数字正确(2.524)")
else:
    print(" ⚠️ 关停线数字未出现在输出中")

# Action check. Prefer the value(s) of the "action" field itself; a quoted
# action name that merely appears inside the free-text reason must not count.
lowered = content.lower()
declared_actions = set(
    re.findall(r"""["']action["']\s*:\s*["']([a-z_]+)["']""", lowered)
)


def _action_present(name):
    """Return True if `name` is the declared action, or a quoted token as fallback."""
    if declared_actions:
        return name in declared_actions
    # No parseable "action" field: fall back to scanning for the quoted token.
    return f'"{name}"' in lowered or f"'{name}'" in lowered


# Checked in priority order; the first match decides the verdict.
_ACTION_VERDICTS = (
    ("pause", " ❌ 仍然决策 pause(裂变+35% 被忽略)"),
    ("bid_down", " ✅ 决策 bid_down(裂变好 + ROI 低 → 降价而非关停)"),
    ("observe", " ✅ 决策 observe(近期已调整,等稳定)"),
    ("hold", " ⚠️ 决策 hold(保守但可接受)"),
)
for _action_name, _verdict in _ACTION_VERDICTS:
    if _action_present(_action_name):
        print(_verdict)
        break
else:
    print(" ⚠️ action 不在预期范围内")