# howard / Agent
"""
Single-ad decision test — a controlled-variable experiment.

Give the LLM data for exactly one ad (93249577421) and see whether it still:
  1. hallucinates a pause line of 2.66 (the actual value is 2.524)
  2. ignores the positive +35% fission signal and decides to pause
"""
import json
import os
import re
import sys
from pathlib import Path

# Proxy + project root.
# setdefault only fills in the local proxy when the variable is not already
# present in the environment.
os.environ.setdefault("HTTP_PROXY", "http://127.0.0.1:29758")
os.environ.setdefault("HTTPS_PROXY", "http://127.0.0.1:29758")

# The checkout location is machine-specific, so it can be overridden with the
# AGENT_PROJECT_ROOT environment variable; the original path stays the default.
PROJECT_ROOT = Path(
    os.environ.get("AGENT_PROJECT_ROOT", "/Users/liulidong/project/agent/Agent")
)
# NOTE(review): presumably added so modules under the project root are
# importable — nothing in this script imports one; confirm it is still needed.
sys.path.insert(0, str(PROJECT_ROOT))

# Imported here (not at the top of the file) to keep the original ordering:
# the .env file is loaded right after PROJECT_ROOT is known.
from dotenv import load_dotenv
load_dotenv(PROJECT_ROOT / ".env")

import requests

# ═══════════════════════════════════════════
# 1. Build the ad_dict for this ad (same shape as the get_ads_for_review output)
# ═══════════════════════════════════════════
CHANNEL_ROI_P50 = 3.3654

# Multiplier applied to CHANNEL_ROI_P50 for each threshold line. The insertion
# order below is the order in which these keys appear inside ad_dict.
_THRESHOLD_FACTORS = {
    "pause_line_min": 0.70,
    "pause_line_max": 0.75,
    "bid_down_line_min": 0.85,
    "bid_down_line_max": 0.90,
    "bid_up_line_min": 1.05,
    "bid_up_line_max": 1.10,
}
_threshold_lines = {
    line_name: round(CHANNEL_ROI_P50 * factor, 4)
    for line_name, factor in _THRESHOLD_FACTORS.items()
}

ad_dict = {
    "ad_id": 93249577421,
    "ad_name": "4.8-朋友圈+公众号-关键页-R50-惊奇-60+不限性别",
    "动态ROI_7日均值": 2.1964,
    "cost_7d_avg": 5702.44,
    "cost_7d_total": 39917.09,
    "ad_age_days": 10,
    "bid_increased_7d": True,
    "creative_changed_7d": True,
    "stable_spend_days_30d": 9,
    "bid_amount": 0.39,
    # ROI sits in the bid-down range (the fission judgement is left to the LLM)
    "roi_zone": "bid_down_zone",
    # Fission rate is 10%+ above the mean of its tier
    "fission_vs_tier": "high",
    "bid_up_candidate": False,
    "scale_up_candidate": False,
    "ad_fission": 1.61,
    "ad_ctr": 0.032,
    "yesterday_cost": 5702.44,
    "audience_tier": "R50*泛惊奇*奇观技艺",
    "roi_valid_days": 9,
    # Same-tier comparison
    "tier_fission_mean": 1.19,
    "tier_fission_p50": 1.15,
    "tier_ctr_mean": 0.028,
    "tier_bid_mean": 0.35,
    "bid_up_target_min": 0.3675,
    "bid_up_target_max": 0.385,
    # Threshold lines
    **_threshold_lines,
    # Ad age
    "age_segment": "mature",
    "age_protection_level": "正常调控（成熟期）",
    "allow_bid_down": True,
    "allow_bid_up": True,
    "max_bid_down_pct": 0.05,
    "high_burn_alert": True,
    # Bid adjustment range
    "bid_change_min_pct": 0.03,
    "bid_change_max_pct": 0.10,
}

# ═══════════════════════════════════════════
# 2. Load the system prompt + skills
# ═══════════════════════════════════════════
MINI_DIR = PROJECT_ROOT / "examples" / "auto_put_ad_mini"

# Decode explicitly as UTF-8: without an encoding argument Path.read_text()
# falls back to the locale's preferred encoding, which can mis-decode or
# reject any non-ASCII text in these files.
system_prompt = (MINI_DIR / "prompts" / "system.prompt").read_text(encoding="utf-8")

skills_dir = MINI_DIR / "skills"
skill_sections = []
for skill_name in ["ad_domain", "platform_rules", "decision_strategy", "posterior_wisdom"]:
    # Accept either file-name spelling: underscores first, then hyphens.
    candidates = (
        skills_dir / f"{skill_name.replace('-', '_')}.md",
        skills_dir / f"{skill_name.replace('_', '-')}.md",
    )
    fpath = next((path for path in candidates if path.exists()), None)
    if fpath is None:
        # A missing skill changes what the model is shown, so report it
        # instead of skipping silently; the run still continues.
        print(f"⚠️  找不到 skill 文件，已跳过: {skill_name}")
        continue
    skill_sections.append(
        f"\n\n--- SKILL: {skill_name} ---\n" + fpath.read_text(encoding="utf-8")
    )

# Each section already carries its own leading separator, so join with "".
skills_text = "".join(skill_sections)

# ═══════════════════════════════════════════
# 3. Build the LLM request
# ═══════════════════════════════════════════
_roi_low_factor = 0.75
_bid_down_roi_factor = 0.90
_bid_up_roi_factor = 1.05

_summary = {"total": 1, "need_review_ads_total": 1, "tier_groups": 1}

_distribution = {
    "channel_roi_p50": CHANNEL_ROI_P50,
    "p25": 1.8,
    "p50": CHANNEL_ROI_P50,
    "p75": 5.2,
    "p90": 8.1,
}

_thresholds_used = {
    "ROI_LOW_FACTOR": _roi_low_factor,
    "BID_DOWN_ROI_FACTOR": _bid_down_roi_factor,
    "BID_UP_ROI_FACTOR": _bid_up_roi_factor,
    "channel_roi_p50": CHANNEL_ROI_P50,
    "pause_line": round(CHANNEL_ROI_P50 * _roi_low_factor, 4),
    "bid_down_line": round(CHANNEL_ROI_P50 * _bid_down_roi_factor, 4),
    "bid_up_line": round(CHANNEL_ROI_P50 * _bid_up_roi_factor, 4),
}

# Tier-batched structure (option 1: batch by tier); here a single batch
# holding the single ad.
_tier_batch = {
    "audience_tier": ad_dict["audience_tier"],
    "count": 1,
    "ads": [ad_dict],
}

_tool_payload = {
    "summary": _summary,
    "distribution": _distribution,
    "thresholds_used": _thresholds_used,
    "tier_batches": [_tier_batch],
}
tool_result = json.dumps(_tool_payload, ensure_ascii=False, indent=2)

# The user message, one entry per line; the trailing empty entry yields the
# final newline after the last bullet.
_user_msg_lines = [
    "请对以下 1 条待评估广告做出决策。",
    "",
    "数据如下：",
    tool_result,
    "",
    "请以 JSON 格式返回决策列表，每条包含：",
    "- ad_id",
    "- action: pause / bid_down / bid_up / scale_up / creative_adjust / observe / hold",
    "- dimension: 简短描述",
    "- reason: 包含 ROI值、对比基准、偏离百分比、裂变率对比、行动建议",
    "- recommended_change_pct: 数字（降价为负，提价为正，其他为0）",
    "",
]
user_msg = "\n".join(_user_msg_lines)

# The system message is the system prompt followed by the loaded skills text.
_system_message = {"role": "system", "content": system_prompt + "\n\n" + skills_text}
_user_message = {"role": "user", "content": user_msg}
messages = [_system_message, _user_message]

# ═══════════════════════════════════════════
# 4. Call the OpenRouter API
# ═══════════════════════════════════════════
# Single source for the model id so the banner and the request cannot drift.
MODEL = "anthropic/claude-sonnet-4.5"

api_key = os.environ.get("OPEN_ROUTER_API_KEY") or os.environ.get("OPENROUTER_API_KEY")
if not api_key:
    print("❌ 找不到 OPEN_ROUTER_API_KEY")
    sys.exit(1)

# Banner values are read from ad_dict so they always describe the data that
# is actually sent to the model.
_fission_uplift = ad_dict["ad_fission"] / ad_dict["tier_fission_mean"] - 1

print("=" * 60)
print(f"  模型: {MODEL}")
print(f"  广告数: 1 条（{ad_dict['ad_id']}）")
print(f"  关停线（正确值）: {round(CHANNEL_ROI_P50 * 0.75, 4)}")
print(f"  广告动态ROI: {ad_dict['动态ROI_7日均值']}")
print(
    f"  裂变率 vs 同类: {ad_dict['ad_fission']} vs "
    f"{ad_dict['tier_fission_mean']} ({_fission_uplift:+.0%})"
)
print("=" * 60)
print("\n调用 LLM 中...\n")

try:
    resp = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        json={
            "model": MODEL,
            "temperature": 0.3,
            "max_tokens": 4000,
            "messages": messages,
        },
        timeout=120,
    )
except requests.RequestException as exc:
    # Connection/proxy/timeout failures: report them like the other error
    # paths instead of ending in a traceback.
    print(f"❌ 请求失败: {exc}")
    sys.exit(1)

if resp.status_code != 200:
    print(f"❌ API 错误: {resp.status_code}")
    print(resp.text[:500])
    sys.exit(1)

# A 200 response is not guaranteed to carry a completion: the body may be
# non-JSON, may lack "choices", or may hold a null content. Treat all of
# those as an API error rather than crashing here or in the checks below.
try:
    result = resp.json()
    content = result["choices"][0]["message"]["content"]
except (ValueError, LookupError, TypeError):
    content = None

if not isinstance(content, str):
    print("❌ 响应中没有可用的文本内容")
    print(resp.text[:500])
    sys.exit(1)

print("=" * 60)
print("  LLM 输出（单条广告）")
print("=" * 60)
print(content)
print()

# ═══════════════════════════════════════════
# 5. Quick verification
# ═══════════════════════════════════════════
print("=" * 60)
print("  验证")
print("=" * 60)

# Fall back to an empty string so the checks cannot raise on a non-string reply.
_reply_text = content if isinstance(content, str) else ""

# --- Pause-line check ---
# The lookbehind rejects hits that are the tail of a longer number (such as
# "12.66" or "12.52"), which a plain substring test would count as a match.
# "2.52" as a prefix also covers "2.524".
if re.search(r"(?<![\d.])2\.66", _reply_text):
    print("  ❌ 仍然幻觉关停线 2.66")
elif re.search(r"(?<![\d.])2\.52", _reply_text):
    print("  ✅ 关停线数字正确（2.524）")
else:
    print("  ⚠️  关停线数字未出现在输出中")

# --- Action check ---
# Verdict per action, in the priority order used by the fallback scan below.
_ACTION_VERDICTS = {
    "pause": "  ❌ 仍然决策 pause（裂变+35% 被忽略）",
    "bid_down": "  ✅ 决策 bid_down（裂变好 + ROI 低 → 降价而非关停）",
    "observe": "  ✅ 决策 observe（近期已调整，等稳定）",
    "hold": "  ⚠️  决策 hold（保守但可接受）",
}

# Prefer the value of the JSON "action" field: scanning the whole reply for a
# quoted action name also matches names that are merely mentioned elsewhere
# in the reply (for example inside the reason text).
_action_field = re.search(r'"action"\s*:\s*"([^"]*)"', _reply_text, flags=re.IGNORECASE)
if _action_field:
    _decided_action = _action_field.group(1).strip().lower()
else:
    # No parsable action field: fall back to looking for a quoted action
    # name anywhere in the reply.
    _lowered_reply = _reply_text.lower()
    _decided_action = next(
        (
            name
            for name in _ACTION_VERDICTS
            if f'"{name}"' in _lowered_reply or f"'{name}'" in _lowered_reply
        ),
        None,
    )

print(_ACTION_VERDICTS.get(_decided_action, "  ⚠️  action 不在预期范围内"))