yangxiaohui
/
kg_agent


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831
							import asyncio
import json
import os
import sys
import argparse
from datetime import datetime
from typing import Literal

from agents import Agent, Runner
from lib.my_trace import set_trace
from pydantic import BaseModel, Field

from lib.utils import read_file_as_string
from lib.client import get_model
MODEL_NAME = "google/gemini-2.5-flash"
from script.search_recommendations.xiaohongshu_search_recommendations import XiaohongshuSearchRecommendations
from script.search.xiaohongshu_search import XiaohongshuSearch


# ============================================================================
# 数据模型
# ============================================================================

class Seg(BaseModel):
    """分词"""
    text: str
    score_with_o: float = 0.0  # 与原始问题的评分
    reason: str = ""  # 评分理由
    from_o: str = ""  # 原始问题


class Word(BaseModel):
    """词"""
    text: str
    score_with_o: float = 0.0  # 与原始问题的评分
    from_o: str = ""  # 原始问题


class QFromQ(BaseModel):
    """Q来源信息（用于Sug中记录）"""
    text: str
    score_with_o: float = 0.0


class Q(BaseModel):
    """查询"""
    text: str
    score_with_o: float = 0.0  # 与原始问题的评分
    reason: str = ""  # 评分理由
    from_source: str = ""  # seg/sug/add（加词）


class Sug(BaseModel):
    """建议词"""
    text: str
    score_with_o: float = 0.0  # 与原始问题的评分
    reason: str = ""  # 评分理由
    from_q: QFromQ | None = None  # 来自的q


class Seed(BaseModel):
    """种子"""
    text: str
    added_words: list[str] = Field(default_factory=list)  # 已经增加的words
    from_type: str = ""  # seg/sug
    score_with_o: float = 0.0  # 与原始问题的评分


class Post(BaseModel):
    """帖子"""
    title: str = ""
    body_text: str = ""
    type: str = "normal"  # video/normal
    images: list[str] = Field(default_factory=list)  # 图片url列表，第一张为封面
    video: str = ""  # 视频url
    interact_info: dict = Field(default_factory=dict)  # 互动信息
    note_id: str = ""
    note_url: str = ""


class Search(Sug):
    """搜索结果（继承Sug）"""
    post_list: list[Post] = Field(default_factory=list)  # 搜索得到的帖子列表


class RunContext(BaseModel):
    """运行上下文"""
    version: str
    input_files: dict[str, str]
    c: str  # 原始需求
    o: str  # 原始问题
    log_url: str
    log_dir: str

    # 每轮的数据
    rounds: list[dict] = Field(default_factory=list)  # 每轮的详细数据

    # 最终结果
    final_output: str | None = None


# ============================================================================
# Agent 定义
# ============================================================================

# Agent 1: 分词专家
class WordSegmentation(BaseModel):
    """分词结果"""
    words: list[str] = Field(..., description="分词结果列表")
    reasoning: str = Field(..., description="分词理由")

word_segmentation_instructions = """
你是分词专家。给定一个query，将其拆分成有意义的最小单元。

## 分词原则
1. 保留有搜索意义的词汇
2. 拆分成独立的概念
3. 保留专业术语的完整性
4. 去除虚词（的、吗、呢等）

## 输出要求
返回分词列表和分词理由。
""".strip()

word_segmenter = Agent[None](
    name="分词专家",
    instructions=word_segmentation_instructions,
    model=get_model(MODEL_NAME),
    output_type=WordSegmentation,
)


# Agent 2: 相关度评估专家
class RelevanceEvaluation(BaseModel):
    """相关度评估"""
    relevance_score: float = Field(..., description="相关性分数 0-1")
    reason: str = Field(..., description="评估理由")

relevance_evaluation_instructions = """
你是相关度评估专家。

## 任务
评估当前文本与原始问题的匹配程度。

## 评估标准
- 主题相关性
- 要素覆盖度
- 意图匹配度

## 输出
- relevance_score: 0-1的相关性分数
- reason: 详细理由
""".strip()

relevance_evaluator = Agent[None](
    name="相关度评估专家",
    instructions=relevance_evaluation_instructions,
    model=get_model(MODEL_NAME),
    output_type=RelevanceEvaluation,
)


# Agent 3: 加词选择专家
class WordSelection(BaseModel):
    """加词选择结果"""
    selected_word: str = Field(..., description="选择的词")
    combined_query: str = Field(..., description="组合后的新query")
    reasoning: str = Field(..., description="选择理由")

word_selection_instructions = """
你是加词选择专家。

## 任务
从候选词列表中选择一个最合适的词，与当前seed组合成新的query。

## 原则
1. 选择与当前seed最相关的词
2. 组合后的query要语义通顺
3. 符合搜索习惯
4. 优先选择能扩展搜索范围的词

## 输出
- selected_word: 选中的词
- combined_query: 组合后的新query
- reasoning: 选择理由
""".strip()

word_selector = Agent[None](
    name="加词选择专家",
    instructions=word_selection_instructions,
    model=get_model(MODEL_NAME),
    output_type=WordSelection,
)


# ============================================================================
# 辅助函数
# ============================================================================

def process_note_data(note: dict) -> Post:
    """处理搜索接口返回的帖子数据"""
    note_card = note.get("note_card", {})
    image_list = note_card.get("image_list", [])
    interact_info = note_card.get("interact_info", {})
    user_info = note_card.get("user", {})

    # 提取图片URL - 使用新的字段名 image_url
    images = []
    for img in image_list:
        if isinstance(img, dict):
            # 尝试新字段名 image_url，如果不存在则尝试旧字段名 url_default
            img_url = img.get("image_url") or img.get("url_default")
            if img_url:
                images.append(img_url)

    # 判断类型
    note_type = note_card.get("type", "normal")
    video_url = ""
    if note_type == "video":
        video_info = note_card.get("video", {})
        if isinstance(video_info, dict):
            # 尝试获取视频URL
            video_url = video_info.get("media", {}).get("stream", {}).get("h264", [{}])[0].get("master_url", "")

    return Post(
        note_id=note.get("id", ""),
        title=note_card.get("display_title", ""),
        body_text=note_card.get("desc", ""),
        type=note_type,
        images=images,
        video=video_url,
        interact_info={
            "liked_count": interact_info.get("liked_count", 0),
            "collected_count": interact_info.get("collected_count", 0),
            "comment_count": interact_info.get("comment_count", 0),
            "shared_count": interact_info.get("shared_count", 0)
        },
        note_url=f"https://www.xiaohongshu.com/explore/{note.get('id', '')}"
    )


async def evaluate_with_o(text: str, o: str) -> tuple[float, str]:
    """评估文本与原始问题o的相关度

    Returns:
        tuple[float, str]: (相关度分数, 评估理由)
    """
    eval_input = f"""
<原始问题>
{o}
</原始问题>

<当前文本>
{text}
</当前文本>

请评估当前文本与原始问题的相关度。
"""
    result = await Runner.run(relevance_evaluator, eval_input)
    evaluation: RelevanceEvaluation = result.final_output
    return evaluation.relevance_score, evaluation.reason


# ============================================================================
# 核心流程函数
# ============================================================================

async def initialize(o: str, context: RunContext) -> tuple[list[Seg], list[Word], list[Q], list[Seed]]:
    """
    初始化阶段

    Returns:
        (seg_list, word_list_1, q_list_1, seed_list)
    """
    print(f"\n{'='*60}")
    print(f"初始化阶段")
    print(f"{'='*60}")

    # 1. 分词：原始问题(o) ->分词-> seg_list
    print(f"\n[步骤1] 分词...")
    result = await Runner.run(word_segmenter, o)
    segmentation: WordSegmentation = result.final_output

    seg_list = []
    for word in segmentation.words:
        seg_list.append(Seg(text=word, from_o=o))

    print(f"分词结果: {[s.text for s in seg_list]}")
    print(f"分词理由: {segmentation.reasoning}")

    # 2. 分词评估：seg_list -> 每个seg与o进行评分（并发）
    print(f"\n[步骤2] 评估每个分词与原始问题的相关度...")

    async def evaluate_seg(seg: Seg) -> Seg:
        seg.score_with_o, seg.reason = await evaluate_with_o(seg.text, o)
        return seg

    if seg_list:
        eval_tasks = [evaluate_seg(seg) for seg in seg_list]
        await asyncio.gather(*eval_tasks)

    for seg in seg_list:
        print(f"  {seg.text}: {seg.score_with_o:.2f}")

    # 3. 构建word_list_1: seg_list -> word_list_1
    print(f"\n[步骤3] 构建word_list_1...")
    word_list_1 = []
    for seg in seg_list:
        word_list_1.append(Word(
            text=seg.text,
            score_with_o=seg.score_with_o,
            from_o=o
        ))
    print(f"word_list_1: {[w.text for w in word_list_1]}")

    # 4. 构建q_list_1：seg_list 作为 q_list_1
    print(f"\n[步骤4] 构建q_list_1...")
    q_list_1 = []
    for seg in seg_list:
        q_list_1.append(Q(
            text=seg.text,
            score_with_o=seg.score_with_o,
            reason=seg.reason,
            from_source="seg"
        ))
    print(f"q_list_1: {[q.text for q in q_list_1]}")

    # 5. 构建seed_list: seg_list -> seed_list
    print(f"\n[步骤5] 构建seed_list...")
    seed_list = []
    for seg in seg_list:
        seed_list.append(Seed(
            text=seg.text,
            added_words=[],
            from_type="seg",
            score_with_o=seg.score_with_o
        ))
    print(f"seed_list: {[s.text for s in seed_list]}")

    return seg_list, word_list_1, q_list_1, seed_list


async def run_round(
    round_num: int,
    q_list: list[Q],
    word_list: list[Word],
    seed_list: list[Seed],
    o: str,
    context: RunContext,
    xiaohongshu_api: XiaohongshuSearchRecommendations,
    xiaohongshu_search: XiaohongshuSearch,
    sug_threshold: float = 0.7
) -> tuple[list[Word], list[Q], list[Seed], list[Search]]:
    """
    运行一轮

    Args:
        round_num: 轮次编号
        q_list: 当前轮的q列表
        word_list: 当前的word列表
        seed_list: 当前的seed列表
        o: 原始问题
        context: 运行上下文
        xiaohongshu_api: 建议词API
        xiaohongshu_search: 搜索API
        sug_threshold: suggestion的阈值

    Returns:
        (word_list_next, q_list_next, seed_list_next, search_list)
    """
    print(f"\n{'='*60}")
    print(f"第{round_num}轮")
    print(f"{'='*60}")

    round_data = {
        "round_num": round_num,
        "input_q_list": [{"text": q.text, "score": q.score_with_o} for q in q_list],
        "input_word_list_size": len(word_list),
        "input_seed_list_size": len(seed_list)
    }

    # 1. 请求sug：q_list -> 每个q请求sug接口 -> sug_list_list
    print(f"\n[步骤1] 为每个q请求建议词...")
    sug_list_list = []  # list of list
    for q in q_list:
        print(f"\n  处理q: {q.text}")
        suggestions = xiaohongshu_api.get_recommendations(keyword=q.text)

        q_sug_list = []
        if suggestions:
            print(f"    获取到 {len(suggestions)} 个建议词")
            for sug_text in suggestions:
                sug = Sug(
                    text=sug_text,
                    from_q=QFromQ(text=q.text, score_with_o=q.score_with_o)
                )
                q_sug_list.append(sug)
        else:
            print(f"    未获取到建议词")

        sug_list_list.append(q_sug_list)

    # 2. sug评估：sug_list_list -> 每个sug与o进评分（并发）
    print(f"\n[步骤2] 评估每个建议词与原始问题的相关度...")

    # 2.1 收集所有需要评估的sug，并记录它们所属的q
    all_sugs = []
    sug_to_q_map = {}  # 记录每个sug属于哪个q
    for i, q_sug_list in enumerate(sug_list_list):
        if q_sug_list:
            q_text = q_list[i].text
            for sug in q_sug_list:
                all_sugs.append(sug)
                sug_to_q_map[id(sug)] = q_text

    # 2.2 并发评估所有sug
    async def evaluate_sug(sug: Sug) -> Sug:
        sug.score_with_o, sug.reason = await evaluate_with_o(sug.text, o)
        return sug

    if all_sugs:
        eval_tasks = [evaluate_sug(sug) for sug in all_sugs]
        await asyncio.gather(*eval_tasks)

    # 2.3 打印结果并组织到sug_details
    sug_details = {}  # 保存每个Q对应的sug列表
    for i, q_sug_list in enumerate(sug_list_list):
        if q_sug_list:
            q_text = q_list[i].text
            print(f"\n  来自q '{q_text}' 的建议词:")
            sug_details[q_text] = []
            for sug in q_sug_list:
                print(f"    {sug.text}: {sug.score_with_o:.2f}")
                # 保存到sug_details
                sug_details[q_text].append({
                    "text": sug.text,
                    "score": sug.score_with_o,
                    "reason": sug.reason
                })

    # 3. search_list构建
    print(f"\n[步骤3] 构建search_list（阈值>{sug_threshold}）...")
    search_list = []
    high_score_sugs = [sug for sug in all_sugs if sug.score_with_o > sug_threshold]

    if high_score_sugs:
        print(f"  找到 {len(high_score_sugs)} 个高分建议词")

        # 并发搜索
        async def search_for_sug(sug: Sug) -> Search:
            print(f"    搜索: {sug.text}")
            try:
                search_result = xiaohongshu_search.search(keyword=sug.text)
                result_str = search_result.get("result", "{}")
                if isinstance(result_str, str):
                    result_data = json.loads(result_str)
                else:
                    result_data = result_str

                notes = result_data.get("data", {}).get("data", [])
                post_list = []
                for note in notes[:10]:  # 只取前10个
                    post = process_note_data(note)
                    post_list.append(post)

                print(f"      → 找到 {len(post_list)} 个帖子")

                return Search(
                    text=sug.text,
                    score_with_o=sug.score_with_o,
                    from_q=sug.from_q,
                    post_list=post_list
                )
            except Exception as e:
                print(f"      ✗ 搜索失败: {e}")
                return Search(
                    text=sug.text,
                    score_with_o=sug.score_with_o,
                    from_q=sug.from_q,
                    post_list=[]
                )

        search_tasks = [search_for_sug(sug) for sug in high_score_sugs]
        search_list = await asyncio.gather(*search_tasks)
    else:
        print(f"  没有高分建议词，search_list为空")

    # 4. 构建word_list_next: word_list -> word_list_next（先直接复制）
    print(f"\n[步骤4] 构建word_list_next（暂时直接复制）...")
    word_list_next = word_list.copy()

    # 5. 构建q_list_next
    print(f"\n[步骤5] 构建q_list_next...")
    q_list_next = []
    add_word_details = {}  # 保存每个seed对应的组合词列表

    # 5.1 对于seed_list中的每个seed，从word_list_next中选一个未加过的词
    print(f"\n  5.1 为每个seed加词...")
    for seed in seed_list:
        print(f"\n    处理seed: {seed.text}")

        # 简单过滤：找出不在seed.text中且未被添加过的词
        candidate_words = []
        for word in word_list_next:
            # 检查词是否已在seed中
            if word.text in seed.text:
                continue
            # 检查词是否已被添加过
            if word.text in seed.added_words:
                continue
            candidate_words.append(word)

        if not candidate_words:
            print(f"      没有可用的候选词")
            continue

        print(f"      候选词: {[w.text for w in candidate_words]}")

        # 使用Agent选择最合适的词
        selection_input = f"""
<原始问题>
{o}
</原始问题>

<当前Seed>
{seed.text}
</当前Seed>

<候选词列表>
{', '.join([w.text for w in candidate_words])}
</候选词列表>

请从候选词中选择一个最合适的词，与当前seed组合成新的query。
"""
        result = await Runner.run(word_selector, selection_input)
        selection: WordSelection = result.final_output

        # 验证选择的词是否在候选列表中
        if selection.selected_word not in [w.text for w in candidate_words]:
            print(f"      ✗ Agent选择的词 '{selection.selected_word}' 不在候选列表中，跳过")
            continue

        print(f"      ✓ 选择词: {selection.selected_word}")
        print(f"      ✓ 新query: {selection.combined_query}")
        print(f"      理由: {selection.reasoning}")

        # 评估新query
        new_q_score, new_q_reason = await evaluate_with_o(selection.combined_query, o)
        print(f"      新query评分: {new_q_score:.2f}")

        # 创建新的q
        new_q = Q(
            text=selection.combined_query,
            score_with_o=new_q_score,
            reason=new_q_reason,
            from_source="add"
        )
        q_list_next.append(new_q)

        # 更新seed的added_words
        seed.added_words.append(selection.selected_word)

        # 保存到add_word_details
        if seed.text not in add_word_details:
            add_word_details[seed.text] = []
        add_word_details[seed.text].append({
            "text": selection.combined_query,
            "score": new_q_score,
            "reason": new_q_reason,
            "selected_word": selection.selected_word
        })

    # 5.2 对于sug_list_list中，每个sug大于来自的query分数，加到q_list_next
    print(f"\n  5.2 将高分sug加入q_list_next...")
    for sug in all_sugs:
        if sug.from_q and sug.score_with_o > sug.from_q.score_with_o:
            new_q = Q(
                text=sug.text,
                score_with_o=sug.score_with_o,
                reason=sug.reason,
                from_source="sug"
            )
            q_list_next.append(new_q)
            print(f"    ✓ {sug.text} (分数: {sug.score_with_o:.2f} > {sug.from_q.score_with_o:.2f})")

    # 6. 更新seed_list
    print(f"\n[步骤6] 更新seed_list...")
    seed_list_next = seed_list.copy()  # 保留原有的seed

    # 对于sug_list_list中，每个sug分数大于来源query分数的，且没在seed_list中出现过的，加入
    existing_seed_texts = {seed.text for seed in seed_list_next}
    for sug in all_sugs:
        # 新逻辑：sug分数 > 对应query分数
        if sug.from_q and sug.score_with_o > sug.from_q.score_with_o and sug.text not in existing_seed_texts:
            new_seed = Seed(
                text=sug.text,
                added_words=[],
                from_type="sug",
                score_with_o=sug.score_with_o
            )
            seed_list_next.append(new_seed)
            existing_seed_texts.add(sug.text)
            print(f"  ✓ 新seed: {sug.text} (分数: {sug.score_with_o:.2f} > 来源query: {sug.from_q.score_with_o:.2f})")

    # 记录本轮数据
    round_data.update({
        "sug_count": len(all_sugs),
        "high_score_sug_count": len(high_score_sugs),
        "search_count": len(search_list),
        "total_posts": sum(len(s.post_list) for s in search_list),
        "q_list_next_size": len(q_list_next),
        "seed_list_next_size": len(seed_list_next),
        "word_list_next_size": len(word_list_next),
        "output_q_list": [{"text": q.text, "score": q.score_with_o, "reason": q.reason, "from": q.from_source} for q in q_list_next],
        "seed_list_next": [{"text": seed.text, "from": seed.from_type, "score": seed.score_with_o} for seed in seed_list_next],  # 下一轮种子列表
        "sug_details": sug_details,  # 每个Q对应的sug列表
        "add_word_details": add_word_details  # 每个seed对应的组合词列表
    })
    context.rounds.append(round_data)

    print(f"\n本轮总结:")
    print(f"  建议词数量: {len(all_sugs)}")
    print(f"  高分建议词: {len(high_score_sugs)}")
    print(f"  搜索数量: {len(search_list)}")
    print(f"  帖子总数: {sum(len(s.post_list) for s in search_list)}")
    print(f"  下轮q数量: {len(q_list_next)}")
    print(f"  seed数量: {len(seed_list_next)}")

    return word_list_next, q_list_next, seed_list_next, search_list


async def iterative_loop(
    context: RunContext,
    max_rounds: int = 2,
    sug_threshold: float = 0.7
):
    """主迭代循环"""

    print(f"\n{'='*60}")
    print(f"开始迭代循环")
    print(f"最大轮数: {max_rounds}")
    print(f"sug阈值: {sug_threshold}")
    print(f"{'='*60}")

    # 初始化
    seg_list, word_list, q_list, seed_list = await initialize(context.o, context)

    # API实例
    xiaohongshu_api = XiaohongshuSearchRecommendations()
    xiaohongshu_search = XiaohongshuSearch()

    # 保存初始化数据
    context.rounds.append({
        "round_num": 0,
        "type": "initialization",
        "seg_list": [{"text": s.text, "score": s.score_with_o, "reason": s.reason} for s in seg_list],
        "word_list_1": [{"text": w.text, "score": w.score_with_o} for w in word_list],
        "q_list_1": [{"text": q.text, "score": q.score_with_o, "reason": q.reason} for q in q_list],
        "seed_list": [{"text": s.text, "from_type": s.from_type, "score": s.score_with_o} for s in seed_list]
    })

    # 收集所有搜索结果
    all_search_list = []

    # 迭代
    round_num = 1
    while q_list and round_num <= max_rounds:
        word_list, q_list, seed_list, search_list = await run_round(
            round_num=round_num,
            q_list=q_list,
            word_list=word_list,
            seed_list=seed_list,
            o=context.o,
            context=context,
            xiaohongshu_api=xiaohongshu_api,
            xiaohongshu_search=xiaohongshu_search,
            sug_threshold=sug_threshold
        )

        all_search_list.extend(search_list)
        round_num += 1

    print(f"\n{'='*60}")
    print(f"迭代完成")
    print(f"  总轮数: {round_num - 1}")
    print(f"  总搜索次数: {len(all_search_list)}")
    print(f"  总帖子数: {sum(len(s.post_list) for s in all_search_list)}")
    print(f"{'='*60}")

    return all_search_list


# ============================================================================
# 主函数
# ============================================================================

async def main(input_dir: str, max_rounds: int = 2, sug_threshold: float = 0.7, visualize: bool = False):
    """主函数"""
    current_time, log_url = set_trace()

    # 读取输入
    input_context_file = os.path.join(input_dir, 'context.md')
    input_q_file = os.path.join(input_dir, 'q.md')

    c = read_file_as_string(input_context_file)  # 原始需求
    o = read_file_as_string(input_q_file)  # 原始问题

    # 版本信息
    version = os.path.basename(__file__)
    version_name = os.path.splitext(version)[0]

    # 日志目录
    log_dir = os.path.join(input_dir, "output", version_name, current_time)

    # 创建运行上下文
    run_context = RunContext(
        version=version,
        input_files={
            "input_dir": input_dir,
            "context_file": input_context_file,
            "q_file": input_q_file,
        },
        c=c,
        o=o,
        log_dir=log_dir,
        log_url=log_url,
    )

    # 执行迭代
    all_search_list = await iterative_loop(
        run_context,
        max_rounds=max_rounds,
        sug_threshold=sug_threshold
    )

    # 格式化输出
    output = f"原始需求：{run_context.c}\n"
    output += f"原始问题：{run_context.o}\n"
    output += f"总搜索次数：{len(all_search_list)}\n"
    output += f"总帖子数：{sum(len(s.post_list) for s in all_search_list)}\n"
    output += "\n" + "="*60 + "\n"

    if all_search_list:
        output += "【搜索结果】\n\n"
        for idx, search in enumerate(all_search_list, 1):
            output += f"{idx}. 搜索词: {search.text} (分数: {search.score_with_o:.2f})\n"
            output += f"   帖子数: {len(search.post_list)}\n"
            if search.post_list:
                for post_idx, post in enumerate(search.post_list[:3], 1):  # 只显示前3个
                    output += f"   {post_idx}) {post.title}\n"
                    output += f"      URL: {post.note_url}\n"
            output += "\n"
    else:
        output += "未找到搜索结果\n"

    run_context.final_output = output

    print(f"\n{'='*60}")
    print("最终结果")
    print(f"{'='*60}")
    print(output)

    # 保存日志
    os.makedirs(run_context.log_dir, exist_ok=True)

    context_file_path = os.path.join(run_context.log_dir, "run_context.json")
    context_dict = run_context.model_dump()
    with open(context_file_path, "w", encoding="utf-8") as f:
        json.dump(context_dict, f, ensure_ascii=False, indent=2)
    print(f"\nRunContext saved to: {context_file_path}")

    # 保存详细的搜索结果
    search_results_path = os.path.join(run_context.log_dir, "search_results.json")
    search_results_data = [s.model_dump() for s in all_search_list]
    with open(search_results_path, "w", encoding="utf-8") as f:
        json.dump(search_results_data, f, ensure_ascii=False, indent=2)
    print(f"Search results saved to: {search_results_path}")

    # 可视化
    if visualize:
        import subprocess
        output_html = os.path.join(run_context.log_dir, "visualization.html")
        print(f"\n🎨 生成可视化HTML...")

        # 获取绝对路径
        abs_context_file = os.path.abspath(context_file_path)
        abs_output_html = os.path.abspath(output_html)

        # 运行可视化脚本
        result = subprocess.run([
            "node",
            "visualization/sug_v6_1_2_8/index.js",
            abs_context_file,
            abs_output_html
        ])

        if result.returncode == 0:
            print(f"✅ 可视化已生成: {output_html}")
        else:
            print(f"❌ 可视化生成失败")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="搜索query优化工具 - v6.1.2.8 轮次迭代版")
    parser.add_argument(
        "--input-dir",
        type=str,
        default="input/旅游-逸趣玩旅行/如何获取能体现川西秋季特色的高质量风光摄影素材？",
        help="输入目录路径，默认: input/旅游-逸趣玩旅行/如何获取能体现川西秋季特色的高质量风光摄影素材？"
    )
    parser.add_argument(
        "--max-rounds",
        type=int,
        default=4,
        help="最大轮数，默认: 2"
    )
    parser.add_argument(
        "--sug-threshold",
        type=float,
        default=0.7,
        help="suggestion阈值，默认: 0.7"
    )
    parser.add_argument(
        "--visualize",
        action="store_true",
        default=True,
        help="运行完成后自动生成可视化HTML"
    )
    args = parser.parse_args()

    asyncio.run(main(args.input_dir, max_rounds=args.max_rounds, sug_threshold=args.sug_threshold, visualize=args.visualize))