|
@@ -29,21 +29,26 @@ class QueryState(BaseModel):
|
|
|
relevance_score: float = 0.0 # 与原始需求的相关度
|
|
relevance_score: float = 0.0 # 与原始需求的相关度
|
|
|
parent_query: str | None = None # 父query
|
|
parent_query: str | None = None # 父query
|
|
|
strategy: str | None = None # 生成策略:direct_sug, rewrite, add_word
|
|
strategy: str | None = None # 生成策略:direct_sug, rewrite, add_word
|
|
|
|
|
+ is_terminated: bool = False # 是否已终止(不再处理)
|
|
|
|
|
|
|
|
|
|
|
|
|
class WordLibrary(BaseModel):
|
|
class WordLibrary(BaseModel):
|
|
|
"""动态分词库"""
|
|
"""动态分词库"""
|
|
|
words: set[str] = Field(default_factory=set)
|
|
words: set[str] = Field(default_factory=set)
|
|
|
|
|
+ word_sources: dict[str, str] = Field(default_factory=dict) # 记录词的来源:word -> source(note_id或"initial")
|
|
|
|
|
|
|
|
- def add_word(self, word: str):
|
|
|
|
|
|
|
+ def add_word(self, word: str, source: str = "unknown"):
|
|
|
"""添加单词到分词库"""
|
|
"""添加单词到分词库"""
|
|
|
if word and word.strip():
|
|
if word and word.strip():
|
|
|
- self.words.add(word.strip())
|
|
|
|
|
|
|
+ word = word.strip()
|
|
|
|
|
+ self.words.add(word)
|
|
|
|
|
+ if word not in self.word_sources:
|
|
|
|
|
+ self.word_sources[word] = source
|
|
|
|
|
|
|
|
- def add_words(self, words: list[str]):
|
|
|
|
|
|
|
+ def add_words(self, words: list[str], source: str = "unknown"):
|
|
|
"""批量添加单词"""
|
|
"""批量添加单词"""
|
|
|
for word in words:
|
|
for word in words:
|
|
|
- self.add_word(word)
|
|
|
|
|
|
|
+ self.add_word(word, source)
|
|
|
|
|
|
|
|
def get_unused_word(self, current_query: str) -> str | None:
|
|
def get_unused_word(self, current_query: str) -> str | None:
|
|
|
"""获取一个当前query中没有的词"""
|
|
"""获取一个当前query中没有的词"""
|
|
@@ -54,7 +59,10 @@ class WordLibrary(BaseModel):
|
|
|
|
|
|
|
|
def model_dump(self):
|
|
def model_dump(self):
|
|
|
"""序列化为dict"""
|
|
"""序列化为dict"""
|
|
|
- return {"words": list(self.words)}
|
|
|
|
|
|
|
+ return {
|
|
|
|
|
+ "words": list(self.words),
|
|
|
|
|
+ "word_sources": self.word_sources
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
|
|
|
|
|
class RunContext(BaseModel):
|
|
class RunContext(BaseModel):
|
|
@@ -72,6 +80,9 @@ class RunContext(BaseModel):
|
|
|
query_states: list[dict] = Field(default_factory=list)
|
|
query_states: list[dict] = Field(default_factory=list)
|
|
|
steps: list[dict] = Field(default_factory=list)
|
|
steps: list[dict] = Field(default_factory=list)
|
|
|
|
|
|
|
|
|
|
+ # Query演化图
|
|
|
|
|
+ query_graph: dict = Field(default_factory=dict) # 记录Query的演化路径和关系
|
|
|
|
|
+
|
|
|
# 最终结果
|
|
# 最终结果
|
|
|
satisfied_notes: list[dict] = Field(default_factory=list)
|
|
satisfied_notes: list[dict] = Field(default_factory=list)
|
|
|
final_output: str | None = None
|
|
final_output: str | None = None
|
|
@@ -309,6 +320,93 @@ def add_step(context: RunContext, step_name: str, step_type: str, data: dict):
|
|
|
return step
|
|
return step
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+def add_query_to_graph(context: RunContext, query_state: QueryState, iteration: int, evaluation_reason: str = "", is_selected: bool = True):
|
|
|
|
|
+ """添加Query节点到演化图
|
|
|
|
|
+
|
|
|
|
|
+ Args:
|
|
|
|
|
+ context: 运行上下文
|
|
|
|
|
+ query_state: Query状态
|
|
|
|
|
+ iteration: 迭代次数
|
|
|
|
|
+ evaluation_reason: 评估原因(可选)
|
|
|
|
|
+ is_selected: 是否被选中进入处理队列(默认True)
|
|
|
|
|
+ """
|
|
|
|
|
+ query_id = query_state.query # 直接使用query作为ID
|
|
|
|
|
+
|
|
|
|
|
+ # 初始化图结构
|
|
|
|
|
+ if "nodes" not in context.query_graph:
|
|
|
|
|
+ context.query_graph["nodes"] = {}
|
|
|
|
|
+ context.query_graph["edges"] = []
|
|
|
|
|
+ context.query_graph["iterations"] = {}
|
|
|
|
|
+
|
|
|
|
|
+ # 添加Query节点(type: query)
|
|
|
|
|
+ context.query_graph["nodes"][query_id] = {
|
|
|
|
|
+ "type": "query",
|
|
|
|
|
+ "query": query_state.query,
|
|
|
|
|
+ "level": query_state.level,
|
|
|
|
|
+ "relevance_score": query_state.relevance_score,
|
|
|
|
|
+ "strategy": query_state.strategy,
|
|
|
|
|
+ "parent_query": query_state.parent_query,
|
|
|
|
|
+ "iteration": iteration,
|
|
|
|
|
+ "is_terminated": query_state.is_terminated,
|
|
|
|
|
+ "no_suggestion_rounds": query_state.no_suggestion_rounds,
|
|
|
|
|
+ "evaluation_reason": evaluation_reason, # 评估原因
|
|
|
|
|
+ "is_selected": is_selected # 是否被选中
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ # 添加边(父子关系)
|
|
|
|
|
+ if query_state.parent_query:
|
|
|
|
|
+ parent_id = query_state.parent_query
|
|
|
|
|
+ if parent_id in context.query_graph["nodes"]:
|
|
|
|
|
+ context.query_graph["edges"].append({
|
|
|
|
|
+ "from": parent_id,
|
|
|
|
|
+ "to": query_id,
|
|
|
|
|
+ "edge_type": "query_to_query",
|
|
|
|
|
+ "strategy": query_state.strategy,
|
|
|
|
|
+ "score_improvement": query_state.relevance_score - context.query_graph["nodes"][parent_id]["relevance_score"]
|
|
|
|
|
+ })
|
|
|
|
|
+
|
|
|
|
|
+ # 按迭代分组
|
|
|
|
|
+ if iteration not in context.query_graph["iterations"]:
|
|
|
|
|
+ context.query_graph["iterations"][iteration] = []
|
|
|
|
|
+ context.query_graph["iterations"][iteration].append(query_id)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+def add_note_to_graph(context: RunContext, query: str, note: dict):
|
|
|
|
|
+ """添加Note节点到演化图,并连接到对应的Query"""
|
|
|
|
|
+ note_id = note["note_id"]
|
|
|
|
|
+
|
|
|
|
|
+ # 初始化图结构
|
|
|
|
|
+ if "nodes" not in context.query_graph:
|
|
|
|
|
+ context.query_graph["nodes"] = {}
|
|
|
|
|
+ context.query_graph["edges"] = []
|
|
|
|
|
+ context.query_graph["iterations"] = {}
|
|
|
|
|
+
|
|
|
|
|
+ # 添加Note节点(type: note),包含完整的元信息
|
|
|
|
|
+ context.query_graph["nodes"][note_id] = {
|
|
|
|
|
+ "type": "note",
|
|
|
|
|
+ "note_id": note_id,
|
|
|
|
|
+ "title": note["title"],
|
|
|
|
|
+ "desc": note.get("desc", ""), # 完整描述,不截断
|
|
|
|
|
+ "note_url": note.get("note_url", ""),
|
|
|
|
|
+ "image_list": note.get("image_list", []), # 图片列表
|
|
|
|
|
+ "interact_info": note.get("interact_info", {}), # 互动信息(点赞、收藏、评论、分享)
|
|
|
|
|
+ "match_level": note["evaluation"]["match_level"],
|
|
|
|
|
+ "relevance_score": note["evaluation"]["relevance_score"],
|
|
|
|
|
+ "evaluation_reason": note["evaluation"].get("reason", ""), # 评估原因
|
|
|
|
|
+ "found_by_query": query
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ # 添加边:Query → Note
|
|
|
|
|
+ if query in context.query_graph["nodes"]:
|
|
|
|
|
+ context.query_graph["edges"].append({
|
|
|
|
|
+ "from": query,
|
|
|
|
|
+ "to": note_id,
|
|
|
|
|
+ "edge_type": "query_to_note",
|
|
|
|
|
+ "match_level": note["evaluation"]["match_level"],
|
|
|
|
|
+ "relevance_score": note["evaluation"]["relevance_score"]
|
|
|
|
|
+ })
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
def process_note_data(note: dict) -> dict:
|
|
def process_note_data(note: dict) -> dict:
|
|
|
"""处理搜索接口返回的帖子数据"""
|
|
"""处理搜索接口返回的帖子数据"""
|
|
|
note_card = note.get("note_card", {})
|
|
note_card = note.get("note_card", {})
|
|
@@ -349,7 +447,7 @@ async def initialize_word_library(original_query: str, context: RunContext) -> W
|
|
|
segmentation: WordSegmentation = result.final_output
|
|
segmentation: WordSegmentation = result.final_output
|
|
|
|
|
|
|
|
word_lib = WordLibrary()
|
|
word_lib = WordLibrary()
|
|
|
- word_lib.add_words(segmentation.words)
|
|
|
|
|
|
|
+ word_lib.add_words(segmentation.words, source="initial")
|
|
|
|
|
|
|
|
print(f"初始分词库: {list(word_lib.words)}")
|
|
print(f"初始分词库: {list(word_lib.words)}")
|
|
|
print(f"分词理由: {segmentation.reasoning}")
|
|
print(f"分词理由: {segmentation.reasoning}")
|
|
@@ -358,9 +456,15 @@ async def initialize_word_library(original_query: str, context: RunContext) -> W
|
|
|
context.word_library = word_lib.model_dump()
|
|
context.word_library = word_lib.model_dump()
|
|
|
|
|
|
|
|
add_step(context, "初始化分词库", "word_library_init", {
|
|
add_step(context, "初始化分词库", "word_library_init", {
|
|
|
- "original_query": original_query,
|
|
|
|
|
- "words": list(word_lib.words),
|
|
|
|
|
- "reasoning": segmentation.reasoning
|
|
|
|
|
|
|
+ "agent": "分词专家",
|
|
|
|
|
+ "input": original_query,
|
|
|
|
|
+ "output": {
|
|
|
|
|
+ "words": segmentation.words,
|
|
|
|
|
+ "reasoning": segmentation.reasoning
|
|
|
|
|
+ },
|
|
|
|
|
+ "result": {
|
|
|
|
|
+ "word_library": list(word_lib.words)
|
|
|
|
|
+ }
|
|
|
})
|
|
})
|
|
|
|
|
|
|
|
return word_lib
|
|
return word_lib
|
|
@@ -400,12 +504,16 @@ async def process_suggestions(
|
|
|
original_need: str,
|
|
original_need: str,
|
|
|
word_lib: WordLibrary,
|
|
word_lib: WordLibrary,
|
|
|
context: RunContext,
|
|
context: RunContext,
|
|
|
- xiaohongshu_api: XiaohongshuSearchRecommendations
|
|
|
|
|
|
|
+ xiaohongshu_api: XiaohongshuSearchRecommendations,
|
|
|
|
|
+ iteration: int
|
|
|
) -> list[QueryState]:
|
|
) -> list[QueryState]:
|
|
|
"""处理suggestion分支,返回新的query states"""
|
|
"""处理suggestion分支,返回新的query states"""
|
|
|
|
|
|
|
|
print(f"\n [Suggestion分支] 处理query: {query}")
|
|
print(f"\n [Suggestion分支] 处理query: {query}")
|
|
|
|
|
|
|
|
|
|
+ # 收集本次分支处理中的所有Agent调用
|
|
|
|
|
+ agent_calls = []
|
|
|
|
|
+
|
|
|
# 1. 获取suggestions
|
|
# 1. 获取suggestions
|
|
|
suggestions = xiaohongshu_api.get_recommendations(keyword=query)
|
|
suggestions = xiaohongshu_api.get_recommendations(keyword=query)
|
|
|
|
|
|
|
@@ -432,7 +540,8 @@ async def process_suggestions(
|
|
|
suggestion_evaluations = []
|
|
suggestion_evaluations = []
|
|
|
|
|
|
|
|
for sug in suggestions[:5]: # 限制处理数量
|
|
for sug in suggestions[:5]: # 限制处理数量
|
|
|
- # 评估sug的相关度
|
|
|
|
|
|
|
+ # 评估sug与原始需求的相关度(注意:这里是与原始需求original_need对比,而非当前query)
|
|
|
|
|
+ # 这样可以确保生成的suggestion始终围绕用户的核心需求
|
|
|
sug_eval = await evaluate_query_relevance(sug, original_need, query_state.relevance_score, context)
|
|
sug_eval = await evaluate_query_relevance(sug, original_need, query_state.relevance_score, context)
|
|
|
|
|
|
|
|
sug_eval_record = {
|
|
sug_eval_record = {
|
|
@@ -443,25 +552,37 @@ async def process_suggestions(
|
|
|
}
|
|
}
|
|
|
suggestion_evaluations.append(sug_eval_record)
|
|
suggestion_evaluations.append(sug_eval_record)
|
|
|
|
|
|
|
|
- # 判断是否比当前query更好
|
|
|
|
|
- if sug_eval.is_improved and sug_eval.relevance_score > query_state.relevance_score:
|
|
|
|
|
|
|
+ # 创建query state(所有suggestion都作为query节点)
|
|
|
|
|
+ sug_state = QueryState(
|
|
|
|
|
+ query=sug,
|
|
|
|
|
+ level=query_state.level + 1,
|
|
|
|
|
+ relevance_score=sug_eval.relevance_score,
|
|
|
|
|
+ parent_query=query,
|
|
|
|
|
+ strategy="direct_sug"
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # 判断是否比当前query更好(只有提升的才加入待处理队列)
|
|
|
|
|
+ is_selected = sug_eval.is_improved and sug_eval.relevance_score > query_state.relevance_score
|
|
|
|
|
+
|
|
|
|
|
+ # 将所有suggestion添加到演化图(包括未提升的)
|
|
|
|
|
+ add_query_to_graph(
|
|
|
|
|
+ context,
|
|
|
|
|
+ sug_state,
|
|
|
|
|
+ iteration,
|
|
|
|
|
+ evaluation_reason=sug_eval.reason,
|
|
|
|
|
+ is_selected=is_selected
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ if is_selected:
|
|
|
print(f" ✓ {sug} (分数: {sug_eval.relevance_score:.2f}, 提升: {sug_eval.is_improved})")
|
|
print(f" ✓ {sug} (分数: {sug_eval.relevance_score:.2f}, 提升: {sug_eval.is_improved})")
|
|
|
-
|
|
|
|
|
- # 创建新的query state(直接使用suggestion)
|
|
|
|
|
- new_state = QueryState(
|
|
|
|
|
- query=sug,
|
|
|
|
|
- level=query_state.level + 1,
|
|
|
|
|
- relevance_score=sug_eval.relevance_score,
|
|
|
|
|
- parent_query=query,
|
|
|
|
|
- strategy="direct_sug"
|
|
|
|
|
- )
|
|
|
|
|
- new_queries.append(new_state)
|
|
|
|
|
|
|
+ new_queries.append(sug_state)
|
|
|
else:
|
|
else:
|
|
|
print(f" ✗ {sug} (分数: {sug_eval.relevance_score:.2f}, 未提升)")
|
|
print(f" ✗ {sug} (分数: {sug_eval.relevance_score:.2f}, 未提升)")
|
|
|
|
|
|
|
|
# 3. 改写策略(向上抽象或同义改写)
|
|
# 3. 改写策略(向上抽象或同义改写)
|
|
|
if len(new_queries) < 3: # 如果直接使用sug的数量不够,尝试改写
|
|
if len(new_queries) < 3: # 如果直接使用sug的数量不够,尝试改写
|
|
|
- rewrite_input = f"""
|
|
|
|
|
|
|
+ # 尝试向上抽象
|
|
|
|
|
+ rewrite_input_abstract = f"""
|
|
|
<当前Query>
|
|
<当前Query>
|
|
|
{query}
|
|
{query}
|
|
|
</当前Query>
|
|
</当前Query>
|
|
@@ -472,22 +593,111 @@ async def process_suggestions(
|
|
|
|
|
|
|
|
请改写这个query。
|
|
请改写这个query。
|
|
|
"""
|
|
"""
|
|
|
- result = await Runner.run(query_rewriter, rewrite_input)
|
|
|
|
|
|
|
+ result = await Runner.run(query_rewriter, rewrite_input_abstract)
|
|
|
rewrite: QueryRewrite = result.final_output
|
|
rewrite: QueryRewrite = result.final_output
|
|
|
|
|
|
|
|
|
|
+ # 收集改写Agent的输入输出
|
|
|
|
|
+ rewrite_agent_call = {
|
|
|
|
|
+ "agent": "Query改写专家",
|
|
|
|
|
+ "action": "向上抽象改写",
|
|
|
|
|
+ "input": {
|
|
|
|
|
+ "query": query,
|
|
|
|
|
+ "rewrite_type": "abstract"
|
|
|
|
|
+ },
|
|
|
|
|
+ "output": {
|
|
|
|
|
+ "rewritten_query": rewrite.rewritten_query,
|
|
|
|
|
+ "rewrite_type": rewrite.rewrite_type,
|
|
|
|
|
+ "reasoning": rewrite.reasoning
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ agent_calls.append(rewrite_agent_call)
|
|
|
|
|
+
|
|
|
# 评估改写后的query
|
|
# 评估改写后的query
|
|
|
rewrite_eval = await evaluate_query_relevance(rewrite.rewritten_query, original_need, query_state.relevance_score, context)
|
|
rewrite_eval = await evaluate_query_relevance(rewrite.rewritten_query, original_need, query_state.relevance_score, context)
|
|
|
|
|
|
|
|
|
|
+ # 创建改写后的query state
|
|
|
|
|
+ new_state = QueryState(
|
|
|
|
|
+ query=rewrite.rewritten_query,
|
|
|
|
|
+ level=query_state.level + 1,
|
|
|
|
|
+ relevance_score=rewrite_eval.relevance_score,
|
|
|
|
|
+ parent_query=query,
|
|
|
|
|
+ strategy="rewrite_abstract"
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # 添加到演化图(无论是否提升)
|
|
|
|
|
+ add_query_to_graph(
|
|
|
|
|
+ context,
|
|
|
|
|
+ new_state,
|
|
|
|
|
+ iteration,
|
|
|
|
|
+ evaluation_reason=rewrite_eval.reason,
|
|
|
|
|
+ is_selected=rewrite_eval.is_improved
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
if rewrite_eval.is_improved:
|
|
if rewrite_eval.is_improved:
|
|
|
- print(f" ✓ 改写: {rewrite.rewritten_query} (分数: {rewrite_eval.relevance_score:.2f})")
|
|
|
|
|
- new_state = QueryState(
|
|
|
|
|
- query=rewrite.rewritten_query,
|
|
|
|
|
- level=query_state.level + 1,
|
|
|
|
|
- relevance_score=rewrite_eval.relevance_score,
|
|
|
|
|
- parent_query=query,
|
|
|
|
|
- strategy="rewrite"
|
|
|
|
|
- )
|
|
|
|
|
|
|
+ print(f" ✓ 改写(抽象): {rewrite.rewritten_query} (分数: {rewrite_eval.relevance_score:.2f})")
|
|
|
new_queries.append(new_state)
|
|
new_queries.append(new_state)
|
|
|
|
|
+ else:
|
|
|
|
|
+ print(f" ✗ 改写(抽象): {rewrite.rewritten_query} (分数: {rewrite_eval.relevance_score:.2f}, 未提升)")
|
|
|
|
|
+
|
|
|
|
|
+ # 3.2. 同义改写策略
|
|
|
|
|
+ if len(new_queries) < 4: # 如果还不够,尝试同义改写
|
|
|
|
|
+ rewrite_input_synonym = f"""
|
|
|
|
|
+<当前Query>
|
|
|
|
|
+{query}
|
|
|
|
|
+</当前Query>
|
|
|
|
|
+
|
|
|
|
|
+<改写要求>
|
|
|
|
|
+类型: synonym (同义改写)
|
|
|
|
|
+使用同义词或相关表达来改写query,保持语义相同但表达方式不同。
|
|
|
|
|
+</改写要求>
|
|
|
|
|
+
|
|
|
|
|
+请改写这个query。
|
|
|
|
|
+"""
|
|
|
|
|
+ result = await Runner.run(query_rewriter, rewrite_input_synonym)
|
|
|
|
|
+ rewrite_syn: QueryRewrite = result.final_output
|
|
|
|
|
+
|
|
|
|
|
+ # 收集同义改写Agent的输入输出
|
|
|
|
|
+ rewrite_syn_agent_call = {
|
|
|
|
|
+ "agent": "Query改写专家",
|
|
|
|
|
+ "action": "同义改写",
|
|
|
|
|
+ "input": {
|
|
|
|
|
+ "query": query,
|
|
|
|
|
+ "rewrite_type": "synonym"
|
|
|
|
|
+ },
|
|
|
|
|
+ "output": {
|
|
|
|
|
+ "rewritten_query": rewrite_syn.rewritten_query,
|
|
|
|
|
+ "rewrite_type": rewrite_syn.rewrite_type,
|
|
|
|
|
+ "reasoning": rewrite_syn.reasoning
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ agent_calls.append(rewrite_syn_agent_call)
|
|
|
|
|
+
|
|
|
|
|
+ # 评估改写后的query
|
|
|
|
|
+ rewrite_syn_eval = await evaluate_query_relevance(rewrite_syn.rewritten_query, original_need, query_state.relevance_score, context)
|
|
|
|
|
+
|
|
|
|
|
+ # 创建改写后的query state
|
|
|
|
|
+ new_state = QueryState(
|
|
|
|
|
+ query=rewrite_syn.rewritten_query,
|
|
|
|
|
+ level=query_state.level + 1,
|
|
|
|
|
+ relevance_score=rewrite_syn_eval.relevance_score,
|
|
|
|
|
+ parent_query=query,
|
|
|
|
|
+ strategy="rewrite_synonym"
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # 添加到演化图(无论是否提升)
|
|
|
|
|
+ add_query_to_graph(
|
|
|
|
|
+ context,
|
|
|
|
|
+ new_state,
|
|
|
|
|
+ iteration,
|
|
|
|
|
+ evaluation_reason=rewrite_syn_eval.reason,
|
|
|
|
|
+ is_selected=rewrite_syn_eval.is_improved
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ if rewrite_syn_eval.is_improved:
|
|
|
|
|
+ print(f" ✓ 改写(同义): {rewrite_syn.rewritten_query} (分数: {rewrite_syn_eval.relevance_score:.2f})")
|
|
|
|
|
+ new_queries.append(new_state)
|
|
|
|
|
+ else:
|
|
|
|
|
+ print(f" ✗ 改写(同义): {rewrite_syn.rewritten_query} (分数: {rewrite_syn_eval.relevance_score:.2f}, 未提升)")
|
|
|
|
|
|
|
|
# 4. 加词策略
|
|
# 4. 加词策略
|
|
|
unused_word = word_lib.get_unused_word(query)
|
|
unused_word = word_lib.get_unused_word(query)
|
|
@@ -506,21 +716,50 @@ async def process_suggestions(
|
|
|
result = await Runner.run(word_inserter, insertion_input)
|
|
result = await Runner.run(word_inserter, insertion_input)
|
|
|
insertion: WordInsertion = result.final_output
|
|
insertion: WordInsertion = result.final_output
|
|
|
|
|
|
|
|
|
|
+ # 收集加词Agent的输入输出
|
|
|
|
|
+ insertion_agent_call = {
|
|
|
|
|
+ "agent": "加词位置评估专家",
|
|
|
|
|
+ "action": "加词",
|
|
|
|
|
+ "input": {
|
|
|
|
|
+ "query": query,
|
|
|
|
|
+ "word_to_add": unused_word
|
|
|
|
|
+ },
|
|
|
|
|
+ "output": {
|
|
|
|
|
+ "new_query": insertion.new_query,
|
|
|
|
|
+ "insertion_position": insertion.insertion_position,
|
|
|
|
|
+ "reasoning": insertion.reasoning
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ agent_calls.append(insertion_agent_call)
|
|
|
|
|
+
|
|
|
# 评估加词后的query
|
|
# 评估加词后的query
|
|
|
insertion_eval = await evaluate_query_relevance(insertion.new_query, original_need, query_state.relevance_score, context)
|
|
insertion_eval = await evaluate_query_relevance(insertion.new_query, original_need, query_state.relevance_score, context)
|
|
|
|
|
|
|
|
|
|
+ # 创建加词后的query state
|
|
|
|
|
+ new_state = QueryState(
|
|
|
|
|
+ query=insertion.new_query,
|
|
|
|
|
+ level=query_state.level + 1,
|
|
|
|
|
+ relevance_score=insertion_eval.relevance_score,
|
|
|
|
|
+ parent_query=query,
|
|
|
|
|
+ strategy="add_word"
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # 添加到演化图(无论是否提升)
|
|
|
|
|
+ add_query_to_graph(
|
|
|
|
|
+ context,
|
|
|
|
|
+ new_state,
|
|
|
|
|
+ iteration,
|
|
|
|
|
+ evaluation_reason=insertion_eval.reason,
|
|
|
|
|
+ is_selected=insertion_eval.is_improved
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
if insertion_eval.is_improved:
|
|
if insertion_eval.is_improved:
|
|
|
print(f" ✓ 加词: {insertion.new_query} (分数: {insertion_eval.relevance_score:.2f})")
|
|
print(f" ✓ 加词: {insertion.new_query} (分数: {insertion_eval.relevance_score:.2f})")
|
|
|
- new_state = QueryState(
|
|
|
|
|
- query=insertion.new_query,
|
|
|
|
|
- level=query_state.level + 1,
|
|
|
|
|
- relevance_score=insertion_eval.relevance_score,
|
|
|
|
|
- parent_query=query,
|
|
|
|
|
- strategy="add_word"
|
|
|
|
|
- )
|
|
|
|
|
new_queries.append(new_state)
|
|
new_queries.append(new_state)
|
|
|
|
|
+ else:
|
|
|
|
|
+ print(f" ✗ 加词: {insertion.new_query} (分数: {insertion_eval.relevance_score:.2f}, 未提升)")
|
|
|
|
|
|
|
|
- # 记录完整的suggestion分支处理结果
|
|
|
|
|
|
|
+ # 记录完整的suggestion分支处理结果(层级化)
|
|
|
add_step(context, f"Suggestion分支 - {query}", "suggestion_branch", {
|
|
add_step(context, f"Suggestion分支 - {query}", "suggestion_branch", {
|
|
|
"query": query,
|
|
"query": query,
|
|
|
"query_level": query_state.level,
|
|
"query_level": query_state.level,
|
|
@@ -528,6 +767,7 @@ async def process_suggestions(
|
|
|
"suggestions_count": len(suggestions),
|
|
"suggestions_count": len(suggestions),
|
|
|
"suggestions_evaluated": len(suggestion_evaluations),
|
|
"suggestions_evaluated": len(suggestion_evaluations),
|
|
|
"suggestion_evaluations": suggestion_evaluations[:10], # 只保存前10个
|
|
"suggestion_evaluations": suggestion_evaluations[:10], # 只保存前10个
|
|
|
|
|
+ "agent_calls": agent_calls, # 所有Agent调用的详细记录
|
|
|
"new_queries_generated": len(new_queries),
|
|
"new_queries_generated": len(new_queries),
|
|
|
"new_queries": [{"query": nq.query, "score": nq.relevance_score, "strategy": nq.strategy} for nq in new_queries],
|
|
"new_queries": [{"query": nq.query, "score": nq.relevance_score, "strategy": nq.strategy} for nq in new_queries],
|
|
|
"no_suggestion_rounds": query_state.no_suggestion_rounds
|
|
"no_suggestion_rounds": query_state.no_suggestion_rounds
|
|
@@ -543,7 +783,8 @@ async def process_search_results(
|
|
|
word_lib: WordLibrary,
|
|
word_lib: WordLibrary,
|
|
|
context: RunContext,
|
|
context: RunContext,
|
|
|
xiaohongshu_search: XiaohongshuSearch,
|
|
xiaohongshu_search: XiaohongshuSearch,
|
|
|
- relevance_threshold: float = 0.6
|
|
|
|
|
|
|
+ relevance_threshold: float,
|
|
|
|
|
+ iteration: int
|
|
|
) -> tuple[list[dict], list[QueryState]]:
|
|
) -> tuple[list[dict], list[QueryState]]:
|
|
|
"""
|
|
"""
|
|
|
处理搜索结果分支
|
|
处理搜索结果分支
|
|
@@ -552,6 +793,9 @@ async def process_search_results(
|
|
|
|
|
|
|
|
print(f"\n [Result分支] 搜索query: {query}")
|
|
print(f"\n [Result分支] 搜索query: {query}")
|
|
|
|
|
|
|
|
|
|
+ # 收集本次分支处理中的所有Agent调用
|
|
|
|
|
+ agent_calls = []
|
|
|
|
|
+
|
|
|
# 1. 判断query相关度是否达到门槛
|
|
# 1. 判断query相关度是否达到门槛
|
|
|
if query_state.relevance_score < relevance_threshold:
|
|
if query_state.relevance_score < relevance_threshold:
|
|
|
print(f" ✗ 相关度 {query_state.relevance_score:.2f} 低于门槛 {relevance_threshold},跳过搜索")
|
|
print(f" ✗ 相关度 {query_state.relevance_score:.2f} 低于门槛 {relevance_threshold},跳过搜索")
|
|
@@ -607,6 +851,24 @@ async def process_search_results(
|
|
|
result = await Runner.run(result_evaluator, eval_input)
|
|
result = await Runner.run(result_evaluator, eval_input)
|
|
|
evaluation: ResultEvaluation = result.final_output
|
|
evaluation: ResultEvaluation = result.final_output
|
|
|
|
|
|
|
|
|
|
+ # 收集Result评估Agent的输入输出
|
|
|
|
|
+ result_eval_agent_call = {
|
|
|
|
|
+ "agent": "Result匹配度评估专家",
|
|
|
|
|
+ "action": "评估帖子匹配度",
|
|
|
|
|
+ "input": {
|
|
|
|
|
+ "note_id": note_data.get("note_id"),
|
|
|
|
|
+ "title": title,
|
|
|
|
|
+ "desc": desc[:200] if len(desc) > 200 else desc # 限制长度
|
|
|
|
|
+ },
|
|
|
|
|
+ "output": {
|
|
|
|
|
+ "match_level": evaluation.match_level,
|
|
|
|
|
+ "relevance_score": evaluation.relevance_score,
|
|
|
|
|
+ "missing_aspects": evaluation.missing_aspects,
|
|
|
|
|
+ "reason": evaluation.reason
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ agent_calls.append(result_eval_agent_call)
|
|
|
|
|
+
|
|
|
note_data["evaluation"] = {
|
|
note_data["evaluation"] = {
|
|
|
"match_level": evaluation.match_level,
|
|
"match_level": evaluation.match_level,
|
|
|
"relevance_score": evaluation.relevance_score,
|
|
"relevance_score": evaluation.relevance_score,
|
|
@@ -614,35 +876,42 @@ async def process_search_results(
|
|
|
"reason": evaluation.reason
|
|
"reason": evaluation.reason
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ # 将所有评估过的帖子添加到演化图(包括satisfied、partial、unsatisfied)
|
|
|
|
|
+ add_note_to_graph(context, query, note_data)
|
|
|
|
|
+
|
|
|
if evaluation.match_level == "satisfied":
|
|
if evaluation.match_level == "satisfied":
|
|
|
satisfied_notes.append(note_data)
|
|
satisfied_notes.append(note_data)
|
|
|
print(f" ✓ 满足: {title[:30] if len(title) > 30 else title}... (分数: {evaluation.relevance_score:.2f})")
|
|
print(f" ✓ 满足: {title[:30] if len(title) > 30 else title}... (分数: {evaluation.relevance_score:.2f})")
|
|
|
elif evaluation.match_level == "partial":
|
|
elif evaluation.match_level == "partial":
|
|
|
partial_notes.append(note_data)
|
|
partial_notes.append(note_data)
|
|
|
print(f" ~ 部分: {title[:30] if len(title) > 30 else title}... (缺失: {', '.join(evaluation.missing_aspects[:2])})")
|
|
print(f" ~ 部分: {title[:30] if len(title) > 30 else title}... (缺失: {', '.join(evaluation.missing_aspects[:2])})")
|
|
|
|
|
+ else: # unsatisfied
|
|
|
|
|
+ print(f" ✗ 不满足: {title[:30] if len(title) > 30 else title}... (分数: {evaluation.relevance_score:.2f})")
|
|
|
|
|
|
|
|
- # 4. 处理满足的帖子:提取关键词并扩充分词库
|
|
|
|
|
|
|
+ # 4. 处理满足的帖子:不再扩充分词库(避免无限扩张)
|
|
|
new_queries = []
|
|
new_queries = []
|
|
|
|
|
|
|
|
if satisfied_notes:
|
|
if satisfied_notes:
|
|
|
- print(f"\n 从 {len(satisfied_notes)} 个满足的帖子中提取关键词...")
|
|
|
|
|
- for note in satisfied_notes[:3]: # 限制处理数量
|
|
|
|
|
- extract_input = f"""
|
|
|
|
|
-<帖子>
|
|
|
|
|
-标题: {note['title']}
|
|
|
|
|
-描述: {note['desc']}
|
|
|
|
|
-</帖子>
|
|
|
|
|
-
|
|
|
|
|
-请提取核心关键词。
|
|
|
|
|
-"""
|
|
|
|
|
- result = await Runner.run(keyword_extractor, extract_input)
|
|
|
|
|
- extraction: KeywordExtraction = result.final_output
|
|
|
|
|
-
|
|
|
|
|
- # 添加新词到分词库
|
|
|
|
|
- for keyword in extraction.keywords:
|
|
|
|
|
- if keyword not in word_lib.words:
|
|
|
|
|
- word_lib.add_word(keyword)
|
|
|
|
|
- print(f" + 新词入库: {keyword}")
|
|
|
|
|
|
|
+ print(f"\n ✓ 找到 {len(satisfied_notes)} 个满足的帖子,不再提取关键词入库")
|
|
|
|
|
+ # 注释掉关键词提取逻辑,保持分词库稳定
|
|
|
|
|
+ # for note in satisfied_notes[:3]:
|
|
|
|
|
+ # extract_input = f"""
|
|
|
|
|
+ # <帖子>
|
|
|
|
|
+ # 标题: {note['title']}
|
|
|
|
|
+ # 描述: {note['desc']}
|
|
|
|
|
+ # </帖子>
|
|
|
|
|
+ #
|
|
|
|
|
+ # 请提取核心关键词。
|
|
|
|
|
+ # """
|
|
|
|
|
+ # result = await Runner.run(keyword_extractor, extract_input)
|
|
|
|
|
+ # extraction: KeywordExtraction = result.final_output
|
|
|
|
|
+ #
|
|
|
|
|
+ # # 添加新词到分词库,标记来源
|
|
|
|
|
+ # note_id = note.get('note_id', 'unknown')
|
|
|
|
|
+ # for keyword in extraction.keywords:
|
|
|
|
|
+ # if keyword not in word_lib.words:
|
|
|
|
|
+ # word_lib.add_word(keyword, source=f"note:{note_id}")
|
|
|
|
|
+ # print(f" + 新词入库: {keyword} (来源: {note_id})")
|
|
|
|
|
|
|
|
# 5. 处理部分匹配的帖子:改造query
|
|
# 5. 处理部分匹配的帖子:改造query
|
|
|
if partial_notes and len(satisfied_notes) < 5: # 如果满足的不够,基于部分匹配改进
|
|
if partial_notes and len(satisfied_notes) < 5: # 如果满足的不够,基于部分匹配改进
|
|
@@ -667,21 +936,174 @@ async def process_search_results(
|
|
|
result = await Runner.run(query_improver, improvement_input)
|
|
result = await Runner.run(query_improver, improvement_input)
|
|
|
improvement: QueryImprovement = result.final_output
|
|
improvement: QueryImprovement = result.final_output
|
|
|
|
|
|
|
|
|
|
+ # 收集Query改造Agent的输入输出
|
|
|
|
|
+ improvement_agent_call = {
|
|
|
|
|
+ "agent": "Query改造专家",
|
|
|
|
|
+ "action": "基于缺失方面改造Query",
|
|
|
|
|
+ "input": {
|
|
|
|
|
+ "query": query,
|
|
|
|
|
+ "missing_aspects": list(set(all_missing[:5]))
|
|
|
|
|
+ },
|
|
|
|
|
+ "output": {
|
|
|
|
|
+ "improved_query": improvement.improved_query,
|
|
|
|
|
+ "added_aspects": improvement.added_aspects,
|
|
|
|
|
+ "reasoning": improvement.reasoning
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ agent_calls.append(improvement_agent_call)
|
|
|
|
|
+
|
|
|
# 评估改进后的query
|
|
# 评估改进后的query
|
|
|
improved_eval = await evaluate_query_relevance(improvement.improved_query, original_need, query_state.relevance_score, context)
|
|
improved_eval = await evaluate_query_relevance(improvement.improved_query, original_need, query_state.relevance_score, context)
|
|
|
|
|
|
|
|
|
|
+ # 创建改进后的query state
|
|
|
|
|
+ new_state = QueryState(
|
|
|
|
|
+ query=improvement.improved_query,
|
|
|
|
|
+ level=query_state.level + 1,
|
|
|
|
|
+ relevance_score=improved_eval.relevance_score,
|
|
|
|
|
+ parent_query=query,
|
|
|
|
|
+ strategy="improve_from_partial"
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # 添加到演化图(无论是否提升)
|
|
|
|
|
+ add_query_to_graph(
|
|
|
|
|
+ context,
|
|
|
|
|
+ new_state,
|
|
|
|
|
+ iteration,
|
|
|
|
|
+ evaluation_reason=improved_eval.reason,
|
|
|
|
|
+ is_selected=improved_eval.is_improved
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
if improved_eval.is_improved:
|
|
if improved_eval.is_improved:
|
|
|
print(f" ✓ 改进: {improvement.improved_query} (添加: {', '.join(improvement.added_aspects[:2])})")
|
|
print(f" ✓ 改进: {improvement.improved_query} (添加: {', '.join(improvement.added_aspects[:2])})")
|
|
|
- new_state = QueryState(
|
|
|
|
|
- query=improvement.improved_query,
|
|
|
|
|
- level=query_state.level + 1,
|
|
|
|
|
- relevance_score=improved_eval.relevance_score,
|
|
|
|
|
- parent_query=query,
|
|
|
|
|
- strategy="improve_from_partial"
|
|
|
|
|
- )
|
|
|
|
|
new_queries.append(new_state)
|
|
new_queries.append(new_state)
|
|
|
|
|
+ else:
|
|
|
|
|
+ print(f" ✗ 改进: {improvement.improved_query} (分数: {improved_eval.relevance_score:.2f}, 未提升)")
|
|
|
|
|
+
|
|
|
|
|
+ # 6. Result分支的改写策略(向上抽象和同义改写)
|
|
|
|
|
+ # 如果搜索结果不理想且新queries不够,尝试改写当前query
|
|
|
|
|
+ if len(satisfied_notes) < 3 and len(new_queries) < 2:
|
|
|
|
|
+ print(f"\n 搜索结果不理想,尝试改写query...")
|
|
|
|
|
+
|
|
|
|
|
+ # 6.1 向上抽象
|
|
|
|
|
+ if len(new_queries) < 3:
|
|
|
|
|
+ rewrite_input_abstract = f"""
|
|
|
|
|
+<当前Query>
|
|
|
|
|
+{query}
|
|
|
|
|
+</当前Query>
|
|
|
|
|
+
|
|
|
|
|
+<改写要求>
|
|
|
|
|
+类型: abstract (向上抽象)
|
|
|
|
|
+</改写要求>
|
|
|
|
|
+
|
|
|
|
|
+请改写这个query。
|
|
|
|
|
+"""
|
|
|
|
|
+ result = await Runner.run(query_rewriter, rewrite_input_abstract)
|
|
|
|
|
+ rewrite: QueryRewrite = result.final_output
|
|
|
|
|
+
|
|
|
|
|
+ # 收集Result分支改写(抽象)Agent的输入输出
|
|
|
|
|
+ rewrite_agent_call = {
|
|
|
|
|
+ "agent": "Query改写专家",
|
|
|
|
|
+ "action": "向上抽象改写(Result分支)",
|
|
|
|
|
+ "input": {
|
|
|
|
|
+ "query": query,
|
|
|
|
|
+ "rewrite_type": "abstract"
|
|
|
|
|
+ },
|
|
|
|
|
+ "output": {
|
|
|
|
|
+ "rewritten_query": rewrite.rewritten_query,
|
|
|
|
|
+ "rewrite_type": rewrite.rewrite_type,
|
|
|
|
|
+ "reasoning": rewrite.reasoning
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ agent_calls.append(rewrite_agent_call)
|
|
|
|
|
+
|
|
|
|
|
+ # 评估改写后的query
|
|
|
|
|
+ rewrite_eval = await evaluate_query_relevance(rewrite.rewritten_query, original_need, query_state.relevance_score, context)
|
|
|
|
|
+
|
|
|
|
|
+ # 创建改写后的query state
|
|
|
|
|
+ new_state = QueryState(
|
|
|
|
|
+ query=rewrite.rewritten_query,
|
|
|
|
|
+ level=query_state.level + 1,
|
|
|
|
|
+ relevance_score=rewrite_eval.relevance_score,
|
|
|
|
|
+ parent_query=query,
|
|
|
|
|
+ strategy="result_rewrite_abstract"
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # 添加到演化图(无论是否提升)
|
|
|
|
|
+ add_query_to_graph(
|
|
|
|
|
+ context,
|
|
|
|
|
+ new_state,
|
|
|
|
|
+ iteration,
|
|
|
|
|
+ evaluation_reason=rewrite_eval.reason,
|
|
|
|
|
+ is_selected=rewrite_eval.is_improved
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ if rewrite_eval.is_improved:
|
|
|
|
|
+ print(f" ✓ 改写(抽象): {rewrite.rewritten_query} (分数: {rewrite_eval.relevance_score:.2f})")
|
|
|
|
|
+ new_queries.append(new_state)
|
|
|
|
|
+ else:
|
|
|
|
|
+ print(f" ✗ 改写(抽象): {rewrite.rewritten_query} (分数: {rewrite_eval.relevance_score:.2f}, 未提升)")
|
|
|
|
|
|
|
|
- # 记录完整的result分支处理结果
|
|
|
|
|
|
|
+ # 6.2 同义改写
|
|
|
|
|
+ if len(new_queries) < 4:
|
|
|
|
|
+ rewrite_input_synonym = f"""
|
|
|
|
|
+<当前Query>
|
|
|
|
|
+{query}
|
|
|
|
|
+</当前Query>
|
|
|
|
|
+
|
|
|
|
|
+<改写要求>
|
|
|
|
|
+类型: synonym (同义改写)
|
|
|
|
|
+使用同义词或相关表达来改写query,保持语义相同但表达方式不同。
|
|
|
|
|
+</改写要求>
|
|
|
|
|
+
|
|
|
|
|
+请改写这个query。
|
|
|
|
|
+"""
|
|
|
|
|
+ result = await Runner.run(query_rewriter, rewrite_input_synonym)
|
|
|
|
|
+ rewrite_syn: QueryRewrite = result.final_output
|
|
|
|
|
+
|
|
|
|
|
+ # 收集Result分支改写(同义)Agent的输入输出
|
|
|
|
|
+ rewrite_syn_agent_call = {
|
|
|
|
|
+ "agent": "Query改写专家",
|
|
|
|
|
+ "action": "同义改写(Result分支)",
|
|
|
|
|
+ "input": {
|
|
|
|
|
+ "query": query,
|
|
|
|
|
+ "rewrite_type": "synonym"
|
|
|
|
|
+ },
|
|
|
|
|
+ "output": {
|
|
|
|
|
+ "rewritten_query": rewrite_syn.rewritten_query,
|
|
|
|
|
+ "rewrite_type": rewrite_syn.rewrite_type,
|
|
|
|
|
+ "reasoning": rewrite_syn.reasoning
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ agent_calls.append(rewrite_syn_agent_call)
|
|
|
|
|
+
|
|
|
|
|
+ # 评估改写后的query
|
|
|
|
|
+ rewrite_syn_eval = await evaluate_query_relevance(rewrite_syn.rewritten_query, original_need, query_state.relevance_score, context)
|
|
|
|
|
+
|
|
|
|
|
+ # 创建改写后的query state
|
|
|
|
|
+ new_state = QueryState(
|
|
|
|
|
+ query=rewrite_syn.rewritten_query,
|
|
|
|
|
+ level=query_state.level + 1,
|
|
|
|
|
+ relevance_score=rewrite_syn_eval.relevance_score,
|
|
|
|
|
+ parent_query=query,
|
|
|
|
|
+ strategy="result_rewrite_synonym"
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # 添加到演化图(无论是否提升)
|
|
|
|
|
+ add_query_to_graph(
|
|
|
|
|
+ context,
|
|
|
|
|
+ new_state,
|
|
|
|
|
+ iteration,
|
|
|
|
|
+ evaluation_reason=rewrite_syn_eval.reason,
|
|
|
|
|
+ is_selected=rewrite_syn_eval.is_improved
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ if rewrite_syn_eval.is_improved:
|
|
|
|
|
+ print(f" ✓ 改写(同义): {rewrite_syn.rewritten_query} (分数: {rewrite_syn_eval.relevance_score:.2f})")
|
|
|
|
|
+ new_queries.append(new_state)
|
|
|
|
|
+ else:
|
|
|
|
|
+ print(f" ✗ 改写(同义): {rewrite_syn.rewritten_query} (分数: {rewrite_syn_eval.relevance_score:.2f}, 未提升)")
|
|
|
|
|
+
|
|
|
|
|
+ # 记录完整的result分支处理结果(层级化)
|
|
|
add_step(context, f"Result分支 - {query}", "result_branch", {
|
|
add_step(context, f"Result分支 - {query}", "result_branch", {
|
|
|
"query": query,
|
|
"query": query,
|
|
|
"query_level": query_state.level,
|
|
"query_level": query_state.level,
|
|
@@ -700,9 +1122,9 @@ async def process_search_results(
|
|
|
}
|
|
}
|
|
|
for note in satisfied_notes[:10] # 只保存前10个
|
|
for note in satisfied_notes[:10] # 只保存前10个
|
|
|
],
|
|
],
|
|
|
|
|
+ "agent_calls": agent_calls, # 所有Agent调用的详细记录
|
|
|
"new_queries_generated": len(new_queries),
|
|
"new_queries_generated": len(new_queries),
|
|
|
- "new_queries": [{"query": nq.query, "score": nq.relevance_score, "strategy": nq.strategy} for nq in new_queries],
|
|
|
|
|
- "word_library_expanded": len(new_queries) > 0 # 是否扩充了分词库
|
|
|
|
|
|
|
+ "new_queries": [{"query": nq.query, "score": nq.relevance_score, "strategy": nq.strategy} for nq in new_queries]
|
|
|
})
|
|
})
|
|
|
|
|
|
|
|
return satisfied_notes, new_queries
|
|
return satisfied_notes, new_queries
|
|
@@ -731,6 +1153,16 @@ async def iterative_search_loop(
|
|
|
print(f"开始迭代搜索循环")
|
|
print(f"开始迭代搜索循环")
|
|
|
print(f"{'='*60}")
|
|
print(f"{'='*60}")
|
|
|
|
|
|
|
|
|
|
+ # 0. 添加原始问题作为根节点
|
|
|
|
|
+ root_query_state = QueryState(
|
|
|
|
|
+ query=context.q,
|
|
|
|
|
+ level=0,
|
|
|
|
|
+ relevance_score=1.0, # 原始问题本身相关度为1.0
|
|
|
|
|
+ strategy="root"
|
|
|
|
|
+ )
|
|
|
|
|
+ add_query_to_graph(context, root_query_state, 0, evaluation_reason="原始问题,作为搜索的根节点", is_selected=True)
|
|
|
|
|
+ print(f"[根节点] 原始问题: {context.q}")
|
|
|
|
|
+
|
|
|
# 1. 初始化分词库
|
|
# 1. 初始化分词库
|
|
|
word_lib = await initialize_word_library(context.q, context)
|
|
word_lib = await initialize_word_library(context.q, context)
|
|
|
|
|
|
|
@@ -755,15 +1187,26 @@ async def iterative_search_loop(
|
|
|
word_scores.sort(key=lambda x: x['score'], reverse=True)
|
|
word_scores.sort(key=lambda x: x['score'], reverse=True)
|
|
|
selected_words = word_scores[:3]
|
|
selected_words = word_scores[:3]
|
|
|
|
|
|
|
|
- for item in selected_words:
|
|
|
|
|
- query_queue.append(QueryState(
|
|
|
|
|
|
|
+ # 将所有分词添加到演化图(包括未被选中的)
|
|
|
|
|
+ for item in word_scores:
|
|
|
|
|
+ is_selected = item in selected_words
|
|
|
|
|
+ query_state = QueryState(
|
|
|
query=item['word'],
|
|
query=item['word'],
|
|
|
level=1,
|
|
level=1,
|
|
|
relevance_score=item['score'],
|
|
relevance_score=item['score'],
|
|
|
- strategy="initial"
|
|
|
|
|
- ))
|
|
|
|
|
|
|
+ strategy="initial",
|
|
|
|
|
+ parent_query=context.q # 父节点是原始问题
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # 添加到演化图(会自动创建从parent_query到该query的边)
|
|
|
|
|
+ add_query_to_graph(context, query_state, 0, evaluation_reason=item['eval'].reason, is_selected=is_selected)
|
|
|
|
|
+
|
|
|
|
|
+ # 只有被选中的才加入队列
|
|
|
|
|
+ if is_selected:
|
|
|
|
|
+ query_queue.append(query_state)
|
|
|
|
|
|
|
|
print(f"\n初始query队列(按相关度选择): {[(q.query, f'{q.relevance_score:.2f}') for q in query_queue]}")
|
|
print(f"\n初始query队列(按相关度选择): {[(q.query, f'{q.relevance_score:.2f}') for q in query_queue]}")
|
|
|
|
|
+ print(f" (共评估了 {len(word_scores)} 个分词,选择了前 {len(query_queue)} 个)")
|
|
|
|
|
|
|
|
# 3. API实例
|
|
# 3. API实例
|
|
|
xiaohongshu_api = XiaohongshuSearchRecommendations()
|
|
xiaohongshu_api = XiaohongshuSearchRecommendations()
|
|
@@ -798,17 +1241,18 @@ async def iterative_search_loop(
|
|
|
print(f"\n处理Query [{query_state.level}]: {query_state.query} (分数: {query_state.relevance_score:.2f})")
|
|
print(f"\n处理Query [{query_state.level}]: {query_state.query} (分数: {query_state.relevance_score:.2f})")
|
|
|
|
|
|
|
|
# 检查终止条件
|
|
# 检查终止条件
|
|
|
- if query_state.no_suggestion_rounds >= 2:
|
|
|
|
|
- print(f" ✗ 连续2轮无suggestion,终止该分支")
|
|
|
|
|
|
|
+ if query_state.is_terminated or query_state.no_suggestion_rounds >= 2:
|
|
|
|
|
+ print(f" ✗ 已终止或连续2轮无suggestion,跳过该query")
|
|
|
|
|
+ query_state.is_terminated = True
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
# 并行处理两个分支
|
|
# 并行处理两个分支
|
|
|
sug_task = process_suggestions(
|
|
sug_task = process_suggestions(
|
|
|
- query_state.query, query_state, context.q, word_lib, context, xiaohongshu_api
|
|
|
|
|
|
|
+ query_state.query, query_state, context.q, word_lib, context, xiaohongshu_api, iteration
|
|
|
)
|
|
)
|
|
|
result_task = process_search_results(
|
|
result_task = process_search_results(
|
|
|
query_state.query, query_state, context.q, word_lib, context,
|
|
query_state.query, query_state, context.q, word_lib, context,
|
|
|
- xiaohongshu_search, relevance_threshold
|
|
|
|
|
|
|
+ xiaohongshu_search, relevance_threshold, iteration
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
# 等待两个分支完成
|
|
# 等待两个分支完成
|
|
@@ -817,19 +1261,32 @@ async def iterative_search_loop(
|
|
|
result_task
|
|
result_task
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
+ # 如果suggestion分支返回空,说明没有获取到suggestion,需要继承no_suggestion_rounds
|
|
|
|
|
+ # 注意:process_suggestions内部已经更新了query_state.no_suggestion_rounds
|
|
|
|
|
+ # 所以这里生成的新queries需要继承父query的no_suggestion_rounds(如果sug分支也返回空)
|
|
|
|
|
+ if not sug_queries and not result_queries:
|
|
|
|
|
+ # 两个分支都没有产生新query,标记当前query为终止
|
|
|
|
|
+ query_state.is_terminated = True
|
|
|
|
|
+ print(f" ⚠ 两个分支均未产生新query,标记该query为终止")
|
|
|
|
|
+
|
|
|
new_queries_from_sug.extend(sug_queries)
|
|
new_queries_from_sug.extend(sug_queries)
|
|
|
new_queries_from_result.extend(result_queries)
|
|
new_queries_from_result.extend(result_queries)
|
|
|
all_satisfied_notes.extend(satisfied_notes)
|
|
all_satisfied_notes.extend(satisfied_notes)
|
|
|
|
|
|
|
|
# 更新队列
|
|
# 更新队列
|
|
|
all_new_queries = new_queries_from_sug + new_queries_from_result
|
|
all_new_queries = new_queries_from_sug + new_queries_from_result
|
|
|
|
|
+
|
|
|
|
|
+ # 将新生成的queries添加到演化图
|
|
|
|
|
+ for new_q in all_new_queries:
|
|
|
|
|
+ add_query_to_graph(context, new_q, iteration)
|
|
|
|
|
+
|
|
|
query_queue.extend(all_new_queries)
|
|
query_queue.extend(all_new_queries)
|
|
|
|
|
|
|
|
- # 去重(基于query文本)
|
|
|
|
|
|
|
+ # 去重(基于query文本)并过滤已终止的query
|
|
|
seen = set()
|
|
seen = set()
|
|
|
unique_queue = []
|
|
unique_queue = []
|
|
|
for q in query_queue:
|
|
for q in query_queue:
|
|
|
- if q.query not in seen:
|
|
|
|
|
|
|
+ if q.query not in seen and not q.is_terminated:
|
|
|
seen.add(q.query)
|
|
seen.add(q.query)
|
|
|
unique_queue.append(q)
|
|
unique_queue.append(q)
|
|
|
query_queue = unique_queue
|
|
query_queue = unique_queue
|
|
@@ -961,6 +1418,12 @@ async def main(input_dir: str, max_iterations: int = 20, visualize: bool = False
|
|
|
json.dump(run_context.steps, f, ensure_ascii=False, indent=2)
|
|
json.dump(run_context.steps, f, ensure_ascii=False, indent=2)
|
|
|
print(f"Steps log saved to: {steps_file_path}")
|
|
print(f"Steps log saved to: {steps_file_path}")
|
|
|
|
|
|
|
|
|
|
+ # 保存Query演化图
|
|
|
|
|
+ query_graph_file_path = os.path.join(run_context.log_dir, "query_graph.json")
|
|
|
|
|
+ with open(query_graph_file_path, "w", encoding="utf-8") as f:
|
|
|
|
|
+ json.dump(run_context.query_graph, f, ensure_ascii=False, indent=2)
|
|
|
|
|
+ print(f"Query graph saved to: {query_graph_file_path}")
|
|
|
|
|
+
|
|
|
# 可视化
|
|
# 可视化
|
|
|
if visualize:
|
|
if visualize:
|
|
|
import subprocess
|
|
import subprocess
|