|
|
@@ -8,22 +8,52 @@ import re
|
|
|
# 将项目根目录加入,方便导入内部包
|
|
|
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
|
|
|
|
|
|
-from agent.llm.qwen import qwen_llm_call
|
|
|
+from agent.tools.builtin.toolhub import toolhub_call
|
|
|
+from agent.llm.gemini import create_gemini_llm_call
|
|
|
+
|
|
|
+from dotenv import load_dotenv
|
|
|
+load_dotenv()
|
|
|
+
|
|
|
# Build the Gemini call wrapper once at import time; main() later awaits
# `gemini_llm_call(...)` as a module-level callable.
try:
    gemini_llm_call = create_gemini_llm_call()
except ValueError as e:
    # Only ValueError is handled here. The message points the user at .env, so
    # this is presumably a missing/invalid configuration value -- confirm
    # against create_gemini_llm_call.
    # sys.exit() with a string writes it to stderr and exits with status 1,
    # keeping the failure off stdout while preserving the exit code.
    sys.exit(f"初始化 Gemini 失败: {e},请检查 .env。")
|
|
|
+
|
|
|
from agent.tools.builtin.search import search_posts
|
|
|
|
|
|
+# -----------------
|
|
|
+# Utility Functions
|
|
|
+# -----------------
|
|
|
def encode_image(image_path: str) -> str:
    """Return the contents of the file at *image_path* as base64 text.

    The file is opened in binary mode and read in one call; the encoded
    bytes are decoded to ``str`` so the result can be embedded in a URL or
    a JSON message.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def get_base64_url(image_path: str, default_ext: str = "png") -> str:
    """Build a ``data:image/<subtype>;base64,<payload>`` URL for a local file.

    The subtype is the file name's extension, lower-cased, with ``jpg``
    normalised to ``jpeg``.

    Args:
        image_path: Path of the image file to embed.
        default_ext: Subtype to use when the file name has no extension
            (for example ``"frame"`` or ``"run.v2/frame"``). This is a
            best-effort default; the file contents are not inspected.

    Returns:
        The data URL as a string.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    b64_data = encode_image(image_path)
    # splitext only looks at the final path component, so a dot in a
    # directory name is never mistaken for the start of an extension.
    ext = os.path.splitext(image_path)[1].lstrip(".").lower() or default_ext
    if ext == "jpg":
        ext = "jpeg"
    return f"data:image/{ext};base64,{b64_data}"
|
|
|
+
|
|
|
# -----------------
|
|
|
# Tools definitions
|
|
|
# -----------------
|
|
|
-async def call_banana_tool(prompt: str) -> str:
|
|
|
- """包装 call_banana.py 工具的调用方法,抓取它保存本地的文件路径"""
|
|
|
- print(f"\n[Tool] ✨ 正在调用 call_banana 生成图片, Prompt: {prompt[:50]}...")
|
|
|
+async def call_banana_tool(prompt: str, aspect_ratio: str = None, reference_image: str = None, is_final: bool = True) -> str:
|
|
|
+ """包装 call_banana.py 生成图片,返回一张图的路径"""
|
|
|
+ print(f"\n[Tool] ✨ 正在调用 call_banana 生成图片 (is_final={is_final}), Prompt: {prompt[:50]}...")
|
|
|
script_path = os.path.join(os.path.dirname(__file__), "call_banana.py")
|
|
|
|
|
|
- # 设置环境变量走兼容模式,同时强制指定 UTF-8 编码避免 Windows 下输出由于表情符号崩溃
|
|
|
env = os.environ.copy()
|
|
|
env["PYTHONIOENCODING"] = "utf-8"
|
|
|
+
|
|
|
+ cmd_args = [sys.executable, script_path, "-p", prompt]
|
|
|
+ if aspect_ratio:
|
|
|
+ cmd_args.extend(["-a", aspect_ratio])
|
|
|
+ if reference_image:
|
|
|
+ cmd_args.extend(["-i", reference_image])
|
|
|
+
|
|
|
process = await asyncio.create_subprocess_exec(
|
|
|
- sys.executable, script_path, "-p", prompt,
|
|
|
+ *cmd_args,
|
|
|
stdout=asyncio.subprocess.PIPE,
|
|
|
stderr=asyncio.subprocess.PIPE,
|
|
|
env=env
|
|
|
@@ -34,14 +64,13 @@ async def call_banana_tool(prompt: str) -> str:
|
|
|
if err_output:
|
|
|
output += "\n" + err_output
|
|
|
|
|
|
- # 解析输出:"💾 已保存到本地 -> banana_output_0.jpeg"
|
|
|
match = re.search(r"已保存到本地 -> (.+)", output)
|
|
|
if match:
|
|
|
path = match.group(1).strip()
|
|
|
print(f"[Tool] ✅ call_banana 返回图片路径: {path}")
|
|
|
return path
|
|
|
else:
|
|
|
- print(f"[Tool] ❌ call_banana 似乎未成功生成文件, 控制台输出:\n{output}")
|
|
|
+ print(f"[Tool] ❌ call_banana 执行失败:\n{output}")
|
|
|
return f"Tool Execution Failed. output:\n{output}"
|
|
|
|
|
|
async def search_tool(keyword: str) -> str:
|
|
|
@@ -82,6 +111,18 @@ def get_agent_tools():
|
|
|
"prompt": {
|
|
|
"type": "string",
|
|
|
"description": "英语或中文详细的生图提示词"
|
|
|
+ },
|
|
|
+ "aspect_ratio": {
|
|
|
+ "type": "string",
|
|
|
+ "description": "(可选)你期望生成的图片宽高比,例如 3:4, 16:9, 1:1,请根据目标参考图的比例传入该参数"
|
|
|
+ },
|
|
|
+ "reference_image": {
|
|
|
+ "type": "string",
|
|
|
+ "description": "(动作控制底图)如果你在这一步设 is_final=true,请将你在上一阶段生成的【辅助骨架素材(is_final=false)】产生的本地路径填入此处。绝对禁止传入原始目标照片!"
|
|
|
+ },
|
|
|
+ "is_final": {
|
|
|
+ "type": "boolean",
|
|
|
+ "description": "指示本次生成是否是本轮次的最终产物。如果你需要先生成一张『白底火柴人/3D骨架』作为辅助垫图素材,请设为 false;拿到素材后,你必须继续将它的本地路径填给 `reference_image` 并使用最终 Prompt 和 is_final=true 完成最后合成。"
|
|
|
}
|
|
|
},
|
|
|
"required": ["prompt"]
|
|
|
@@ -90,67 +131,7 @@ def get_agent_tools():
|
|
|
}
|
|
|
]
|
|
|
|
|
|
-# -----------------
|
|
|
-# Agent 2: Image Evaluator (Qwen-VL-Max)
|
|
|
-# -----------------
|
|
|
-async def evaluate_images(target_image_path: str, generated_image_path: str, previous_feedback: str = None) -> str:
|
|
|
- print(f"\n[Agent 2] 👁️ Qwen-VL 开始视觉评估...")
|
|
|
- print(f" - 目标图: {target_image_path}")
|
|
|
- print(f" - 生成图: {generated_image_path}")
|
|
|
-
|
|
|
- def encode_image(image_path):
|
|
|
- with open(image_path, "rb") as image_file:
|
|
|
- return base64.b64encode(image_file.read()).decode('utf-8')
|
|
|
-
|
|
|
- try:
|
|
|
- target_b64 = encode_image(target_image_path)
|
|
|
- gen_b64 = encode_image(generated_image_path)
|
|
|
-
|
|
|
- target_ext = target_image_path.split('.')[-1].lower()
|
|
|
- if target_ext == 'jpg': target_ext = 'jpeg'
|
|
|
- gen_ext = generated_image_path.split('.')[-1].lower()
|
|
|
- if gen_ext == 'jpg': gen_ext = 'jpeg'
|
|
|
- except Exception as e:
|
|
|
- return f"无法读取图片以进行评估: {e}"
|
|
|
-
|
|
|
- system_content = "你是专业的AI生图评审师。你的工作是对比【目标参考图】和当前【生成图】,找出具体的差异,并给出针对性的修改意见给生图Prompt工程师。"
|
|
|
- if previous_feedback:
|
|
|
- system_content += "\n你还会收到你【上一轮的评估反馈】。请结合你的旧反馈,检查这轮新图片是否修正了你上次提出的问题,避免重复说一样的话,而是要有动态进展意识!"
|
|
|
|
|
|
- text_prompt = "请做详细的差异点分析:从构图、色彩、人物或物体细节、整体质感等方面指出当前生成图与目标图的差距。"
|
|
|
- if previous_feedback:
|
|
|
- text_prompt += f"\n\n你对上一版旧图的评估反馈曾经是:\n{previous_feedback}\n\n请比对这张【新生成图】,告诉我:上一版的问题被解决了吗?画面的进步点和退步点在哪里?请给出更新的针对性修改意见!"
|
|
|
- else:
|
|
|
- text_prompt += "结束时,请给出具体的 Prompt 修改建议。"
|
|
|
-
|
|
|
- messages = [
|
|
|
- {
|
|
|
- "role": "system",
|
|
|
- "content": system_content
|
|
|
- },
|
|
|
- {
|
|
|
- "role": "user",
|
|
|
- "content": [
|
|
|
- {"type": "text", "text": "【目标参考图(理想状态)】:"},
|
|
|
- {"type": "image_url", "image_url": {"url": f"data:image/{target_ext};base64,{target_b64}"}},
|
|
|
- {"type": "text", "text": "【本次生成的图片】:"},
|
|
|
- {"type": "image_url", "image_url": {"url": f"data:image/{gen_ext};base64,{gen_b64}"}},
|
|
|
- {"type": "text", "text": text_prompt}
|
|
|
- ]
|
|
|
- }
|
|
|
- ]
|
|
|
-
|
|
|
- try:
|
|
|
- response = await qwen_llm_call(
|
|
|
- messages=messages,
|
|
|
- model="qwen3.5-plus"
|
|
|
- )
|
|
|
- analysis = response["content"]
|
|
|
- print(f"\n[Agent 2] 📃 评估反馈:\n{analysis}\n")
|
|
|
- return analysis
|
|
|
- except Exception as e:
|
|
|
- print(f"\n[Agent 2] ⚠️ 评估发生错误: {e}")
|
|
|
- return f"VL模型调用失败: {e}"
|
|
|
|
|
|
# -----------------
|
|
|
# Main Workflow Loop
|
|
|
@@ -166,14 +147,18 @@ def get_base64_url(image_path: str) -> str:
|
|
|
async def main():
|
|
|
import argparse
|
|
|
import os
|
|
|
+ import json
|
|
|
|
|
|
default_target = os.path.join(os.path.dirname(os.path.abspath(__file__)), "input", "img_1.png")
|
|
|
parser = argparse.ArgumentParser(description="多智能体画图自动优化 Workflow")
|
|
|
parser.add_argument("-t", "--target", default=default_target, help="你想逼近的目标参考图本地路径")
|
|
|
- parser.add_argument("-m", "--max_loops", type=int, default=10, help="优化的最大迭代论调")
|
|
|
+ parser.add_argument("-p", "--pose", default=None, help="你提供的姿势参考图(如果有的话,给 Agent 用来走捷径垫底)")
|
|
|
+ parser.add_argument("-m", "--max_loops", type=int, default=15, help="优化的最大迭代论调")
|
|
|
+ parser.add_argument("-r", "--resume", action="store_true", help="是否从上次的 history.json 继续运行")
|
|
|
args = parser.parse_args()
|
|
|
|
|
|
target_image = args.target
|
|
|
+ pose_image = args.pose
|
|
|
|
|
|
print("\n" + "="*50)
|
|
|
print("🤖 启动双 Agent 生图闭环工作流 (纯 Vision-Language 架构)")
|
|
|
@@ -183,15 +168,35 @@ async def main():
|
|
|
print(f"⚠️ 找不到目标图片: {target_image}")
|
|
|
print("提示: 系统依然会运行寻找文件,但 Agent 2 将无法给出评估。可随便放一个图片来模拟。")
|
|
|
|
|
|
+ sys_content = f"你是一个高度自治的闭环生图优化 AI 架构师。你的目标是:生成一张与【目标参考图】在主角姿势、整体结构上无限接近的图片。\n你拥有极强的视觉反思能力和 Prompt 编写能力。\n\n【核心工作流与防坑指南】:\n- 你会看到你的【目标参考图】和你的【往期历史尝试与生成结果】。\n- 请你先利用你的**多模态火眼金睛**,无情地对自己上一轮生成的图片进行找茬。绝不允许说客套话!重点对比人物骨架、姿势和构图的偏离程度。\n- 紧接着,请在反思的基础上,直接重构或调整你的 Prompt,并在一次回复中调用 `call_banana_tool` 下发生图指令!\n- 【防作弊铁律】:你**绝对禁止**直接将【目标参考图】的路径传进 `reference_image` 来作弊!如果你想用图生图垫出完美动作,必须使用【中间素材战法】亲手画一张骨架出来垫。\n- 【中间素材战法】:如果原图姿态过于刁钻复杂,**要求你必须**分两步走:\n 第一步:设置 `is_final=false` 并写一段专门用于抽出单一维度的动作骨架/白模 Prompt(如: \"a generic white 3d mannequin jumping in mid-air, clean white background, high contrast skeleton\"),专门用于抽出干净的辅助骨架。\n 第二步:拿到这只纯净骨架的本地路径后,在同回合的下一次调用中,把这只骨架当做 `reference_image` 垫进去,配合你华丽的最终描述(如: \"a neon cyberpunk assassin jumping\"),设置 `is_final=true` 完成高阶对齐兼防污染! \n\n"
|
|
|
+
|
|
|
+ if pose_image and os.path.exists(pose_image):
|
|
|
+ sys_content += f"【🔥终极开挂特权】:\n天啊!用户居然为你额外提供了一张极致完美的【姿势参考图】!既然有了这张现成的动作骨架底图,你**立刻抛弃**两步走去抽骨架的方法。你应当直接使用特权,将这张姿势参考图的绝对物理路径 `{os.path.abspath(pose_image)}` 作为 `reference_image` 无脑传给引擎,配合你的终极词汇,并在第一回合内设置 `is_final=true` 完成终极绝杀生成!\n\n"
|
|
|
+
|
|
|
+ sys_content += "流程要求:\n1. 仔细分析差异,在你的纯文本回复段落写出【犀利的反思和执行步骤】。\n2. 反思结束后,使用工具发号施令。\n3. 当调用 `is_final=true` 时,视为你的本轮彻底结束。"
|
|
|
+
|
|
|
system_msg = {
|
|
|
"role": "system",
|
|
|
- "content": "你是一个超级提示词工程师(Prompt Engineer)。目标:生成一张无限接近【目标参考图】的图片。\n作为多模态大模型,每一轮我都会给你看你上次生成的图片结果和评估专家的犀利分析反馈。你需要利用这些反馈进行修改。\n流程要求:\n1. (可选)如果你对风格不确定,可以请求 search_tool 调研别人怎么写相关提示词。\n2. 使用 call_banana_tool 来实际提交你的提示词并生成图片。\n3. 调用生成工具后,你本轮的工作就结束了,系统会把成果拿去评估并在下一轮找你。"
|
|
|
+ "content": sys_content
|
|
|
}
|
|
|
|
|
|
max_loops = args.max_loops
|
|
|
current_generation_loop_count = 0
|
|
|
last_gen_info = None
|
|
|
prompt_history = [] # 记录完整的历史 Prompt 轨迹,防止反复抽卡
|
|
|
+
|
|
|
+ history_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "history.json")
|
|
|
+ if args.resume and os.path.exists(history_file):
|
|
|
+ try:
|
|
|
+ with open(history_file, "r", encoding="utf-8") as f:
|
|
|
+ prompt_history = json.load(f)
|
|
|
+ if prompt_history:
|
|
|
+ current_generation_loop_count = len(prompt_history)
|
|
|
+ last_gen_info = prompt_history[-1]
|
|
|
+ print(f"✅ [状态恢复] 已成功从 history.json 加载 {current_generation_loop_count} 轮历史,即将开始第 {current_generation_loop_count + 1} 轮...")
|
|
|
+ except Exception as e:
|
|
|
+ print(f"⚠️ [状态恢复失败] 读取历史记录报错: {e},将重新开始第一轮。")
|
|
|
+ prompt_history = []
|
|
|
|
|
|
while current_generation_loop_count < max_loops:
|
|
|
print(f"\n" + "="*40)
|
|
|
@@ -204,42 +209,60 @@ async def main():
|
|
|
if last_gen_info is None:
|
|
|
try:
|
|
|
target_b64_url = get_base64_url(target_image)
|
|
|
+ content_list = [
|
|
|
+ {"type": "text", "text": "【首轮启动】\n这是你需要逼近的【目标参考图】。现在请你仔细观察它,提炼出一份初步生图 Prompt。\n因为是第一轮,请直接凭借直觉观察,并使用 call_banana_tool 生成原型。"},
|
|
|
+ {"type": "image_url", "image_url": {"url": target_b64_url}}
|
|
|
+ ]
|
|
|
+
|
|
|
+ if pose_image and os.path.exists(pose_image):
|
|
|
+ content_list.append({"type": "text", "text": "并且,下面是用户良心为你提供的【开挂级·姿势参考图】!你可以直接在接下来的提示词工具调用中将此图拿去垫图!"})
|
|
|
+ content_list.append({"type": "image_url", "image_url": {"url": get_base64_url(pose_image)}})
|
|
|
+
|
|
|
messages.append({
|
|
|
"role": "user",
|
|
|
- "content": [
|
|
|
- {"type": "text", "text": "这是你需要逼近的【目标参考图】。现在请你仔细观察它,并提炼出一份详尽的初步生图 Prompt。你可以酌情使用 search_tool 调研,最后必须使用 call_banana_tool 提交你的 Prompt 生成最初的原型。"},
|
|
|
- {"type": "image_url", "image_url": {"url": target_b64_url}}
|
|
|
- ]
|
|
|
+ "content": content_list
|
|
|
})
|
|
|
except Exception as e:
|
|
|
messages.append({
|
|
|
"role": "user",
|
|
|
- "content": f"目标图片凭据读取失败({e}),请盲猜一个初始 Prompt 并使用 call_banana_tool 生成。"
|
|
|
+ "content": f"目标图片读取失败({e}),请盲猜一个初始 Prompt 用 call_banana_tool 生成。"
|
|
|
})
|
|
|
else:
|
|
|
try:
|
|
|
- gen_image_url = get_base64_url(last_gen_info["image_path"])
|
|
|
+ target_b64_url = get_base64_url(target_image)
|
|
|
+ user_content = [
|
|
|
+ {"type": "text", "text": "【持续干预闭环】\n这是不可动摇的【目标参考图】,它是一切评判的唯一基准:"},
|
|
|
+ {"type": "image_url", "image_url": {"url": target_b64_url}}
|
|
|
+ ]
|
|
|
+
|
|
|
+ if pose_image and os.path.exists(pose_image):
|
|
|
+ user_content.append({"type": "text", "text": "【外挂辅助】\n这是不可动摇的【姿势参考图】,请毫不犹豫地拿它去填进 reference_image 控制动作:"})
|
|
|
+ user_content.append({"type": "image_url", "image_url": {"url": get_base64_url(pose_image)}})
|
|
|
+
|
|
|
+ user_content.append({"type": "text", "text": "\n==== 【你的历史试错轨迹】 ====\n为了防止你在这场试错过程中来回打转(所谓的废卡反复抽卡),我为你列出了你*从古至今*所有的失败作品和对应的提示词!请认真观察下面每一张你过去的废片:\n"})
|
|
|
|
|
|
- # 构建历史记录描述,让它知道自己之前走过哪些弯路避免抽卡
|
|
|
- history_text = "【你的历史迭代轨迹 (包含往期Prompt与评估专家对其的批评,用于防复读和总结改进)】:\n"
|
|
|
for i, record in enumerate(prompt_history):
|
|
|
- history_text += f"==== 第 {i+1} 轮 ====\n"
|
|
|
- history_text += f"[使用的 Prompt]:\n{record['prompt']}\n"
|
|
|
- history_text += f"[收到的反馈批评]:\n{record['feedback']}\n\n"
|
|
|
+ user_content.append({"type": "text", "text": f"-- 第 {i+1} 轮 --\n[上次使用的 Prompt]:\n{record['prompt']}\n[此轮的废片结果]:"})
|
|
|
+
|
|
|
+ try:
|
|
|
+ img_path = record.get("image_paths", [record.get("image_path")])[0]
|
|
|
+ # 节约上下文 Token 和视觉注意力:只渲染第一张(由于打底盲测)和最近一次的历史原图,中间的全部折叠仅保留反思文本
|
|
|
+ if i == 0 or i == len(prompt_history) - 1:
|
|
|
+ user_content.append({"type": "image_url", "image_url": {"url": get_base64_url(img_path)}})
|
|
|
+ else:
|
|
|
+ user_content.append({"type": "text", "text": "*(由于历史过于久远,中间轮次图片已省去展示,请聚焦于下面你对它的纯文本反思)*"})
|
|
|
+ except:
|
|
|
+ pass
|
|
|
+
|
|
|
+ if record.get("feedback"):
|
|
|
+ user_content.append({"type": "text", "text": f"[你在本轮结束后的反思]:\n{record['feedback']}\n"})
|
|
|
+
|
|
|
+ user_content.append({"type": "text", "text": "====================\n\n现在,结合上述轨迹与那张【目标参考图】,请在回复中写出最新的【极度苛刻自我反思】,然后立马调用工具生成这轮新的 Prompt!"})
|
|
|
+
|
|
|
+ messages.append({"role": "user", "content": user_content})
|
|
|
|
|
|
- messages.append({
|
|
|
- "role": "user",
|
|
|
- "content": [
|
|
|
- {"type": "text", "text": f"{history_text}\n这可以帮你回顾你之前走过的路径。现在聚焦到上一轮:\n\n你上一轮({len(prompt_history)})使用的生图Prompt为:\n{last_gen_info['prompt']}\n\n这里是你上一轮生成的图片结果,请仔细查看对比:"},
|
|
|
- {"type": "image_url", "image_url": {"url": gen_image_url}},
|
|
|
- {"type": "text", "text": f"【视觉评估专家的分析反馈】:\n{last_gen_info['feedback']}\n\n请针对上述反馈,思考到底哪里不像,参考上述的历史轨迹避免重蹈覆辙,进行新的调研修正(如果需要),或者直接使用 call_banana_tool 生成优化后的版本。"}
|
|
|
- ]
|
|
|
- })
|
|
|
except Exception as e:
|
|
|
- messages.append({
|
|
|
- "role": "user",
|
|
|
- "content": f"上一轮信息读取失败 ({e})。请重新尝试凭感觉用 call_banana_tool 再次生成。"
|
|
|
- })
|
|
|
+ messages.append({"role": "user", "content": f"上下文读取失败 ({e})。请重试用 call_banana_tool 生成。"})
|
|
|
|
|
|
# Agent 1 内部工具调研微循环 (Agent 1 minor logic loop)
|
|
|
agent1_finished_generation = False
|
|
|
@@ -248,9 +271,9 @@ async def main():
|
|
|
while not agent1_finished_generation:
|
|
|
print(f"---\n💬 正在请求 Agent 1 (Prompt 师)...")
|
|
|
# 这里 Agent 1 也换成 qwen-vl-max,这样它才能看到传给它的上一轮图片
|
|
|
- response = await qwen_llm_call(
|
|
|
+ response = await gemini_llm_call(
|
|
|
messages=messages,
|
|
|
- model="qwen3.5-plus",
|
|
|
+ model="gemini-3.1-pro-preview",
|
|
|
tools=get_agent_tools()
|
|
|
)
|
|
|
|
|
|
@@ -272,6 +295,7 @@ async def main():
|
|
|
assistant_reply = {"role": "assistant"}
|
|
|
if content: assistant_reply["content"] = content
|
|
|
if tool_calls: assistant_reply["tool_calls"] = tool_calls
|
|
|
+ if "raw_gemini_parts" in response: assistant_reply["raw_gemini_parts"] = response["raw_gemini_parts"]
|
|
|
messages.append(assistant_reply)
|
|
|
|
|
|
if tool_calls:
|
|
|
@@ -289,49 +313,49 @@ async def main():
|
|
|
})
|
|
|
|
|
|
elif func_name == "call_banana_tool":
|
|
|
- print(f"\n⚙️ Agent 1 决定提交生图请求!")
|
|
|
+ is_final = args_dict.get("is_final", True)
|
|
|
+ print(f"\n⚙️ 节点发起了生图请求 (是否为终极图: {is_final})!")
|
|
|
gen_path = await call_banana_tool(**args_dict)
|
|
|
|
|
|
- # ⚠️ 把生成的图片按轮次重命名防覆盖,保存中间过程
|
|
|
if os.path.exists(gen_path):
|
|
|
ext = gen_path.split('.')[-1]
|
|
|
- new_gen_path = f"gen_loop_{current_generation_loop_count + 1}.{ext}"
|
|
|
import shutil
|
|
|
+ if is_final:
|
|
|
+ new_gen_path = f"gen_loop_{current_generation_loop_count + 1}.{ext}"
|
|
|
+ else:
|
|
|
+ import uuid
|
|
|
+ new_gen_path = f"gen_loop_{current_generation_loop_count + 1}_material_{str(uuid.uuid4())[:8]}.{ext}"
|
|
|
shutil.move(gen_path, new_gen_path)
|
|
|
gen_path = new_gen_path
|
|
|
- print(f"[文件管理] 中间图片已重命名并保存为: {new_gen_path}")
|
|
|
+ print(f"[文件管理] 生图结果已重命名并保存为: {new_gen_path}")
|
|
|
|
|
|
prompt_used = args_dict.get("prompt", "")
|
|
|
|
|
|
- # 把消息补齐,虽然这一轮马上就要重置销毁了
|
|
|
messages.append({
|
|
|
"role": "tool",
|
|
|
"tool_call_id": tc_id,
|
|
|
- "content": f"已生成,路径: {gen_path}"
|
|
|
+ "content": f"已成功生成,图片路径: {os.path.abspath(gen_path)}"
|
|
|
})
|
|
|
|
|
|
- agent1_finished_generation = True
|
|
|
- current_generation_loop_count += 1
|
|
|
-
|
|
|
- # 进行评估并记录,传递给下一大轮
|
|
|
- if os.path.exists(gen_path) and os.path.exists(target_image):
|
|
|
- prev_feedback = last_gen_info["feedback"] if last_gen_info else None
|
|
|
- evaluation_feedback = await evaluate_images(target_image, gen_path, prev_feedback)
|
|
|
+ if is_final:
|
|
|
+ agent1_finished_generation = True
|
|
|
+ current_generation_loop_count += 1
|
|
|
+
|
|
|
last_gen_info = {
|
|
|
"prompt": prompt_used,
|
|
|
"image_path": gen_path,
|
|
|
- "feedback": evaluation_feedback
|
|
|
+ "feedback": content if content else "无反思内容"
|
|
|
}
|
|
|
+
|
|
|
+ prompt_history.append(last_gen_info)
|
|
|
+ try:
|
|
|
+ with open(history_file, "w", encoding="utf-8") as f:
|
|
|
+ json.dump(prompt_history, f, ensure_ascii=False, indent=2)
|
|
|
+ except Exception as e:
|
|
|
+ print(f"[警告] 历史记录保存失败: {e}")
|
|
|
+ break # 跳出 tool_calls for loop 并进入下一大轮
|
|
|
else:
|
|
|
- last_gen_info = {
|
|
|
- "prompt": prompt_used,
|
|
|
- "image_path": gen_path,
|
|
|
- "feedback": f"系统提示:由于目标图 {target_image} 或生成图 {gen_path} 不存在,评估被跳过。"
|
|
|
- }
|
|
|
-
|
|
|
- # 记录到全局大历史中,供它长线参考防重踩坑
|
|
|
- prompt_history.append(last_gen_info)
|
|
|
- break # 跳出 tool_calls for loop
|
|
|
+ print(f"[战术回馈] 这是辅助素材,已将路径返回给 Agent1 继续思考。")
|
|
|
else:
|
|
|
# 没调工具
|
|
|
print("\n[控制中心] Agent 1 没有继续使用任何工具。结束其周期。")
|
|
|
@@ -346,10 +370,14 @@ async def main():
|
|
|
print("🏆 正在生成【专家最终多维度反馈报告】...")
|
|
|
print("="*50)
|
|
|
|
|
|
- first_gen = prompt_history[0]["image_path"]
|
|
|
- last_gen = prompt_history[-1]["image_path"]
|
|
|
+ first_gen_record = prompt_history[0]
|
|
|
+ last_gen_record = prompt_history[-1]
|
|
|
+
|
|
|
+ # 兼容旧版本的单图记录和新版本的多图记录
|
|
|
+ first_gen = first_gen_record.get("image_paths", [first_gen_record.get("image_path")])[0]
|
|
|
+ last_gen = last_gen_record.get("image_paths", [last_gen_record.get("image_path")])[0]
|
|
|
|
|
|
- if os.path.exists(first_gen) and os.path.exists(last_gen):
|
|
|
+ if first_gen and last_gen and os.path.exists(first_gen) and os.path.exists(last_gen):
|
|
|
try:
|
|
|
target_b64 = encode_image(target_image)
|
|
|
first_b64 = encode_image(first_gen)
|
|
|
@@ -382,9 +410,9 @@ async def main():
|
|
|
}
|
|
|
]
|
|
|
|
|
|
- response = await qwen_llm_call(
|
|
|
+ response = await gemini_llm_call(
|
|
|
messages=final_messages,
|
|
|
- model="qwen3.5-plus"
|
|
|
+ model="gemini-3.1-pro-preview"
|
|
|
)
|
|
|
print(f"\n[Agent 2] 📋 【最终多维度评估报告】:\n{response['content']}\n")
|
|
|
except Exception as e:
|