| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300 |
- """
- 图像质量评估工具
- 输入:需求文档路径 + 图片路径(单图或多图)+ 质量标准(可选)
- 输出:评分 + 详细反馈
- 通过多模态 VL 大模型对生成图像进行质量评估:
- - 单图模式:对照需求文档检查是否满足要求
- - 多图模式:检查跨图一致性(角色、服装、色调等)
- """
- import json
- from pathlib import Path
- from typing import Dict, Any, Optional, List, Union
- from agent.tools import tool, ToolResult
- from agent.llm import create_qwen_llm_call
# Model identifier used both when constructing and when invoking the VL call.
_VL_MODEL = "qwen-vl-max"

# File suffix -> MIME type used in the data URL; any other suffix is sent as PNG.
_MIME_BY_SUFFIX = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".webp": "image/webp",
}


def _summarize_requirement(requirement_path: str, requirement_text: str) -> str:
    """Condense a JSON requirement document into the sections the prompt needs.

    Non-JSON paths, unparsable JSON, a non-object JSON root, or a document
    with none of the recognised keys all fall back to the raw text.
    """
    if not requirement_path.lower().endswith(".json"):
        return requirement_text
    try:
        req_data = json.loads(requirement_text)
    except json.JSONDecodeError:
        # Best effort: a broken JSON file is still passed to the model verbatim.
        return requirement_text
    if not isinstance(req_data, dict):
        return requirement_text

    parts = []
    # Single-image requirement.
    if "required_spec" in req_data:
        parts.append(
            "## 单图需求\n"
            + json.dumps(req_data["required_spec"], ensure_ascii=False, indent=2)
        )
    if "prompt" in req_data:
        parts.append(f"Prompt: {req_data['prompt']}")
    # Cross-image consistency requirement.
    if "consistency_checks" in req_data:
        parts.append(
            "## 一致性检查标准\n"
            + json.dumps(req_data["consistency_checks"], ensure_ascii=False, indent=2)
        )
    # Per-image specs (the original comment refers to a whole pipeline.json).
    images = req_data.get("images")
    if isinstance(images, dict):
        # Entries that are not dicts are skipped instead of discarding the
        # whole summary.
        img_specs = {
            img_id: img_data["required_spec"]
            for img_id, img_data in images.items()
            if isinstance(img_data, dict) and "required_spec" in img_data
        }
        if img_specs:
            parts.append(
                "## 各图需求规格\n"
                + json.dumps(img_specs, ensure_ascii=False, indent=2)
            )
    return "\n\n".join(parts) if parts else requirement_text


def _load_images(paths: List[str]) -> List[Dict[str, str]]:
    """Read each image and return its path, base64 payload and MIME type.

    Raises:
        OSError: if any file cannot be read.
    """
    import base64

    loaded = []
    for p_str in paths:
        p = Path(p_str)
        loaded.append({
            "path": p_str,
            "b64": base64.b64encode(p.read_bytes()).decode("utf-8"),
            "mime": _MIME_BY_SUFFIX.get(p.suffix.lower(), "image/png"),
        })
    return loaded


def _build_multi_image_prompt(
    requirement_summary: str,
    image_paths: List[str],
    quality_criteria: Optional[str],
) -> str:
    """Build the cross-image consistency evaluation prompt."""
    image_labels = "\n".join(
        f"- 图片 {index}: {path}" for index, path in enumerate(image_paths, start=1)
    )
    criteria = quality_criteria or "按照需求文档中的一致性检查标准进行评估"
    # NOTE(review): the consistency dimensions below name one specific shoot
    # (white dress, white/green palette, 85mm f/1.8) — confirm whether they
    # should be derived from the requirement document instead.
    return f"""你是一个专业的图像质量评估专家。请对以下 {len(image_paths)} 张生成图像进行评估,重点检查**跨图一致性**。
## 需求文档
{requirement_summary}
## 图片列表
{image_labels}
## 质量标准
{criteria}
## 评估维度
### A. 跨图一致性(每项 0-10 分)
1. **角色一致性**:所有图中的人物面部特征、发型、肤色是否保持一致
2. **服装一致性**:白色长裙的款式、材质、颜色是否 100% 统一
3. **色调一致性**:白绿配色方案、色彩饱和度是否贯穿所有图像
4. **光影一致性**:逆光/轮廓光方向、光晕效果是否统一
5. **风格一致性**:摄影风格、镜头参数感(85mm、f/1.8 景深)是否统一
### B. 单图质量(每张图 0-10 分)
对每张图分别给出质量评分。
## 输出格式
请严格按照以下 JSON 格式输出:
```json
{{
"overall_score": <0-10 的总分>,
"consistency_scores": {{
"character": <0-10>,
"clothing": <0-10>,
"color_scheme": <0-10>,
"lighting": <0-10>,
"style": <0-10>
}},
"per_image_scores": {{
"图片1": <0-10>,
"图片2": <0-10>
}},
"inconsistent_images": ["<列出不一致的图片编号及问题>"],
"feedback": "<详细的文字反馈,指出一致性的优点和不足>",
"suggestions": "<改进建议,哪些图需要重新生成、怎么调整>"
}}
```
请仔细对比所有图像,给出客观、专业的评估。"""


def _build_single_image_prompt(
    requirement_summary: str,
    quality_criteria: Optional[str],
) -> str:
    """Build the single-image evaluation prompt."""
    criteria = quality_criteria or "按照需求文档中的 required_spec 和 prompt 描述进行评估"
    return f"""你是一个专业的图像质量评估专家。请根据以下需求文档,对生成的图像进行详细评估。
## 需求文档
{requirement_summary}
## 质量标准
{criteria}
## 评估维度
请从以下维度评估图像质量(每项 0-10 分):
1. **姿态准确性**:人物姿态是否符合需求描述
2. **服装还原度**:服装款式、材质、细节是否符合要求
3. **光影效果**:光线方向、强度、轮廓光等是否符合描述
4. **背景一致性**:背景元素、虚化效果是否符合要求
5. **材质真实感**:服装、道具的材质是否真实自然
6. **整体构图**:构图、色调、氛围是否符合预期
## 输出格式
请严格按照以下 JSON 格式输出评估结果:
```json
{{
"overall_score": <0-10 的总分>,
"dimension_scores": {{
"pose": <0-10>,
"clothing": <0-10>,
"lighting": <0-10>,
"background": <0-10>,
"material": <0-10>,
"composition": <0-10>
}},
"feedback": "<详细的文字反馈,指出优点和不足>",
"suggestions": "<改进建议,如需调整哪些参数或换用哪些工具>"
}}
```
请仔细观察图像,给出客观、专业的评估。"""


def _extract_json_payload(response_text: str) -> str:
    """Return the JSON text of a model reply, unwrapping a fenced block if present.

    A ```json fence is preferred over a bare ``` fence. When the closing fence
    is missing, everything after the opening fence is used, so the final
    character of the payload is not cut off.
    """
    for fence in ("```json", "```"):
        start = response_text.find(fence)
        if start == -1:
            continue
        start += len(fence)
        end = response_text.find("```", start)
        if end == -1:
            end = len(response_text)
        return response_text[start:end].strip()
    return response_text


@tool(
    display={
        "zh": {"name": "图像质量评估", "params": {
            "requirement_path": "需求文档路径",
            "image_paths": "图片路径(单个字符串或列表)",
            "quality_criteria": "质量标准(可选)"
        }},
        "en": {"name": "Image Quality Evaluation", "params": {
            "requirement_path": "Requirement document path",
            "image_paths": "Image path(s) (string or list)",
            "quality_criteria": "Quality criteria (optional)"
        }},
    }
)
async def evaluate_image(
    requirement_path: str,
    image_paths: Union[str, List[str]],
    quality_criteria: Optional[str] = None
) -> ToolResult:
    """Evaluate whether generated images satisfy a requirement document.

    The images are sent to a multimodal VL model together with an evaluation
    prompt:

    **Single-image mode** (one path, or a list with one path):
    - pose, clothing, lighting, background against the spec
    - realism of materials and details
    - overall composition and colour tone

    **Multi-image mode** (a list with more than one path):
    - cross-image consistency of character, clothing and colour style
    - identification of inconsistent images with fix suggestions

    Args:
        requirement_path: Path to the requirement document (JSON or text file).
        image_paths: Image path to evaluate (a single string or a list of paths).
        quality_criteria: Extra quality criteria description (optional).

    Returns:
        ToolResult whose output is a JSON document holding the mode, the
        inputs and the model's evaluation (including a 0-10 overall score).
        On failure the ToolResult carries an error message instead.
    """
    # Normalise the input to a list of path strings.
    if isinstance(image_paths, (str, Path)):
        paths_list = [str(image_paths)]
    else:
        paths_list = [str(p) for p in (image_paths or [])]
    is_multi_image = len(paths_list) > 1

    # 1. Read the requirement document.
    req_path = Path(requirement_path)
    if not req_path.exists():
        return ToolResult(
            title="评估失败",
            output="",
            error=f"需求文档不存在: {requirement_path}",
        )
    try:
        requirement_text = req_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        # E.g. the path is a directory, is unreadable, or is not UTF-8.
        return ToolResult(
            title="评估失败",
            output="",
            error=f"无法读取需求文档: {e}",
        )
    requirement_summary = _summarize_requirement(str(requirement_path), requirement_text)

    # 2. Validate all image files before reading any of them, so that a
    #    missing file does not cost the encoding of the others.
    missing_files = [p_str for p_str in paths_list if not Path(p_str).exists()]
    if missing_files:
        return ToolResult(
            title="评估失败",
            output="",
            error=f"以下图片文件不存在: {', '.join(missing_files)}",
        )
    if not paths_list:
        return ToolResult(
            title="评估失败",
            output="",
            error="没有可评估的图片",
        )
    try:
        image_contents = _load_images(paths_list)
    except OSError as e:
        return ToolResult(
            title="评估失败",
            output="",
            error=f"无法读取图片文件: {e}",
        )

    # 3. Build the evaluation prompt for the selected mode.
    if is_multi_image:
        eval_prompt = _build_multi_image_prompt(
            requirement_summary, paths_list, quality_criteria
        )
    else:
        eval_prompt = _build_single_image_prompt(requirement_summary, quality_criteria)

    # 4. Build the multimodal message: the prompt followed by one data URL per image.
    content_parts = [{"type": "text", "text": eval_prompt}]
    content_parts.extend(
        {
            "type": "image_url",
            "image_url": {"url": f"data:{ic['mime']};base64,{ic['b64']}"},
        }
        for ic in image_contents
    )
    messages = [{"role": "user", "content": content_parts}]

    # 5. Call the VL model.
    try:
        llm_call = create_qwen_llm_call(model=_VL_MODEL)
        response = await llm_call(messages, model=_VL_MODEL, temperature=0.3)
        response_text = response["content"].strip()
    except Exception as e:
        return ToolResult(
            title="评估失败",
            output="",
            error=f"评估过程出错: {e}",
        )

    # 6. Parse the evaluation result; on failure hand back the raw reply.
    try:
        eval_result = json.loads(_extract_json_payload(response_text))
    except json.JSONDecodeError as e:
        return ToolResult(
            title="评估完成(JSON 解析失败,返回原始文本)",
            output=f"LLM 返回内容:\n{response_text}",
            error=f"无法解析 LLM 返回的 JSON: {e}",
        )
    if not isinstance(eval_result, dict):
        # Valid JSON but not an object: there is no "overall_score" to read.
        return ToolResult(
            title="评估完成(JSON 解析失败,返回原始文本)",
            output=f"LLM 返回内容:\n{response_text}",
            error="无法解析 LLM 返回的 JSON: 顶层不是 JSON 对象",
        )

    # 7. Format the output.
    output = {
        "mode": "multi_image_consistency" if is_multi_image else "single_image",
        "requirement_path": requirement_path,
        "image_paths": paths_list,
        "evaluation": eval_result,
    }
    overall_score = eval_result.get("overall_score", 0)
    image_count = len(paths_list)
    if is_multi_image:
        title = f"多图一致性评估完成({image_count} 张,总分: {overall_score}/10)"
        memory = f"Consistency evaluation of {image_count} images: score={overall_score}/10"
    else:
        title = f"图像评估完成(总分: {overall_score}/10)"
        memory = f"Evaluated {paths_list[0]}: score={overall_score}/10"
    return ToolResult(
        title=title,
        output=json.dumps(output, ensure_ascii=False, indent=2),
        long_term_memory=memory,
    )
|