# howard / Agent
"""
图像质量评估工具

输入：需求文档路径 + 图片路径（单图或多图）+ 质量标准（可选）
输出：评分 + 详细反馈

通过多模态 VL 大模型对生成图像进行质量评估：
- 单图模式：对照需求文档检查是否满足要求
- 多图模式：检查跨图一致性（角色、服装、色调等）
"""

import json
from pathlib import Path
from typing import Dict, Any, Optional, List, Union

from agent.tools import tool, ToolResult
from agent.llm import create_qwen_llm_call


# Lowercase file suffix -> MIME type used in the data URL sent to the model.
# Any suffix not listed here is sent as PNG.
_IMAGE_MIME_BY_SUFFIX = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".webp": "image/webp",
}
_DEFAULT_IMAGE_MIME = "image/png"


def _summarize_requirement(requirement_text: str) -> str:
    """Condense a JSON requirement document into the sections the prompt needs.

    Pulls out ``required_spec``, ``prompt``, ``consistency_checks`` and the
    per-image ``required_spec`` entries under ``images``. This is best-effort:
    if the text is not valid JSON, has an unexpected shape, or contains none
    of those keys, the raw text is returned unchanged.
    """
    try:
        req_data = json.loads(requirement_text)
        parts = []
        # Single-image requirement.
        if "required_spec" in req_data:
            parts.append(
                "## 单图需求\n"
                + json.dumps(req_data["required_spec"], ensure_ascii=False, indent=2)
            )
        if "prompt" in req_data:
            parts.append(f"Prompt: {req_data['prompt']}")
        # Cross-image consistency requirement.
        if "consistency_checks" in req_data:
            parts.append(
                "## 一致性检查标准\n"
                + json.dumps(req_data["consistency_checks"], ensure_ascii=False, indent=2)
            )
        # Per-image requirements (a whole pipeline.json style document).
        if "images" in req_data:
            img_specs = {
                img_id: img_data["required_spec"]
                for img_id, img_data in req_data["images"].items()
                if "required_spec" in img_data
            }
            if img_specs:
                parts.append(
                    "## 各图需求规格\n"
                    + json.dumps(img_specs, ensure_ascii=False, indent=2)
                )
    except (ValueError, TypeError, AttributeError, KeyError, RecursionError):
        # Malformed JSON or an unexpected document shape: fall back to the raw
        # text instead of failing the evaluation. Only data-shape errors are
        # caught so KeyboardInterrupt / SystemExit are no longer swallowed.
        return requirement_text
    return "\n\n".join(parts) if parts else requirement_text


def _image_mime_type(path: Path) -> str:
    """Return the MIME type for *path* based on its (case-insensitive) suffix."""
    return _IMAGE_MIME_BY_SUFFIX.get(path.suffix.lower(), _DEFAULT_IMAGE_MIME)


def _extract_json_payload(response_text: str) -> str:
    """Return the JSON text embedded in a model reply.

    Prefers a ```json fenced block, then any ``` fenced block, and otherwise
    returns the reply unchanged. If the opening fence is never closed,
    everything after it is used, so the final character is not cut off.
    """
    for fence in ("```json", "```"):
        start = response_text.find(fence)
        if start == -1:
            continue
        start += len(fence)
        end = response_text.find("```", start)
        if end == -1:
            # Unterminated fence: take the remainder of the reply.
            end = len(response_text)
        return response_text[start:end].strip()
    return response_text


@tool(
    display={
        "zh": {"name": "图像质量评估", "params": {
            "requirement_path": "需求文档路径",
            "image_paths": "图片路径（单个字符串或列表）",
            "quality_criteria": "质量标准（可选）"
        }},
        "en": {"name": "Image Quality Evaluation", "params": {
            "requirement_path": "Requirement document path",
            "image_paths": "Image path(s) (string or list)",
            "quality_criteria": "Quality criteria (optional)"
        }},
    }
)
async def evaluate_image(
    requirement_path: str,
    image_paths: Union[str, List[str]],
    quality_criteria: Optional[str] = None
) -> ToolResult:
    """Evaluate whether generated images satisfy a requirement document.

    The images are sent, together with an evaluation prompt, to the
    ``qwen-vl-max`` multimodal model, and the JSON it returns is wrapped in a
    ``ToolResult``.

    **Single-image mode** (one path, given as a string or a one-element list):
    scores pose, clothing, lighting, background, material realism and overall
    composition against the requirement document.

    **Multi-image mode** (a list with more than one path): scores cross-image
    consistency (character, clothing, colour scheme, lighting, style), asks
    for a per-image quality score, and asks the model to list inconsistent
    images with suggestions.

    Args:
        requirement_path: Path to the requirement document. Files whose name
            ends in ``.json`` (case-insensitive) are condensed to their
            relevant sections; anything else is passed through as plain text.
        image_paths: A single image path or a list of image paths.
        quality_criteria: Optional extra quality criteria inserted into the
            prompt; a mode-specific default sentence is used when omitted.

    Returns:
        A ``ToolResult``. On success ``output`` is a JSON string containing
        the mode, the input paths and the model's evaluation. On failure
        (missing requirement file, missing images, no images, model error,
        unparseable model reply) ``error`` is set; for an unparseable reply
        ``output`` carries the raw model text.
    """
    import base64

    # Normalise the input to a list; only more than one image triggers the
    # cross-image consistency mode.
    if isinstance(image_paths, str):
        paths_list = [image_paths]
    else:
        paths_list = list(image_paths)
    is_multi_image = len(paths_list) > 1

    # 1. Read the requirement document.
    req_path = Path(requirement_path)
    if not req_path.exists():
        return ToolResult(
            title="评估失败",
            output="",
            error=f"需求文档不存在: {requirement_path}",
        )

    requirement_text = req_path.read_text(encoding="utf-8")

    # For JSON documents, keep only the sections relevant to the evaluation.
    requirement_summary = requirement_text
    if str(requirement_path).lower().endswith(".json"):
        requirement_summary = _summarize_requirement(requirement_text)

    # 2. Load every image as base64, collecting missing paths so that all of
    #    them can be reported in a single error.
    image_contents = []
    missing_files = []
    for p_str in paths_list:
        p = Path(p_str)
        if not p.exists():
            missing_files.append(p_str)
            continue
        image_contents.append({
            "path": p_str,
            "b64": base64.b64encode(p.read_bytes()).decode("utf-8"),
            "mime": _image_mime_type(p),
        })

    if missing_files:
        return ToolResult(
            title="评估失败",
            output="",
            error=f"以下图片文件不存在: {', '.join(missing_files)}",
        )
    if not image_contents:
        return ToolResult(
            title="评估失败",
            output="",
            error="没有可评估的图片",
        )

    # 3. Build the evaluation prompt for the selected mode.
    if is_multi_image:
        image_labels = "\n".join(
            f"- 图片 {i+1}: {ic['path']}" for i, ic in enumerate(image_contents)
        )
        criteria_text = quality_criteria or "按照需求文档中的一致性检查标准进行评估"
        # NOTE(review): the consistency dimensions below hard-code a specific
        # shoot (white long dress, white/green palette, 85mm f/1.8) and the
        # per_image_scores example lists only two images regardless of how
        # many are supplied. Left byte-identical here; confirm whether they
        # should be derived from the requirement document instead.
        eval_prompt = f"""你是一个专业的图像质量评估专家。请对以下 {len(image_contents)} 张生成图像进行评估，重点检查**跨图一致性**。

## 需求文档
{requirement_summary}

## 图片列表
{image_labels}

## 质量标准
{criteria_text}

## 评估维度

### A. 跨图一致性（每项 0-10 分）
1. **角色一致性**：所有图中的人物面部特征、发型、肤色是否保持一致
2. **服装一致性**：白色长裙的款式、材质、颜色是否 100% 统一
3. **色调一致性**：白绿配色方案、色彩饱和度是否贯穿所有图像
4. **光影一致性**：逆光/轮廓光方向、光晕效果是否统一
5. **风格一致性**：摄影风格、镜头参数感（85mm、f/1.8 景深）是否统一

### B. 单图质量（每张图 0-10 分）
对每张图分别给出质量评分。

## 输出格式
请严格按照以下 JSON 格式输出：

```json
{{
  "overall_score": <0-10 的总分>,
  "consistency_scores": {{
    "character": <0-10>,
    "clothing": <0-10>,
    "color_scheme": <0-10>,
    "lighting": <0-10>,
    "style": <0-10>
  }},
  "per_image_scores": {{
    "图片1": <0-10>,
    "图片2": <0-10>
  }},
  "inconsistent_images": ["<列出不一致的图片编号及问题>"],
  "feedback": "<详细的文字反馈，指出一致性的优点和不足>",
  "suggestions": "<改进建议，哪些图需要重新生成、怎么调整>"
}}
```

请仔细对比所有图像，给出客观、专业的评估。"""

    else:
        criteria_text = quality_criteria or "按照需求文档中的 required_spec 和 prompt 描述进行评估"
        eval_prompt = f"""你是一个专业的图像质量评估专家。请根据以下需求文档，对生成的图像进行详细评估。

## 需求文档
{requirement_summary}

## 质量标准
{criteria_text}

## 评估维度
请从以下维度评估图像质量（每项 0-10 分）：

1. **姿态准确性**：人物姿态是否符合需求描述
2. **服装还原度**：服装款式、材质、细节是否符合要求
3. **光影效果**：光线方向、强度、轮廓光等是否符合描述
4. **背景一致性**：背景元素、虚化效果是否符合要求
5. **材质真实感**：服装、道具的材质是否真实自然
6. **整体构图**：构图、色调、氛围是否符合预期

## 输出格式
请严格按照以下 JSON 格式输出评估结果：

```json
{{
  "overall_score": <0-10 的总分>,
  "dimension_scores": {{
    "pose": <0-10>,
    "clothing": <0-10>,
    "lighting": <0-10>,
    "background": <0-10>,
    "material": <0-10>,
    "composition": <0-10>
  }},
  "feedback": "<详细的文字反馈，指出优点和不足>",
  "suggestions": "<改进建议，如需调整哪些参数或换用哪些工具>"
}}
```

请仔细观察图像，给出客观、专业的评估。"""

    # 4. Build the multimodal message: the prompt first, then each image as a
    #    base64 data URL in the same order as the input paths.
    content_parts = [{"type": "text", "text": eval_prompt}]
    content_parts.extend(
        {
            "type": "image_url",
            "image_url": {"url": f"data:{ic['mime']};base64,{ic['b64']}"},
        }
        for ic in image_contents
    )
    messages = [{"role": "user", "content": content_parts}]

    # Bound before the try so the JSONDecodeError handler can always reference
    # it, even if the error is raised before the reply text is extracted.
    response_text = ""

    # 5. Call the VL model.
    try:
        llm_call = create_qwen_llm_call(model="qwen-vl-max")
        response = await llm_call(messages, model="qwen-vl-max", temperature=0.3)

        # 6. Parse the evaluation result out of the reply.
        response_text = response["content"].strip()
        eval_result = json.loads(_extract_json_payload(response_text))

        # 7. Format the output.
        output = {
            "mode": "multi_image_consistency" if is_multi_image else "single_image",
            "requirement_path": requirement_path,
            "image_paths": paths_list,
            "evaluation": eval_result,
        }

        overall_score = eval_result.get("overall_score", 0)
        image_count = len(paths_list)

        if is_multi_image:
            title = f"多图一致性评估完成（{image_count} 张，总分: {overall_score}/10）"
            memory = f"Consistency evaluation of {image_count} images: score={overall_score}/10"
        else:
            title = f"图像评估完成（总分: {overall_score}/10）"
            memory = f"Evaluated {paths_list[0]}: score={overall_score}/10"

        return ToolResult(
            title=title,
            output=json.dumps(output, ensure_ascii=False, indent=2),
            long_term_memory=memory,
        )

    except json.JSONDecodeError as e:
        # The model replied, but not with parseable JSON: surface the raw text
        # so the caller can still read the evaluation.
        return ToolResult(
            title="评估完成（JSON 解析失败，返回原始文本）",
            output=f"LLM 返回内容：\n{response_text}",
            error=f"无法解析 LLM 返回的 JSON: {e}",
        )
    except Exception as e:
        # Tool boundary: report any other failure as a ToolResult error rather
        # than raising into the agent loop.
        return ToolResult(
            title="评估失败",
            output="",
            error=f"评估过程出错: {e}",
        )