"""
Image-restoration prompt generation system.

Automatically builds high-quality image-generation prompts from the
deconstructed (analysis) data.
"""

import json
from pathlib import Path
from typing import Dict, List, Any


class PromptGenerator:
    """Prompt生成器"""
    
    def __init__(self, input_dir: str = "input/paragraphs"):
        self.input_dir = Path(input_dir)
        self.global_elements = self._load_global_elements()
        self.global_forms = self._load_global_forms()
    
    def _load_global_elements(self) -> List[Dict]:
        """加载全局实质元素（跨图聚合）"""
        file_path = self.input_dir / "03_图片制作点实质结果.json"
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    
    def _load_global_forms(self) -> List[Dict]:
        """加载全局形式特征（跨图聚合）"""
        file_path = self.input_dir / "04_图片制作点形式结果.json"
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    
    def _load_image_segment(self, image_num: int) -> Dict:
        """加载指定图片的分段数据"""
        # 查找对应的分段文件
        pattern = f"01_图片分段_{image_num:02d}_*.json"
        files = list(self.input_dir.glob(pattern))
        if not files:
            raise FileNotFoundError(f"未找到图片{image_num}的分段文件")
        
        with open(files[0], 'r', encoding='utf-8') as f:
            return json.load(f)
    
    def _load_image_form(self, image_num: int) -> Dict:
        """加载指定图片的形式分析"""
        file_path = self.input_dir / f"02_图片形式_{image_num:02d}.json"
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    
    def _extract_key_descriptions(self, segment_data: Dict, form_data: Dict) -> Dict[str, str]:
        """提取关键描述信息"""
        descriptions = {
            "scene": "",
            "person": "",
            "person_pose": "",
            "person_clothing": "",
            "person_hair": "",
            "easel": "",
            "palette": "",
            "background": "",
            "details": []
        }
        
        # 从分段数据提取
        sections = segment_data.get("sections", [])
        if sections:
            main_section = sections[0]
            descriptions["scene"] = main_section.get("描述", "")
            
            # 遍历子段落
            for sub in main_section.get("子段落", []):
                name = sub.get("名称", "")
                desc = sub.get("描述", "")
                
                if "人物" in name:
                    descriptions["person"] = desc
                elif "画架" in name:
                    descriptions["easel"] = desc
                elif "调色板" in name:
                    descriptions["palette"] = desc
                elif "背景" in name:
                    descriptions["background"] = desc
        
        # 从形式数据提取细节
        form_elements = form_data.get("form_elements", [])
        for elem_group in form_elements:
            for form in elem_group.get("形式", []):
                name = form.get("名称", "")
                desc = form.get("描述", "")
                
                if "人物姿态" in name:
                    descriptions["person_pose"] = desc
                elif "人物着装" in name:
                    descriptions["person_clothing"] = desc
                elif "人物发型" in name:
                    descriptions["person_hair"] = desc
                elif name not in ["人物", "背景", "画架", "调色板"]:
                    descriptions["details"].append(f"{name}: {desc}")
        
        return descriptions
    
    def _build_prompt_structure(self, descriptions: Dict[str, str], group_id: str) -> str:
        """
        构建结构化prompt
        顺序：[主体] + [描述性属性] + [场景环境] + [光照条件] + [情感氛围] + [构图方式] + [艺术风格]
        """
        prompt_parts = []
        
        # 1. 主体描述（人物 + 道具）
        if descriptions["person"]:
            prompt_parts.append(descriptions["person"])
        
        # 2. 描述性属性（姿态、着装）
        if descriptions["person_pose"]:
            # 简化姿态描述，提取关键动作
            pose_simplified = self._simplify_pose(descriptions["person_pose"])
            prompt_parts.append(pose_simplified)
        
        if descriptions["person_clothing"]:
            # 简化着装描述
            clothing_simplified = self._simplify_clothing(descriptions["person_clothing"])
            prompt_parts.append(clothing_simplified)
        
        # 3. 道具细节
        if "g1" in group_id or "g2" in group_id:  # 户外绘画场景
            if descriptions["easel"]:
                prompt_parts.append(descriptions["easel"])
            if descriptions["palette"]:
                prompt_parts.append(descriptions["palette"])
        
        # 4. 场景环境
        if descriptions["background"]:
            prompt_parts.append(descriptions["background"])
        
        # 5. 光照条件（从全局形式特征推断）
        prompt_parts.append("Natural outdoor lighting, bright and soft sunlight")
        
        # 6. 情感氛围
        if "g3" in group_id:  # 人物特写
            prompt_parts.append("Peaceful and serene atmosphere, eyes closed in contemplation")
        else:  # 绘画场景
            prompt_parts.append("Focused and creative atmosphere, artist at work")
        
        # 7. 艺术风格
        prompt_parts.append("Photorealistic style, high quality photography, professional composition")
        
        # 组合成完整prompt
        prompt = ". ".join(filter(None, prompt_parts)) + "."
        
        return prompt
    
    def _simplify_pose(self, pose_desc: str) -> str:
        """简化姿态描述，提取关键信息"""
        # 提取关键动作词
        key_actions = []
        if "站立" in pose_desc:
            key_actions.append("standing")
        if "侧身" in pose_desc or "侧向" in pose_desc:
            key_actions.append("side view")
        if "蹲" in pose_desc:
            key_actions.append("crouching")
        if "背对" in pose_desc:
            key_actions.append("back view")
        if "持画笔" in pose_desc:
            key_actions.append("holding a paintbrush")
        if "持调色板" in pose_desc or "托举调色板" in pose_desc:
            key_actions.append("holding a palette")
        
        return ", ".join(key_actions) if key_actions else pose_desc[:100]
    
    def _simplify_clothing(self, clothing_desc: str) -> str:
        """简化着装描述"""
        # 提取关键服饰信息
        simplified = []
        if "白色" in clothing_desc and "连衣裙" in clothing_desc:
            simplified.append("white dress")
        if "长袖" in clothing_desc:
            simplified.append("long sleeves")
        if "V字形领口" in clothing_desc or "V领" in clothing_desc:
            simplified.append("V-neck")
        
        return ", ".join(simplified) if simplified else clothing_desc[:100]
    
    def generate_prompt(self, image_num: int) -> Dict[str, Any]:
        """
        为指定图片生成prompt
        
        Args:
            image_num: 图片编号 (1-9)
        
        Returns:
            包含prompt和元数据的字典
        """
        # 加载数据
        segment_data = self._load_image_segment(image_num)
        form_data = self._load_image_form(image_num)
        
        # 确定分组
        segment_file = list(self.input_dir.glob(f"01_图片分段_{image_num:02d}_*.json"))[0]
        group_id = "g1"  # 默认
        if "g2" in segment_file.name:
            group_id = "g2"
        elif "g3" in segment_file.name:
            group_id = "g3"
        
        # 提取关键描述
        descriptions = self._extract_key_descriptions(segment_data, form_data)
        
        # 构建prompt
        prompt = self._build_prompt_structure(descriptions, group_id)
        
        # 确定图片尺寸（竖版）
        size = "1024x1792"  # DALL-E 3 竖版尺寸
        
        return {
            "image_num": image_num,
            "group_id": group_id,
            "prompt": prompt,
            "size": size,
            "quality": "hd",
            "descriptions": descriptions
        }
    
    def generate_all_prompts(self) -> List[Dict[str, Any]]:
        """生成所有9张图片的prompts"""
        prompts = []
        for i in range(1, 10):
            try:
                prompt_data = self.generate_prompt(i)
                prompts.append(prompt_data)
                print(f"✓ 图片 {i} prompt已生成")
            except Exception as e:
                print(f"✗ 图片 {i} 生成失败: {e}")
        
        return prompts
    
    def save_prompts(self, prompts: List[Dict], output_file: str = "output_1/prompts.json"):
        """保存生成的prompts到文件"""
        output_path = Path(output_file)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(prompts, f, ensure_ascii=False, indent=2)
        
        print(f"\n✓ Prompts已保存到: {output_path}")


def main():
    """Generate all prompts, save them, and print a short preview.

    Uses a ``PromptGenerator`` with its default input directory and the
    default output file of ``save_prompts``. The preview shows at most the
    first three generated prompts, each labelled with its own image number
    and cut to 200 characters (an ellipsis marks a truncated prompt).
    """
    banner = "=" * 60
    print(banner)
    print("图片还原 Prompt 生成系统")
    print(banner)

    generator = PromptGenerator()

    print("\n开始生成prompts...")
    prompts = generator.generate_all_prompts()

    generator.save_prompts(prompts)

    print("\n" + banner)
    print("Prompt 预览（前3个）:")
    print(banner)
    for p in prompts[:3]:
        text = p['prompt']
        preview = text if len(text) <= 200 else f"{text[:200]}..."
        # Label with the real image number: failed images are skipped, so the
        # position in the list does not necessarily match the image number.
        print(f"\n图片 {p['image_num']} ({p['group_id']}):")
        print(f"Prompt: {preview}")

    print("\n" + banner)
    print(f"✓ 完成！共生成 {len(prompts)} 个prompts")
    print(banner)


# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()