# howard / Agent
"""
Multimodal feature extraction script - plein-air oil painting image set.

Extracted dimensions:
1. character_reference - character reference images (substance, for nanobanana)
2. pose_skeleton - human pose skeleton (DWPose, computed per image)
3. palette_texture - palette paint texture (substance, cropped image)
4. painting_tools - painting tools (substance, cropped image)
5. natural_background - natural background (substance, subject removed with rembg)
6. depth_map - depth map (form, Depth Anything V2)
7. color_palette_text - textual description of the colour palette (form)
"""

import os
import json
import warnings
warnings.filterwarnings('ignore')

import numpy as np
from PIL import Image, ImageDraw, ImageFont
import cv2

# Fixed project locations: source photos are read from <BASE_DIR>/input and
# every extracted feature is written below <BASE_DIR>/output/features.
BASE_DIR = "/Users/liuxiaobai/Desktop/Agent/Agent/examples/find knowledge"
INPUT_DIR = os.path.join(BASE_DIR, "input")
OUTPUT_DIR = os.path.join(BASE_DIR, "output/features")

# Make sure one output sub-directory exists for each feature dimension.
for dimension in (
    'character_reference',
    'pose_skeleton',
    'palette_texture',
    'painting_tools',
    'natural_background',
    'depth_map',
    'color_palette_text',
):
    dimension_dir = os.path.join(OUTPUT_DIR, dimension)
    os.makedirs(dimension_dir, exist_ok=True)

print("=" * 60)
print("步骤1: 加载所有图片")
print("=" * 60)

# Load img_1.jpg .. img_9.jpg from INPUT_DIR as RGB images keyed by "img_<n>".
images = {}
for i in range(1, 10):
    path = os.path.join(INPUT_DIR, f"img_{i}.jpg")
    # Image.open() is lazy and keeps the file handle open. convert() decodes
    # the pixels into a new, independent image, so the source file can be
    # closed immediately instead of leaking nine open handles.
    with Image.open(path) as src:
        img = src.convert("RGB")
    images[f"img_{i}"] = img
    print(f"  img_{i}: {img.size}")

# ============================================================
# Dimension 1: character_reference - character reference images
# Strategy: img_7 (profile close-up) gives the clearest face + body reference;
# img_6 (back close-up) additionally supplies a back-view reference.
# ============================================================
print("\n" + "=" * 60)
print("步骤2: 提取人物参考图 (character_reference)")
print("=" * 60)

# Output label -> source image. The references are stored untouched: the
# original pictures are simply re-saved as PNG.
#   img_7: profile + rose, clearest view of the face
#   img_6: back close-up including the earring
#   img_1: most complete full-body view (from behind, at an angle)
ref_imgs = {
    label: images[source_id]
    for label, source_id in (
        ("img_7_face_reference", "img_7"),
        ("img_6_back_reference", "img_6"),
        ("img_1_full_reference", "img_1"),
    )
}

character_dir = os.path.join(OUTPUT_DIR, "character_reference")
for label, picture in ref_imgs.items():
    picture.save(os.path.join(character_dir, f"{label}.png"))
    print(f"  保存: {label}.png ({picture.size})")

# ============================================================
# Dimension 2: pose_skeleton - human pose skeleton (DWPose)
# ============================================================
print("\n" + "=" * 60)
print("步骤3: 提取人体姿态骨架 (DWPose)")
print("=" * 60)

# Full-body / half-body shots that get a pose skeleton.
pose_imgs = ["img_1", "img_2", "img_3", "img_4", "img_8", "img_9"]

# Holds the OpenPose detector once it has been loaded successfully, so the
# per-image fallback does not re-instantiate the model for every failure.
_openpose_cache = []


def _get_openpose():
    """Return the OpenPose detector, loading its weights on first use only.

    Raises whatever the import or ``from_pretrained`` raises; a failed load is
    not cached, so a later call tries again.
    """
    if not _openpose_cache:
        from controlnet_aux import OpenposeDetector
        _openpose_cache.append(
            OpenposeDetector.from_pretrained('lllyasviel/ControlNet'))
    return _openpose_cache[0]


def _save_pose(detector, img_id, suffix):
    """Run ``detector`` on ``images[img_id]`` and save ``<img_id>_<suffix>.png``."""
    img = images[img_id]
    # The skeleton is rendered at the width of the source image.
    pose_result = detector(img, detect_resolution=512, image_resolution=img.size[0])
    out_path = os.path.join(OUTPUT_DIR, "pose_skeleton", f"{img_id}_{suffix}.png")
    pose_result.save(out_path)


try:
    from controlnet_aux import DWposeDetector
    dwpose = DWposeDetector()
    print("  DWPose加载成功")
except Exception as e:
    dwpose = None
    print(f"  DWPose加载失败: {e}")
    print("  尝试OpenPose...")

if dwpose is not None:
    # Preferred path: DWPose per image, falling back to OpenPose for any
    # single image that DWPose cannot process.
    for img_id in pose_imgs:
        try:
            _save_pose(dwpose, img_id, "dwpose")
            print(f"  ✓ {img_id}: 姿态提取成功")
        except Exception as e:
            print(f"  ✗ {img_id}: {e}")
            try:
                _save_pose(_get_openpose(), img_id, "openpose")
                print(f"  ✓ {img_id}: OpenPose降级成功")
            except Exception as e2:
                print(f"  ✗ {img_id} OpenPose也失败: {e2}")
else:
    # DWPose is unavailable: use OpenPose for every image.
    try:
        openpose = _get_openpose()
        print("  OpenPose加载成功")
    except Exception as e3:
        print(f"  OpenPose也失败: {e3}")
    else:
        for img_id in pose_imgs:
            try:
                _save_pose(openpose, img_id, "openpose")
                print(f"  ✓ {img_id}: OpenPose成功")
            except Exception as e2:
                print(f"  ✗ {img_id}: {e2}")

# ============================================================
# Dimension 3: palette_texture - palette paint texture
# Strategy: take the palette region from img_5 (palette close-up)
# ============================================================
print("\n" + "=" * 60)
print("步骤4: 提取调色板颜料质感 (palette_texture)")
print("=" * 60)

# img_5 is the clearest palette close-up; img_6 (painting close-up) also shows
# the palette. Both are saved in full.
palette_imgs = dict(
    img_5_palette_closeup=images["img_5"],
    img_6_palette_detail=images["img_6"],
)

palette_dir = os.path.join(OUTPUT_DIR, "palette_texture")
for label in palette_imgs:
    palette_imgs[label].save(os.path.join(palette_dir, f"{label}.png"))
    print(f"  保存: {label}.png")

# ============================================================
# Dimension 4: painting_tools - painting tools (easel + canvas)
# Strategy: img_4 shows the easel with a blank canvas most clearly
# ============================================================
print("\n" + "=" * 60)
print("步骤5: 提取绘画工具参考 (painting_tools)")
print("=" * 60)

# (output label, source image id) for each tool reference:
#   img_4 - easel + blank canvas
#   img_8 - easel + roses
#   img_3 - easel + finished oil painting
tool_sources = (
    ("img_4_easel_blank_canvas", "img_4"),
    ("img_8_easel_with_rose", "img_8"),
    ("img_3_easel_painting", "img_3"),
)
tool_imgs = {label: images[source_id] for label, source_id in tool_sources}

tools_dir = os.path.join(OUTPUT_DIR, "painting_tools")
for label, picture in tool_imgs.items():
    target = os.path.join(tools_dir, f"{label}.png")
    picture.save(target)
    print(f"  保存: {label}.png")

# ============================================================
# Dimension 5: natural_background - natural background
# Strategy: use rembg to locate the subject, then keep everything else
# ============================================================
print("\n" + "=" * 60)
print("步骤6: 提取自然背景 (natural_background)")
print("=" * 60)

# Shots in which the scenery takes up most of the frame.
bg_imgs = ["img_9", "img_3", "img_1"]

try:
    from rembg import remove
    print("  rembg加载成功")

    for img_id in bg_imgs:
        img = images[img_id]
        ref_path = os.path.join(OUTPUT_DIR, "natural_background", f"{img_id}_bg_reference.png")
        try:
            # rembg removes the *background*: its output keeps the subject
            # opaque and makes the scenery transparent. For a background
            # feature we need the opposite, so the subject's alpha mask is
            # inverted and applied to the original picture.
            cutout = remove(img).convert("RGBA")
            subject_alpha = cutout.getchannel("A")
            background = img.convert("RGBA")
            background.putalpha(subject_alpha.point(lambda a: 255 - a))

            # Untouched original, kept as a plain background reference.
            img.save(ref_path)

            # Original scenery with the subject area made transparent.
            out_path2 = os.path.join(OUTPUT_DIR, "natural_background", f"{img_id}_fg_removed.png")
            background.save(out_path2)
            print(f"  ✓ {img_id}: 背景提取成功")
        except Exception as e:
            # Best effort: still keep the plain reference for this image.
            print(f"  ✗ {img_id}: {e}")
            img.save(ref_path)

except Exception as e:
    print(f"  rembg失败: {e}")
    # Fallback: rembg is unusable, so only the plain references are saved.
    for img_id in bg_imgs:
        images[img_id].save(os.path.join(OUTPUT_DIR, "natural_background", f"{img_id}_bg_reference.png"))
        print(f"  降级保存: {img_id}")

# ============================================================
# Dimension 6: depth_map - depth map (Depth Anything V2)
# ============================================================
print("\n" + "=" * 60)
print("步骤7: 提取深度图 (Depth Anything)")
print("=" * 60)

try:
    from transformers import pipeline
    print("  加载Depth Anything V2...")

    # Monocular depth estimation with the small Depth Anything V2 checkpoint,
    # run on the CPU.
    depth_pipe = pipeline(
        task="depth-estimation",
        model="depth-anything/Depth-Anything-V2-Small-hf",
        device="cpu"
    )
    print("  Depth Anything V2加载成功")

    # A depth map is produced for every loaded image.
    for img_id, img in images.items():
        try:
            result = depth_pipe(img)
            depth_img = result["depth"]

            # Stretch the depth values to the full 0-255 range for viewing.
            depth_array = np.array(depth_img).astype(np.float64)
            lowest = depth_array.min()
            span = depth_array.max() - lowest
            shifted = depth_array - lowest
            if span > 0:
                shifted = shifted / span * 255
            # A perfectly flat depth map has span == 0; dividing would give
            # NaN, so it is left as all zeros instead.
            depth_norm = shifted.astype(np.uint8)
            depth_visual = Image.fromarray(depth_norm)

            out_path = os.path.join(OUTPUT_DIR, "depth_map", f"{img_id}_depth.png")
            depth_visual.save(out_path)
            print(f"  ✓ {img_id}: 深度图提取成功")
        except Exception as e:
            print(f"  ✗ {img_id}: {e}")

except Exception as e:
    print(f"  Depth Anything失败: {e}")
    print("  尝试controlnet_aux的MiDaS...")
    # Fallback: MiDaS depth estimation from controlnet_aux.
    try:
        from controlnet_aux import MidasDetector
        midas = MidasDetector.from_pretrained("lllyasviel/Annotators")
        print("  MiDaS加载成功")

        for img_id, img in images.items():
            try:
                depth_result = midas(img, detect_resolution=512, image_resolution=img.size[0])
                out_path = os.path.join(OUTPUT_DIR, "depth_map", f"{img_id}_midas_depth.png")
                depth_result.save(out_path)
                print(f"  ✓ {img_id}: MiDaS深度图成功")
            except Exception as e2:
                print(f"  ✗ {img_id}: {e2}")
    except Exception as e3:
        print(f"  MiDaS也失败: {e3}")

# ============================================================
# Dimension 7: color_palette_text - colour palette (textual description)
# The dominant colours are extracted in Python and turned into descriptions.
# ============================================================
print(
    "\n" + "=" * 60,
    "步骤8: 提取色彩调色板 (color_palette_text)",
    "=" * 60,
    sep="\n",
)

def extract_color_palette(img, n_colors=8):
    """Extract the dominant colours of an image with K-means clustering.

    Args:
        img: PIL image. It is converted to RGB first so that images with an
            alpha channel or a single band are not mis-reshaped.
        n_colors: Number of K-means clusters to fit.

    Returns:
        ``(colors, percentages)``: ``colors`` is an integer array of RGB
        cluster centres (one row per colour) and ``percentages`` the share of
        pixels in each cluster, both sorted from most to least frequent.
        Clusters that received no pixels are omitted, so fewer than
        ``n_colors`` rows may be returned.
    """
    from sklearn.cluster import KMeans

    # Cluster on a 150x150 thumbnail of the image rather than the full picture.
    img_small = img.convert("RGB").resize((150, 150))
    pixels = np.array(img_small).reshape(-1, 3).astype(float)

    kmeans = KMeans(n_clusters=n_colors, random_state=42, n_init=10)
    kmeans.fit(pixels)

    colors = kmeans.cluster_centers_.astype(int)
    labels = kmeans.labels_

    # minlength keeps counts aligned with the cluster centres even when the
    # highest-numbered clusters received no pixels.
    counts = np.bincount(labels, minlength=n_colors)
    percentages = counts / len(labels) * 100

    # Most frequent first; clusters without any pixel are dropped.
    sorted_idx = np.argsort(percentages)[::-1]
    sorted_idx = sorted_idx[counts[sorted_idx] > 0]

    return colors[sorted_idx], percentages[sorted_idx]

def rgb_to_hex(rgb):
    """Format the first three items of ``rgb`` as an upper-case ``#RRGGBB`` string."""
    red, green, blue = rgb[0], rgb[1], rgb[2]
    return "#{:02X}{:02X}{:02X}".format(red, green, blue)

def rgb_to_hsv_desc(rgb):
    """Convert an RGB colour to HSV and describe it in words.

    Args:
        rgb: Sequence whose first three items are the R, G, B channels
            (0-255).

    Returns:
        ``(description, h, s, v)`` where ``description`` is
        ``"<value> <saturation> <hue>"`` (e.g. ``"light vivid blue"``), ``h``
        is the hue in degrees (OpenCV's half-degree hue doubled, so always
        even), and ``s`` / ``v`` are percentages (0-100).
    """
    pixel = np.array([[[rgb[0], rgb[1], rgb[2]]]], dtype=np.uint8)
    # For 8-bit input OpenCV returns H in 0..179 (degrees / 2) and S, V in
    # 0..255. Plain ints avoid uint8 surprises in the arithmetic below.
    h, s, v = (int(c) for c in cv2.cvtColor(pixel, cv2.COLOR_RGB2HSV)[0][0])

    # Hue bands as (exclusive upper bound on OpenCV's half-degree scale, name).
    # In degrees: red 330-16, orange 16-46, yellow 46-70, yellow-green 70-100,
    # green 100-150, cyan-green 150-170, cyan 170-200, blue-cyan 200-220,
    # blue 220-260, purple 260-330. Anything past the last bound wraps to red.
    hue_bands = (
        (8, "red"),
        (23, "orange"),
        (35, "yellow"),
        (50, "yellow-green"),
        (75, "green"),
        (85, "cyan-green"),
        (100, "cyan"),
        (110, "blue-cyan"),
        (130, "blue"),
        (165, "purple"),
    )

    # Hue name: nearly unsaturated colours are named by brightness alone.
    if s < 30:
        if v < 50:
            hue_name = "black"
        elif v > 200:
            hue_name = "white"
        else:
            hue_name = "gray"
    else:
        hue_name = next((name for upper, name in hue_bands if h < upper), "red")

    # Saturation name
    if s < 50:
        sat_name = "desaturated"
    elif s < 120:
        sat_name = "muted"
    elif s < 200:
        sat_name = "saturated"
    else:
        sat_name = "vivid"

    # Brightness name
    if v < 80:
        val_name = "dark"
    elif v < 160:
        val_name = "mid-tone"
    else:
        val_name = "light"

    return f"{val_name} {sat_name} {hue_name}", h * 2, int(s / 255 * 100), int(v / 255 * 100)

# Build the per-image colour palette data, save it as JSON and render one
# swatch strip per image. Any failure is reported and the step is skipped.
try:
    # Imported here so that a missing scikit-learn is reported by the handler
    # below before any work starts.
    from sklearn.cluster import KMeans

    color_data = {}

    for img_id, img in images.items():
        colors, percentages = extract_color_palette(img, n_colors=8)

        palette_info = []
        for rank, (color, pct) in enumerate(zip(colors, percentages), start=1):
            desc, h, s, v = rgb_to_hsv_desc(color)
            entry = {
                "rank": rank,
                "hex": rgb_to_hex(color),
                "rgb": [int(color[0]), int(color[1]), int(color[2])],
                "hsv": {"h": h, "s": s, "v": v},
                "description": desc,
                "percentage": round(float(pct), 1)
            }
            palette_info.append(entry)

        color_data[img_id] = palette_info
        print(f"  ✓ {img_id}: 提取{len(palette_info)}个主色调")
        # Show the three most frequent colours on the console.
        for top in palette_info[:3]:
            print(f"      {top['hex']} ({top['percentage']}%) - {top['description']}")

    # Persist the palette data for all images in a single JSON file.
    out_path = os.path.join(OUTPUT_DIR, "color_palette_text", "all_images_color_palette.json")
    with open(out_path, 'w', encoding='utf-8') as f:
        json.dump(color_data, f, ensure_ascii=False, indent=2)
    print(f"\n  色彩数据已保存: {out_path}")

    # Render one swatch strip per image: each colour's width is proportional
    # to its share of pixels, with a 5 px minimum.
    for img_id, palette in color_data.items():
        palette_img = Image.new('RGB', (800, 120), 'white')
        draw = ImageDraw.Draw(palette_img)

        left = 0
        for swatch in palette[:8]:
            width = max(5, int(800 * swatch['percentage'] / 100))
            right = left + width
            draw.rectangle([left, 0, right, 80], fill=tuple(swatch['rgb']))
            left = right

        out_path = os.path.join(OUTPUT_DIR, "color_palette_text", f"{img_id}_palette.png")
        palette_img.save(out_path)

    print("  色彩可视化图已保存")

except Exception as e:
    print(f"  色彩提取失败: {e}")

print("\n" + "=" * 60)
print("特征提取完成！")
print("=" * 60)

# Summarise the result: every feature directory with its files and their
# sizes in whole kilobytes.
for dim in os.listdir(OUTPUT_DIR):
    dim_path = os.path.join(OUTPUT_DIR, dim)
    if not os.path.isdir(dim_path):
        continue
    entries = sorted(os.listdir(dim_path))
    print(f"\n  {dim}/")
    for f in entries:
        size_kb = os.path.getsize(os.path.join(dim_path, f)) // 1024
        print(f"    {f} ({size_kb}KB)")