""" 多模态特征提取脚本 - 写生油画图组 提取维度: 1. character_reference - 人物参考图(实质,nanobanana用) 2. pose_skeleton - 人体姿态骨架(DWPose,各图独立) 3. palette_texture - 调色板颜料质感(实质,裁剪图) 4. painting_tools - 绘画工具(实质,裁剪图) 5. natural_background - 自然背景(实质,rembg去主体) 6. depth_map - 深度图(形式,Depth Anything V2) 7. color_palette_text - 色彩调色板文字描述(形式) """ import os import json import warnings warnings.filterwarnings('ignore') import numpy as np from PIL import Image, ImageDraw, ImageFont import cv2 BASE_DIR = "/Users/liuxiaobai/Desktop/Agent/Agent/examples/find knowledge" INPUT_DIR = os.path.join(BASE_DIR, "input") OUTPUT_DIR = os.path.join(BASE_DIR, "output/features") # 确保输出目录存在 for d in ['character_reference', 'pose_skeleton', 'palette_texture', 'painting_tools', 'natural_background', 'depth_map', 'color_palette_text']: os.makedirs(os.path.join(OUTPUT_DIR, d), exist_ok=True) print("=" * 60) print("步骤1: 加载所有图片") print("=" * 60) images = {} for i in range(1, 10): path = os.path.join(INPUT_DIR, f"img_{i}.jpg") img = Image.open(path).convert("RGB") images[f"img_{i}"] = img print(f" img_{i}: {img.size}") # ============================================================ # 维度1: character_reference - 人物参考图 # 策略:从img_7(侧脸特写)提取最清晰的人物面部+身体参考 # 同时从img_6(背部特写)提取背影参考 # ============================================================ print("\n" + "=" * 60) print("步骤2: 提取人物参考图 (character_reference)") print("=" * 60) # img_7是侧脸特写,最能体现人物面部特征 # img_6是背部+耳饰特写 # img_1是全身最完整的侧后方视角 # 保存关键参考图(不做任何修改,直接保存原图) ref_imgs = { "img_7_face_reference": images["img_7"], # 侧脸+玫瑰,最清晰面部 "img_6_back_reference": images["img_6"], # 背部特写+耳饰 "img_1_full_reference": images["img_1"], # 全身参考 } for name, img in ref_imgs.items(): out_path = os.path.join(OUTPUT_DIR, "character_reference", f"{name}.png") img.save(out_path) print(f" 保存: {name}.png ({img.size})") # ============================================================ # 维度2: pose_skeleton - 人体姿态骨架 (DWPose) # ============================================================ print("\n" + "=" * 60) print("步骤3: 提取人体姿态骨架 (DWPose)") print("=" * 60) try: from controlnet_aux import DWposeDetector dwpose = DWposeDetector() print(" DWPose加载成功") # 对每张图提取姿态 pose_imgs = ["img_1", "img_2", "img_3", "img_4", "img_8", "img_9"] # 全身/半身图 for img_id in pose_imgs: img = images[img_id] try: pose_result = dwpose(img, detect_resolution=512, image_resolution=img.size[0]) out_path = os.path.join(OUTPUT_DIR, "pose_skeleton", f"{img_id}_dwpose.png") pose_result.save(out_path) print(f" ✓ {img_id}: 姿态提取成功") except Exception as e: print(f" ✗ {img_id}: {e}") # 降级:使用OpenPose try: from controlnet_aux import OpenposeDetector openpose = OpenposeDetector.from_pretrained('lllyasviel/ControlNet') pose_result = openpose(img, detect_resolution=512, image_resolution=img.size[0]) out_path = os.path.join(OUTPUT_DIR, "pose_skeleton", f"{img_id}_openpose.png") pose_result.save(out_path) print(f" ✓ {img_id}: OpenPose降级成功") except Exception as e2: print(f" ✗ {img_id} OpenPose也失败: {e2}") except Exception as e: print(f" DWPose加载失败: {e}") print(" 尝试OpenPose...") try: from controlnet_aux import OpenposeDetector openpose = OpenposeDetector.from_pretrained('lllyasviel/ControlNet') print(" OpenPose加载成功") pose_imgs = ["img_1", "img_2", "img_3", "img_4", "img_8", "img_9"] for img_id in pose_imgs: img = images[img_id] try: pose_result = openpose(img, detect_resolution=512, image_resolution=img.size[0]) out_path = os.path.join(OUTPUT_DIR, "pose_skeleton", f"{img_id}_openpose.png") pose_result.save(out_path) print(f" ✓ {img_id}: OpenPose成功") except Exception as e2: print(f" ✗ {img_id}: {e2}") except Exception as e3: print(f" OpenPose也失败: {e3}") # ============================================================ # 维度3: palette_texture - 调色板颜料质感 # 策略:从img_5(调色板特写)裁剪调色板区域 # ============================================================ print("\n" + "=" * 60) print("步骤4: 提取调色板颜料质感 (palette_texture)") print("=" * 60) # img_5是调色板最清晰的特写 # img_6也有调色板 palette_imgs = { "img_5_palette_closeup": images["img_5"], # 调色板特写 "img_6_palette_detail": images["img_6"], # 作画特写含调色板 } for name, img in palette_imgs.items(): out_path = os.path.join(OUTPUT_DIR, "palette_texture", f"{name}.png") img.save(out_path) print(f" 保存: {name}.png") # ============================================================ # 维度4: painting_tools - 绘画工具(画架+画布) # 策略:从img_4(画架+空白画布最清晰)提取 # ============================================================ print("\n" + "=" * 60) print("步骤5: 提取绘画工具参考 (painting_tools)") print("=" * 60) tool_imgs = { "img_4_easel_blank_canvas": images["img_4"], # 画架+空白画布 "img_8_easel_with_rose": images["img_8"], # 画架+玫瑰花 "img_3_easel_painting": images["img_3"], # 画架+油画作品 } for name, img in tool_imgs.items(): out_path = os.path.join(OUTPUT_DIR, "painting_tools", f"{name}.png") img.save(out_path) print(f" 保存: {name}.png") # ============================================================ # 维度5: natural_background - 自然背景 # 策略:使用rembg去除主体,保留背景 # ============================================================ print("\n" + "=" * 60) print("步骤6: 提取自然背景 (natural_background)") print("=" * 60) try: from rembg import remove print(" rembg加载成功") # 选择背景最清晰的图片 bg_imgs = ["img_9", "img_3", "img_1"] # 背景占比大的图 for img_id in bg_imgs: img = images[img_id] try: # 去除前景,保留背景 result = remove(img) # 将透明区域填充为白色(前景位置),保留背景 bg_array = np.array(result) # 创建背景蒙版:alpha=0的区域是前景(被去除的),alpha>0是背景 # 实际上rembg去除背景,我们需要反向操作 # 直接保存原图作为背景参考,并保存去背景版本 # 保存原图(背景参考) out_path = os.path.join(OUTPUT_DIR, "natural_background", f"{img_id}_bg_reference.png") img.save(out_path) # 保存去主体版本(背景分离) out_path2 = os.path.join(OUTPUT_DIR, "natural_background", f"{img_id}_fg_removed.png") result.save(out_path2) print(f" ✓ {img_id}: 背景提取成功") except Exception as e: print(f" ✗ {img_id}: {e}") img.save(os.path.join(OUTPUT_DIR, "natural_background", f"{img_id}_bg_reference.png")) except Exception as e: print(f" rembg失败: {e}") # 降级:直接保存背景参考图 for img_id in ["img_9", "img_3", "img_1"]: images[img_id].save(os.path.join(OUTPUT_DIR, "natural_background", f"{img_id}_bg_reference.png")) print(f" 降级保存: {img_id}") # ============================================================ # 维度6: depth_map - 深度图 (Depth Anything V2) # ============================================================ print("\n" + "=" * 60) print("步骤7: 提取深度图 (Depth Anything)") print("=" * 60) try: from transformers import pipeline print(" 加载Depth Anything V2...") # 使用Depth Anything V2 - 最新最强的单目深度估计模型 depth_pipe = pipeline( task="depth-estimation", model="depth-anything/Depth-Anything-V2-Small-hf", device="cpu" ) print(" Depth Anything V2加载成功") # 对所有图提取深度图 for img_id, img in images.items(): try: result = depth_pipe(img) depth_img = result["depth"] # 转换为可视化深度图 depth_array = np.array(depth_img) # 归一化到0-255 depth_norm = ((depth_array - depth_array.min()) / (depth_array.max() - depth_array.min()) * 255).astype(np.uint8) depth_visual = Image.fromarray(depth_norm) out_path = os.path.join(OUTPUT_DIR, "depth_map", f"{img_id}_depth.png") depth_visual.save(out_path) print(f" ✓ {img_id}: 深度图提取成功") except Exception as e: print(f" ✗ {img_id}: {e}") except Exception as e: print(f" Depth Anything失败: {e}") print(" 尝试controlnet_aux的MiDaS...") try: from controlnet_aux import MidasDetector midas = MidasDetector.from_pretrained("lllyasviel/Annotators") print(" MiDaS加载成功") for img_id, img in images.items(): try: depth_result = midas(img, detect_resolution=512, image_resolution=img.size[0]) out_path = os.path.join(OUTPUT_DIR, "depth_map", f"{img_id}_midas_depth.png") depth_result.save(out_path) print(f" ✓ {img_id}: MiDaS深度图成功") except Exception as e2: print(f" ✗ {img_id}: {e2}") except Exception as e3: print(f" MiDaS也失败: {e3}") # ============================================================ # 维度7: color_palette_text - 色彩调色板(文字描述) # 使用Python提取主色调,生成专业色彩描述 # ============================================================ print("\n" + "=" * 60) print("步骤8: 提取色彩调色板 (color_palette_text)") print("=" * 60) def extract_color_palette(img, n_colors=8): """提取图片主色调""" img_small = img.resize((150, 150)) img_array = np.array(img_small).reshape(-1, 3).astype(float) # K-means聚类 from sklearn.cluster import KMeans kmeans = KMeans(n_clusters=n_colors, random_state=42, n_init=10) kmeans.fit(img_array) colors = kmeans.cluster_centers_.astype(int) labels = kmeans.labels_ # 计算每个颜色的占比 counts = np.bincount(labels) percentages = counts / len(labels) * 100 # 按占比排序 sorted_idx = np.argsort(percentages)[::-1] colors = colors[sorted_idx] percentages = percentages[sorted_idx] return colors, percentages def rgb_to_hex(rgb): return f"#{rgb[0]:02X}{rgb[1]:02X}{rgb[2]:02X}" def rgb_to_hsv_desc(rgb): """将RGB转为HSV并给出描述""" r, g, b = rgb[0]/255, rgb[1]/255, rgb[2]/255 h, s, v = cv2.cvtColor(np.array([[[rgb[0], rgb[1], rgb[2]]]], dtype=np.uint8), cv2.COLOR_RGB2HSV)[0][0] # 色相描述 if s < 30: if v < 50: hue_name = "black" elif v > 200: hue_name = "white" else: hue_name = "gray" elif h < 15 or h > 165: hue_name = "red" elif h < 30: hue_name = "orange" elif h < 45: hue_name = "yellow" elif h < 75: hue_name = "yellow-green" elif h < 105: hue_name = "green" elif h < 120: hue_name = "cyan-green" elif h < 135: hue_name = "cyan" elif h < 150: hue_name = "blue-cyan" elif h < 165: hue_name = "blue" else: hue_name = "purple" # 饱和度描述 if s < 50: sat_name = "desaturated" elif s < 120: sat_name = "muted" elif s < 200: sat_name = "saturated" else: sat_name = "vivid" # 亮度描述 if v < 80: val_name = "dark" elif v < 160: val_name = "mid-tone" else: val_name = "light" return f"{val_name} {sat_name} {hue_name}", int(h)*2, int(s/255*100), int(v/255*100) try: from sklearn.cluster import KMeans color_data = {} for img_id, img in images.items(): colors, percentages = extract_color_palette(img, n_colors=8) palette_info = [] for i, (color, pct) in enumerate(zip(colors, percentages)): desc, h, s, v = rgb_to_hsv_desc(color) palette_info.append({ "rank": i + 1, "hex": rgb_to_hex(color), "rgb": [int(color[0]), int(color[1]), int(color[2])], "hsv": {"h": h, "s": s, "v": v}, "description": desc, "percentage": round(float(pct), 1) }) color_data[img_id] = palette_info print(f" ✓ {img_id}: 提取{len(palette_info)}个主色调") for p in palette_info[:3]: print(f" {p['hex']} ({p['percentage']}%) - {p['description']}") # 保存色彩数据 out_path = os.path.join(OUTPUT_DIR, "color_palette_text", "all_images_color_palette.json") with open(out_path, 'w', encoding='utf-8') as f: json.dump(color_data, f, ensure_ascii=False, indent=2) print(f"\n 色彩数据已保存: {out_path}") # 生成色彩可视化图 for img_id, palette in color_data.items(): palette_img = Image.new('RGB', (800, 120), 'white') draw = ImageDraw.Draw(palette_img) x = 0 for p in palette[:8]: w = int(800 * p['percentage'] / 100) if w < 5: w = 5 color_tuple = tuple(p['rgb']) draw.rectangle([x, 0, x+w, 80], fill=color_tuple) x += w out_path = os.path.join(OUTPUT_DIR, "color_palette_text", f"{img_id}_palette.png") palette_img.save(out_path) print(" 色彩可视化图已保存") except Exception as e: print(f" 色彩提取失败: {e}") print("\n" + "=" * 60) print("特征提取完成!") print("=" * 60) # 列出所有输出文件 for dim in os.listdir(OUTPUT_DIR): dim_path = os.path.join(OUTPUT_DIR, dim) if os.path.isdir(dim_path): files = os.listdir(dim_path) print(f"\n {dim}/") for f in sorted(files): fpath = os.path.join(dim_path, f) size = os.path.getsize(fpath) print(f" {f} ({size//1024}KB)")