| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399 |
- """
- 多模态特征提取脚本 - 写生油画图组
- 提取维度:
- 1. character_reference - 人物参考图(实质,nanobanana用)
- 2. pose_skeleton - 人体姿态骨架(DWPose,各图独立)
- 3. palette_texture - 调色板颜料质感(实质,裁剪图)
- 4. painting_tools - 绘画工具(实质,裁剪图)
- 5. natural_background - 自然背景(实质,rembg去主体)
- 6. depth_map - 深度图(形式,Depth Anything V2)
- 7. color_palette_text - 色彩调色板文字描述(形式)
- """
- import os
- import json
- import warnings
- warnings.filterwarnings('ignore')
- import numpy as np
- from PIL import Image, ImageDraw, ImageFont
- import cv2
# Root of the working tree.  The default is the original machine-specific
# location; set FEATURE_EXTRACT_BASE_DIR to run the script elsewhere without
# editing this file.
BASE_DIR = os.environ.get(
    "FEATURE_EXTRACT_BASE_DIR",
    "/Users/liuxiaobai/Desktop/Agent/Agent/examples/find knowledge",
)
INPUT_DIR = os.path.join(BASE_DIR, "input")
OUTPUT_DIR = os.path.join(BASE_DIR, "output/features")

# Make sure one output sub-directory exists per extracted feature dimension.
for d in (
    'character_reference',
    'pose_skeleton',
    'palette_texture',
    'painting_tools',
    'natural_background',
    'depth_map',
    'color_palette_text',
):
    os.makedirs(os.path.join(OUTPUT_DIR, d), exist_ok=True)
# Step 1: read img_1.jpg .. img_9.jpg from INPUT_DIR into memory as RGB.
for banner_line in ("=" * 60, "步骤1: 加载所有图片", "=" * 60):
    print(banner_line)

images = {}
for i in range(1, 10):
    key = f"img_{i}"
    img = Image.open(os.path.join(INPUT_DIR, key + ".jpg")).convert("RGB")
    images[key] = img
    print(f" {key}: {img.size}")
# ============================================================
# Dimension 1: character_reference - character reference images
# Strategy: img_7 (profile close-up with rose) is the clearest face
#           reference, img_6 (back close-up with earring) is the back-view
#           reference, and img_1 is the most complete full-body view.
# The originals are written out unmodified.
# ============================================================
print("\n" + "=" * 60)
print("步骤2: 提取人物参考图 (character_reference)")
print("=" * 60)

ref_imgs = {
    f"{source_id}_{role}_reference": images[source_id]
    for source_id, role in (("img_7", "face"), ("img_6", "back"), ("img_1", "full"))
}
for name, img in ref_imgs.items():
    file_name = f"{name}.png"
    out_path = os.path.join(OUTPUT_DIR, "character_reference", file_name)
    img.save(out_path)
    print(f" 保存: {file_name} ({img.size})")
# ============================================================
# Dimension 2: pose_skeleton - human pose skeleton (DWPose)
# DWPose is preferred; OpenPose is the fallback, both when DWPose cannot be
# loaded at all and when it fails on an individual image.
# ============================================================
print("\n" + "=" * 60)
print("步骤3: 提取人体姿态骨架 (DWPose)")
print("=" * 60)

# Full-body / half-body shots that are worth running pose estimation on.
pose_imgs = ["img_1", "img_2", "img_3", "img_4", "img_8", "img_9"]

_openpose_cache = []


def _get_openpose():
    """Return the OpenPose detector, loading the weights at most once.

    A failed load is not cached, so a later call tries again.
    """
    if not _openpose_cache:
        from controlnet_aux import OpenposeDetector
        _openpose_cache.append(OpenposeDetector.from_pretrained('lllyasviel/ControlNet'))
    return _openpose_cache[0]


def _save_pose(detector, img_id, suffix):
    """Run *detector* on images[img_id] and save `<img_id>_<suffix>.png`."""
    img = images[img_id]
    pose_result = detector(img, detect_resolution=512, image_resolution=img.size[0])
    out_path = os.path.join(OUTPUT_DIR, "pose_skeleton", f"{img_id}_{suffix}.png")
    pose_result.save(out_path)


try:
    from controlnet_aux import DWposeDetector
    dwpose = DWposeDetector()
    print(" DWPose加载成功")
except Exception as e:
    dwpose = None
    print(f" DWPose加载失败: {e}")
    print(" 尝试OpenPose...")

if dwpose is not None:
    for img_id in pose_imgs:
        try:
            _save_pose(dwpose, img_id, "dwpose")
            print(f" ✓ {img_id}: 姿态提取成功")
        except Exception as e:
            print(f" ✗ {img_id}: {e}")
            # Per-image fallback; the OpenPose weights are shared across
            # images instead of being reloaded for every failure.
            try:
                _save_pose(_get_openpose(), img_id, "openpose")
                print(f" ✓ {img_id}: OpenPose降级成功")
            except Exception as e2:
                print(f" ✗ {img_id} OpenPose也失败: {e2}")
else:
    try:
        openpose = _get_openpose()
        print(" OpenPose加载成功")
    except Exception as e3:
        openpose = None
        print(f" OpenPose也失败: {e3}")

    if openpose is not None:
        for img_id in pose_imgs:
            try:
                _save_pose(openpose, img_id, "openpose")
                print(f" ✓ {img_id}: OpenPose成功")
            except Exception as e2:
                print(f" ✗ {img_id}: {e2}")
# ============================================================
# Dimension 3: palette_texture - paint texture on the palette
# img_5 is the clearest palette close-up; img_6 (painting close-up) also
# shows the palette.  Both are saved as full, unmodified images.
# ============================================================
print("\n" + "=" * 60)
print("步骤4: 提取调色板颜料质感 (palette_texture)")
print("=" * 60)

palette_imgs = {}
for name, source_id in (
    ("img_5_palette_closeup", "img_5"),
    ("img_6_palette_detail", "img_6"),
):
    palette_imgs[name] = images[source_id]

for name in palette_imgs:
    target = os.path.join(OUTPUT_DIR, "palette_texture", name + ".png")
    palette_imgs[name].save(target)
    print(f" 保存: {name}.png")
# ============================================================
# Dimension 4: painting_tools - painting tools (easel + canvas)
# img_4 shows the easel with a blank canvas, img_8 the easel with roses,
# img_3 the easel with the oil painting.  Saved as full, unmodified images.
# ============================================================
print("\n" + "=" * 60)
print("步骤5: 提取绘画工具参考 (painting_tools)")
print("=" * 60)

tool_imgs = dict(
    img_4_easel_blank_canvas=images["img_4"],
    img_8_easel_with_rose=images["img_8"],
    img_3_easel_painting=images["img_3"],
)
tools_dir = os.path.join(OUTPUT_DIR, "painting_tools")
for name, img in tool_imgs.items():
    img.save(os.path.join(tools_dir, f"{name}.png"))
    print(f" 保存: {name}.png")
# ============================================================
# Dimension 5: natural_background - natural background
# For each selected image two files are written:
#   <id>_bg_reference.png - the untouched original
#   <id>_fg_removed.png   - the original with the subject made transparent
# rembg's remove() keeps the subject and makes the background transparent,
# which is the opposite of what this dimension needs, so its alpha channel
# is inverted before saving.
# ============================================================
print("\n" + "=" * 60)
print("步骤6: 提取自然背景 (natural_background)")
print("=" * 60)

# Images in which the background takes up most of the frame.
bg_imgs = ["img_9", "img_3", "img_1"]

try:
    from rembg import remove
    print(" rembg加载成功")
except Exception as e:
    remove = None
    print(f" rembg失败: {e}")

for img_id in bg_imgs:
    img = images[img_id]

    # The original is always kept as the background reference, whether or
    # not subject removal is available or succeeds.
    img.save(os.path.join(OUTPUT_DIR, "natural_background", f"{img_id}_bg_reference.png"))

    if remove is None:
        print(f" 降级保存: {img_id}")
        continue

    try:
        cutout = remove(img)
        # Alpha of the cut-out is high where the subject is; invert it so the
        # subject becomes transparent and the background stays visible.
        subject_alpha = np.array(cutout.convert("RGBA"))[..., 3]
        background_rgba = np.dstack([np.array(img), 255 - subject_alpha])

        out_path2 = os.path.join(OUTPUT_DIR, "natural_background", f"{img_id}_fg_removed.png")
        Image.fromarray(background_rgba).save(out_path2)
        print(f" ✓ {img_id}: 背景提取成功")
    except Exception as e:
        print(f" ✗ {img_id}: {e}")
# ============================================================
# Dimension 6: depth_map - depth map (Depth Anything V2)
# Primary path: the transformers depth-estimation pipeline.
# Fallback when that cannot be loaded: MiDaS from controlnet_aux.
# ============================================================
print("\n" + "=" * 60)
print("步骤7: 提取深度图 (Depth Anything)")
print("=" * 60)
try:
    from transformers import pipeline
    print(" 加载Depth Anything V2...")

    depth_pipe = pipeline(
        task="depth-estimation",
        model="depth-anything/Depth-Anything-V2-Small-hf",
        device="cpu"
    )
    print(" Depth Anything V2加载成功")

    # Depth is extracted for every loaded image.
    for img_id, img in images.items():
        try:
            result = depth_pipe(img)
            depth_array = np.array(result["depth"], dtype=np.float64)

            # Stretch to the full 0-255 range.  A constant depth map has a
            # zero range; emit an all-zero image instead of dividing by zero.
            lowest = depth_array.min()
            span = depth_array.max() - lowest
            if span > 0:
                depth_norm = ((depth_array - lowest) / span * 255).astype(np.uint8)
            else:
                depth_norm = np.zeros(depth_array.shape, dtype=np.uint8)
            depth_visual = Image.fromarray(depth_norm)

            out_path = os.path.join(OUTPUT_DIR, "depth_map", f"{img_id}_depth.png")
            depth_visual.save(out_path)
            print(f" ✓ {img_id}: 深度图提取成功")
        except Exception as e:
            print(f" ✗ {img_id}: {e}")

except Exception as e:
    print(f" Depth Anything失败: {e}")
    print(" 尝试controlnet_aux的MiDaS...")
    try:
        from controlnet_aux import MidasDetector
        midas = MidasDetector.from_pretrained("lllyasviel/Annotators")
        print(" MiDaS加载成功")

        for img_id, img in images.items():
            try:
                depth_result = midas(img, detect_resolution=512, image_resolution=img.size[0])
                out_path = os.path.join(OUTPUT_DIR, "depth_map", f"{img_id}_midas_depth.png")
                depth_result.save(out_path)
                print(f" ✓ {img_id}: MiDaS深度图成功")
            except Exception as e2:
                print(f" ✗ {img_id}: {e2}")
    except Exception as e3:
        print(f" MiDaS也失败: {e3}")
# ============================================================
# Dimension 7: color_palette_text - colour palette (text description)
# Dominant colours are extracted in Python and turned into descriptions.
# ============================================================
for _banner_line in (
    "\n" + "=" * 60,
    "步骤8: 提取色彩调色板 (color_palette_text)",
    "=" * 60,
):
    print(_banner_line)
def extract_color_palette(img, n_colors=8):
    """Find the dominant colours of *img* with K-means clustering.

    The image is first resized to 150x150 and its pixels are clustered into
    ``n_colors`` groups.

    Returns:
        A ``(colors, percentages)`` pair ordered from the largest cluster to
        the smallest: ``colors`` holds the integer-truncated cluster centres
        and ``percentages`` the share of pixels (0-100) in each cluster.
    """
    pixels = np.array(img.resize((150, 150))).reshape(-1, 3).astype(float)

    from sklearn.cluster import KMeans
    model = KMeans(n_clusters=n_colors, random_state=42, n_init=10)
    model.fit(pixels)

    centers = model.cluster_centers_.astype(int)
    assignments = model.labels_

    # Share of pixels assigned to each cluster, in percent.
    share = np.bincount(assignments) / len(assignments) * 100

    # Largest cluster first.
    order = np.argsort(share)[::-1]
    return centers[order], share[order]
def rgb_to_hex(rgb):
    """Format the first three channels of *rgb* as an upper-case ``#RRGGBB``.

    Each channel is rounded to the nearest integer and clamped to 0-255, so
    float values (e.g. raw cluster centres) and slightly out-of-range values
    still produce a well-formed six-digit code.

    Raises:
        IndexError: if *rgb* has fewer than three channels.
    """
    channels = (rgb[0], rgb[1], rgb[2])
    r, g, b = (min(255, max(0, int(round(float(c))))) for c in channels)
    return f"#{r:02X}{g:02X}{b:02X}"
def rgb_to_hsv_desc(rgb):
    """Convert an RGB colour to HSV and describe it in words.

    The conversion uses OpenCV on a single 8-bit pixel, where hue is stored
    as degrees / 2 (0-179) and saturation / value as 0-255.

    Returns:
        ``(description, h, s, v)`` where ``description`` is
        ``"<brightness> <saturation> <hue name>"``, ``h`` is the hue in
        degrees (0-358) and ``s`` / ``v`` are percentages (0-100).
    """
    pixel = np.array([[[rgb[0], rgb[1], rgb[2]]]], dtype=np.uint8)
    h, s, v = (int(c) for c in cv2.cvtColor(pixel, cv2.COLOR_RGB2HSV)[0][0])
    hue_deg = h * 2  # back to degrees so the bands below read naturally

    # Hue name.  Nearly unsaturated colours are named by brightness only.
    if s < 30:
        if v < 50:
            hue_name = "black"
        elif v > 200:
            hue_name = "white"
        else:
            hue_name = "gray"
    else:
        # (exclusive upper bound in degrees, name); anything at or above the
        # last bound wraps around to red.
        hue_bands = (
            (15, "red"),
            (45, "orange"),
            (70, "yellow"),
            (90, "yellow-green"),
            (150, "green"),
            (170, "cyan-green"),
            (200, "cyan"),
            (220, "blue-cyan"),
            (260, "blue"),
            (330, "purple"),
        )
        for upper_bound, band_name in hue_bands:
            if hue_deg < upper_bound:
                hue_name = band_name
                break
        else:
            hue_name = "red"

    # Saturation wording (0-255 scale).
    if s < 50:
        sat_name = "desaturated"
    elif s < 120:
        sat_name = "muted"
    elif s < 200:
        sat_name = "saturated"
    else:
        sat_name = "vivid"

    # Brightness wording (0-255 scale).
    if v < 80:
        val_name = "dark"
    elif v < 160:
        val_name = "mid-tone"
    else:
        val_name = "light"

    return f"{val_name} {sat_name} {hue_name}", hue_deg, int(s / 255 * 100), int(v / 255 * 100)
# Build the colour palette of every image, dump all palettes to one JSON
# file, then render one proportional colour strip per image.  Any failure
# aborts the whole step with a single message.
try:
    # NOTE(review): KMeans is not referenced in this block; presumably the
    # import only checks that scikit-learn is available - confirm.
    from sklearn.cluster import KMeans

    color_data = {}

    for img_id, img in images.items():
        colors, percentages = extract_color_palette(img, n_colors=8)

        palette_info = []
        for rank, (color, pct) in enumerate(zip(colors, percentages), start=1):
            desc, h, s, v = rgb_to_hsv_desc(color)
            entry = {
                "rank": rank,
                "hex": rgb_to_hex(color),
                "rgb": [int(color[0]), int(color[1]), int(color[2])],
                "hsv": {"h": h, "s": s, "v": v},
                "description": desc,
                "percentage": round(float(pct), 1),
            }
            palette_info.append(entry)

        color_data[img_id] = palette_info
        print(f" ✓ {img_id}: 提取{len(palette_info)}个主色调")
        # Echo the three largest colours only.
        for p in palette_info[:3]:
            print(f" {p['hex']} ({p['percentage']}%) - {p['description']}")

    # Persist the colour data for all images in one JSON document.
    out_path = os.path.join(OUTPUT_DIR, "color_palette_text", "all_images_color_palette.json")
    with open(out_path, 'w', encoding='utf-8') as f:
        json.dump(color_data, f, ensure_ascii=False, indent=2)
    print(f"\n 色彩数据已保存: {out_path}")

    # Render the palette strips: bar width follows the colour's share of an
    # 800 px wide canvas, with a 5 px minimum so tiny clusters stay visible.
    for img_id, palette in color_data.items():
        palette_img = Image.new('RGB', (800, 120), 'white')
        draw = ImageDraw.Draw(palette_img)

        left_edge = 0
        for swatch in palette[:8]:
            bar_width = max(5, int(800 * swatch['percentage'] / 100))
            draw.rectangle(
                [left_edge, 0, left_edge + bar_width, 80],
                fill=tuple(swatch['rgb']),
            )
            left_edge += bar_width

        out_path = os.path.join(OUTPUT_DIR, "color_palette_text", f"{img_id}_palette.png")
        palette_img.save(out_path)

    print(" 色彩可视化图已保存")

except Exception as e:
    print(f" 色彩提取失败: {e}")
print("\n" + "=" * 60)
print("特征提取完成!")
print("=" * 60)

# Summarise the output tree: every sub-directory of OUTPUT_DIR with its
# files (sorted by name) and their sizes in whole kilobytes.
for dim in os.listdir(OUTPUT_DIR):
    dim_path = os.path.join(OUTPUT_DIR, dim)
    if not os.path.isdir(dim_path):
        continue
    entries = sorted(os.listdir(dim_path))
    print(f"\n {dim}/")
    for f in entries:
        size_kb = os.path.getsize(os.path.join(dim_path, f)) // 1024
        print(f" {f} ({size_kb}KB)")
|