""" 多模态特征提取脚本 针对"户外白裙写生少女"图片组,提取以下维度: 1. openpose_skeleton - 人体姿态骨架(OpenPose) 2. depth_map - 深度图(MiDaS) 3. lineart_edge - 线稿/边缘图(Lineart) 4. color_palette - 色彩调色板(ColorThief + 自定义) 5. bokeh_mask - 景深虚化遮罩(基于深度图推导) 6. semantic_segmentation - 语义分割(基于颜色聚类) 7. color_distribution - 色彩分布向量(HSV直方图) """ import os import json import warnings warnings.filterwarnings('ignore') import numpy as np from PIL import Image import cv2 from colorthief import ColorThief # 设置工作目录 WORKDIR = os.path.dirname(os.path.abspath(__file__)) INPUT_DIR = os.path.join(WORKDIR, 'input') OUTPUT_DIR = os.path.join(WORKDIR, 'output', 'features') print("=== 开始多模态特征提取 ===") print(f"工作目录: {WORKDIR}") # ============================================================ # 维度1: OpenPose 人体姿态骨架 # ============================================================ def extract_openpose(img_path, output_path): """提取人体姿态骨架图""" from controlnet_aux import OpenposeDetector detector = OpenposeDetector.from_pretrained('lllyasviel/ControlNet') img = Image.open(img_path) result = detector(img, hand_and_face=True) result.save(output_path) print(f" OpenPose saved: {output_path}") return True # ============================================================ # 维度2: MiDaS 深度图 # ============================================================ def extract_depth(img_path, output_path, npy_path=None): """提取深度图""" from controlnet_aux import MidasDetector detector = MidasDetector.from_pretrained('lllyasviel/Annotators') img = Image.open(img_path) result = detector(img) result.save(output_path) print(f" Depth map saved: {output_path}") return True # ============================================================ # 维度3: Lineart 线稿 # ============================================================ def extract_lineart(img_path, output_path): """提取线稿""" from controlnet_aux import LineartDetector detector = LineartDetector.from_pretrained('lllyasviel/Annotators') img = Image.open(img_path) result = detector(img, coarse=False) result.save(output_path) print(f" Lineart saved: {output_path}") return True # ============================================================ # 维度4: 色彩调色板 # ============================================================ def extract_color_palette(img_path, output_path_json, output_path_png, n_colors=8): """提取主色调调色板""" # 使用ColorThief提取主色 ct = ColorThief(img_path) palette = ct.get_palette(color_count=n_colors, quality=1) # 生成调色板可视化图 palette_img = np.zeros((100, n_colors * 100, 3), dtype=np.uint8) for i, color in enumerate(palette): palette_img[:, i*100:(i+1)*100] = color cv2.imwrite(output_path_png, cv2.cvtColor(palette_img, cv2.COLOR_RGB2BGR)) # 保存JSON palette_data = { "colors": [{"r": c[0], "g": c[1], "b": c[2], "hex": "#{:02x}{:02x}{:02x}".format(c[0], c[1], c[2])} for c in palette], "dominant_color": {"r": palette[0][0], "g": palette[0][1], "b": palette[0][2], "hex": "#{:02x}{:02x}{:02x}".format(palette[0][0], palette[0][1], palette[0][2])} } with open(output_path_json, 'w') as f: json.dump(palette_data, f, indent=2, ensure_ascii=False) print(f" Color palette saved: {output_path_png}") return palette_data # ============================================================ # 维度5: 景深虚化遮罩(Bokeh Mask) # ============================================================ def extract_bokeh_mask(img_path, depth_path, output_path): """ 基于深度图推导景深虚化遮罩 亮区=近景清晰区域,暗区=远景虚化区域 """ img = cv2.imread(img_path) depth = cv2.imread(depth_path, cv2.IMREAD_GRAYSCALE) if depth is None: print(f" Warning: depth map not found for {img_path}") return False # 深度图归一化 - 调整到原图尺寸 h_orig, w_orig = img.shape[:2] depth = cv2.resize(depth, (w_orig, h_orig), interpolation=cv2.INTER_LINEAR) depth_norm = depth.astype(np.float32) / 255.0 # 计算局部清晰度(拉普拉斯方差) gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) laplacian = cv2.Laplacian(gray, cv2.CV_64F) # 用高斯模糊计算局部清晰度图 blur_map = np.abs(laplacian) blur_map = cv2.GaussianBlur(blur_map.astype(np.float32), (51, 51), 0) blur_map = (blur_map - blur_map.min()) / (blur_map.max() - blur_map.min() + 1e-8) # 结合深度图和清晰度图生成bokeh mask # 近景(深度值高)且清晰 = 主体区域 bokeh_mask = (blur_map * 0.6 + depth_norm * 0.4) * 255 bokeh_mask = bokeh_mask.astype(np.uint8) cv2.imwrite(output_path, bokeh_mask) print(f" Bokeh mask saved: {output_path}") return True # ============================================================ # 维度6: 语义分割(基于颜色聚类) # ============================================================ def extract_semantic_segmentation(img_path, output_path, n_segments=6): """ 基于颜色聚类的语义分割 针对本图片组的特点:白裙/绿背景/调色板/画布 """ from sklearn.cluster import KMeans img = cv2.imread(img_path) img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # 转换到LAB颜色空间(更符合人眼感知) img_lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB) h, w = img.shape[:2] # 加入位置信息(x, y坐标)以增强空间连续性 y_coords, x_coords = np.mgrid[0:h, 0:w] y_norm = y_coords.astype(np.float32) / h * 30 # 位置权重 x_norm = x_coords.astype(np.float32) / w * 30 # 特征向量:LAB颜色 + 位置 features = np.column_stack([ img_lab.reshape(-1, 3).astype(np.float32), y_norm.reshape(-1, 1), x_norm.reshape(-1, 1) ]) # K-means聚类 kmeans = KMeans(n_clusters=n_segments, random_state=42, n_init=3) labels = kmeans.fit_predict(features) labels = labels.reshape(h, w) # 生成彩色分割图 colors = [ [255, 255, 255], # 白色 - 白裙 [34, 139, 34], # 绿色 - 背景草地 [101, 67, 33], # 棕色 - 调色板/画架 [135, 206, 235], # 天蓝 - 天空/远景 [255, 218, 185], # 肤色 - 人物皮肤 [200, 200, 200], # 灰色 - 画布 ] seg_img = np.zeros((h, w, 3), dtype=np.uint8) for i in range(n_segments): mask = labels == i # 找到该聚类的平均颜色 cluster_color = kmeans.cluster_centers_[i][:3] # 转回RGB cluster_lab = np.uint8([[cluster_color]]) cluster_rgb = cv2.cvtColor(cluster_lab, cv2.COLOR_LAB2RGB)[0][0] seg_img[mask] = cluster_rgb cv2.imwrite(output_path, cv2.cvtColor(seg_img, cv2.COLOR_RGB2BGR)) print(f" Segmentation saved: {output_path}") return True # ============================================================ # 维度7: 色彩分布向量(HSV直方图) # ============================================================ def extract_color_distribution(img_path, output_path_json, output_path_png): """ 提取HSV色彩分布向量 捕捉图片的整体色调特征 """ img = cv2.imread(img_path) img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) # 计算HSV直方图 h_hist = cv2.calcHist([img_hsv], [0], None, [36], [0, 180]) # 色相 s_hist = cv2.calcHist([img_hsv], [1], None, [32], [0, 256]) # 饱和度 v_hist = cv2.calcHist([img_hsv], [2], None, [32], [0, 256]) # 明度 # 归一化 h_hist = h_hist.flatten() / h_hist.sum() s_hist = s_hist.flatten() / s_hist.sum() v_hist = v_hist.flatten() / v_hist.sum() # 计算统计特征 h, w = img.shape[:2] total_pixels = h * w # 白色像素比例(白裙特征) white_mask = (img_hsv[:,:,1] < 30) & (img_hsv[:,:,2] > 200) white_ratio = white_mask.sum() / total_pixels # 绿色像素比例(背景特征) green_mask = (img_hsv[:,:,0] >= 35) & (img_hsv[:,:,0] <= 85) & (img_hsv[:,:,1] > 50) green_ratio = green_mask.sum() / total_pixels # 平均亮度 mean_brightness = img_hsv[:,:,2].mean() / 255.0 # 平均饱和度 mean_saturation = img_hsv[:,:,1].mean() / 255.0 data = { "h_histogram": h_hist.tolist(), "s_histogram": s_hist.tolist(), "v_histogram": v_hist.tolist(), "statistics": { "white_ratio": float(white_ratio), "green_ratio": float(green_ratio), "mean_brightness": float(mean_brightness), "mean_saturation": float(mean_saturation) } } with open(output_path_json, 'w') as f: json.dump(data, f, indent=2) # 生成可视化图 fig_h = 200 fig_w = 600 vis = np.ones((fig_h, fig_w, 3), dtype=np.uint8) * 240 # 绘制色相直方图(彩色) bar_w = fig_w // 36 for i, val in enumerate(h_hist): bar_h = int(val * (fig_h - 20)) hue = int(i * 5) # 0-180 color_hsv = np.uint8([[[hue, 200, 200]]]) color_rgb = cv2.cvtColor(color_hsv, cv2.COLOR_HSV2BGR)[0][0] x1 = i * bar_w x2 = x1 + bar_w - 1 y1 = fig_h - bar_h - 10 y2 = fig_h - 10 cv2.rectangle(vis, (x1, y1), (x2, y2), color_rgb.tolist(), -1) cv2.imwrite(output_path_png, vis) print(f" Color distribution saved: {output_path_png}") return data # ============================================================ # 主执行流程 # ============================================================ print("\n--- 加载检测器 ---") from controlnet_aux import OpenposeDetector, MidasDetector, LineartDetector print("Loading OpenPose...") openpose_detector = OpenposeDetector.from_pretrained('lllyasviel/ControlNet') print("Loading MiDaS...") midas_detector = MidasDetector.from_pretrained('lllyasviel/Annotators') print("Loading Lineart...") lineart_detector = LineartDetector.from_pretrained('lllyasviel/Annotators') print("All detectors loaded!") # 处理每张图片 for i in range(1, 10): img_name = f"img_{i}" img_path = os.path.join(INPUT_DIR, f"{img_name}.jpg") print(f"\n=== 处理 {img_name} ===") # 1. OpenPose openpose_path = os.path.join(OUTPUT_DIR, 'openpose_skeleton', f"{img_name}.png") img = Image.open(img_path) result = openpose_detector(img, hand_and_face=True) result.save(openpose_path) print(f" [1/7] OpenPose: {openpose_path}") # 2. Depth Map depth_path = os.path.join(OUTPUT_DIR, 'depth_map', f"{img_name}.png") result = midas_detector(img) result.save(depth_path) print(f" [2/7] Depth: {depth_path}") # 3. Lineart lineart_path = os.path.join(OUTPUT_DIR, 'lineart_edge', f"{img_name}.png") result = lineart_detector(img, coarse=False) result.save(lineart_path) print(f" [3/7] Lineart: {lineart_path}") # 4. Color Palette palette_json = os.path.join(OUTPUT_DIR, 'color_palette', f"{img_name}.json") palette_png = os.path.join(OUTPUT_DIR, 'color_palette', f"{img_name}.png") extract_color_palette(img_path, palette_json, palette_png, n_colors=8) print(f" [4/7] Color Palette: {palette_png}") # 5. Bokeh Mask bokeh_path = os.path.join(OUTPUT_DIR, 'bokeh_mask', f"{img_name}.png") extract_bokeh_mask(img_path, depth_path, bokeh_path) print(f" [5/7] Bokeh Mask: {bokeh_path}") # 6. Semantic Segmentation seg_path = os.path.join(OUTPUT_DIR, 'semantic_segmentation', f"{img_name}.png") extract_semantic_segmentation(img_path, seg_path, n_segments=6) print(f" [6/7] Segmentation: {seg_path}") # 7. Color Distribution dist_json = os.path.join(OUTPUT_DIR, 'color_distribution', f"{img_name}.json") dist_png = os.path.join(OUTPUT_DIR, 'color_distribution', f"{img_name}.png") extract_color_distribution(img_path, dist_json, dist_png) print(f" [7/7] Color Distribution: {dist_png}") print("\n=== 所有特征提取完成 ===")