elksmmx 6 дней назад
Родитель
Commit
a850762425

+ 154 - 0
examples/find knowledge/input/写生油画_图片制作点实质结果.json

@@ -0,0 +1,154 @@
+{
+  "帖子ID": "616192600000000021034642",
+  "帖子名称": "写生油画",
+  "图片制作点实质结果": [
+    {
+      "元素ID": "元素1",
+      "元素名称": "女性",
+      "元素描述": "穿着白裙的女性,包括其背影、躯干与手臂,有蹲坐姿态。",
+      "段落数量": 8,
+      "段落列表": [
+        "段落1.1.1",
+        "段落2.1.1",
+        "段落3.1.1",
+        "段落4.1.1.1",
+        "段落4.1.1.2",
+        "段落8.1.1.1",
+        "段落8.1.1.2",
+        "段落9.1.1"
+      ],
+      "覆盖图片数": 6,
+      "出现总次数": 8,
+      "重要性得分": 95.0,
+      "频次基础分": 100,
+      "频次得分": 40.0,
+      "覆盖率基础分": 66.67,
+      "覆盖率得分": 40.0,
+      "频率权重": 80.0,
+      "综合权重": 84.5
+    },
+    {
+      "元素ID": "元素2",
+      "元素名称": "绘画工具",
+      "元素描述": "包含调色板、画笔和颜料,部分描述涉及手持这些工具的动作,调色板有主体部分。",
+      "段落数量": 15,
+      "段落列表": [
+        "段落1.1.3",
+        "段落2.1.3",
+        "段落4.1.3",
+        "段落4.1.4",
+        "段落5.1.1.1",
+        "段落5.1.1.2",
+        "段落5.2.1.1",
+        "段落5.2.1.2",
+        "段落6.1.1.1.1",
+        "段落6.1.1.1.2",
+        "段落6.2.3",
+        "段落6.2.4.1",
+        "段落6.2.4.2",
+        "段落8.1.3",
+        "段落8.1.4"
+      ],
+      "覆盖图片数": 6,
+      "出现总次数": 15,
+      "重要性得分": 75.0,
+      "频次基础分": 100,
+      "频次得分": 40.0,
+      "覆盖率基础分": 66.67,
+      "覆盖率得分": 40.0,
+      "频率权重": 80.0,
+      "综合权重": 78.5
+    },
+    {
+      "元素ID": "元素3",
+      "元素名称": "自然背景",
+      "元素描述": "由草坪、树木和阳光构成的自然背景,部分描述提及远处建筑。",
+      "段落数量": 8,
+      "段落列表": [
+        "段落1.3",
+        "段落2.2",
+        "段落3.2",
+        "段落4.2.1",
+        "段落4.2.2",
+        "段落8.2.1",
+        "段落8.2.2",
+        "段落9.2"
+      ],
+      "覆盖图片数": 6,
+      "出现总次数": 8,
+      "重要性得分": 60.0,
+      "频次基础分": 100,
+      "频次得分": 40.0,
+      "覆盖率基础分": 66.67,
+      "覆盖率得分": 40.0,
+      "频率权重": 80.0,
+      "综合权重": 74.0
+    },
+    {
+      "元素ID": "元素4",
+      "元素名称": "画架与画布",
+      "元素描述": "包含画架结构和其上的画布,画布可以是空白的或正在创作的油画。",
+      "段落数量": 8,
+      "段落列表": [
+        "段落4.1.2.1",
+        "段落4.1.2.2",
+        "段落5.2.2",
+        "段落5.2.3",
+        "段落6.2.1",
+        "段落6.2.2",
+        "段落8.1.2.1",
+        "段落8.1.2.2"
+      ],
+      "覆盖图片数": 4,
+      "出现总次数": 8,
+      "重要性得分": 70.0,
+      "频次基础分": 100,
+      "频次得分": 40.0,
+      "覆盖率基础分": 44.44,
+      "覆盖率得分": 26.67,
+      "频率权重": 66.67,
+      "综合权重": 67.67
+    },
+    {
+      "元素ID": "元素5",
+      "元素名称": "画架与油画",
+      "元素描述": "画架上放置着油画。",
+      "段落数量": 4,
+      "段落列表": [
+        "段落1.1.2",
+        "段落2.1.2",
+        "段落3.1.2",
+        "段落9.1.2"
+      ],
+      "覆盖图片数": 4,
+      "出现总次数": 4,
+      "重要性得分": 85.0,
+      "频次基础分": 60,
+      "频次得分": 24.0,
+      "覆盖率基础分": 44.44,
+      "覆盖率得分": 26.67,
+      "频率权重": 50.67,
+      "综合权重": 60.97
+    },
+    {
+      "元素ID": "元素6",
+      "元素名称": "女性衣物",
+      "元素描述": "女性穿着的白色衣物,包括白色上衣。",
+      "段落数量": 3,
+      "段落列表": [
+        "段落5.1.2",
+        "段落6.1.2",
+        "段落7.1.3"
+      ],
+      "覆盖图片数": 3,
+      "出现总次数": 3,
+      "重要性得分": 65.0,
+      "频次基础分": 40,
+      "频次得分": 16.0,
+      "覆盖率基础分": 33.33,
+      "覆盖率得分": 20.0,
+      "频率权重": 36.0,
+      "综合权重": 44.7
+    }
+  ]
+}

+ 125 - 0
examples/find knowledge/knowledge/dimension_selection.md

@@ -0,0 +1,125 @@
+# 多模态维度筛选决策
+
+## 实质列表
+
+| 实质 | 重要性 | 出现频次 | 优先级 |
+|------|--------|----------|--------|
+| 女性人物 | 95 | 8次/6图 | 最高 |
+| 绘画工具(调色板/画笔/颜料) | 75 | 15次/6图 | 高 |
+| 画架与油画 | 85/70 | 8+4次 | 高 |
+| 自然背景(草地/树木) | 60 | 8次/6图 | 中 |
+| 女性衣物(白裙) | 65 | 3次/3图 | 中 |
+
+## 形式列表
+
+| 形式 | 亮点聚类 | 权重 | 类型 |
+|------|----------|------|------|
+| 白绿配色 | cluster_3 | 高 | 整体色调 |
+| 逆光/散景/梦幻光影 | cluster_4 | 高 | 光影 |
+| 画中画结构 | cluster_5 | 中 | 叙事形式 |
+| 构图引导(过肩/视线引导) | cluster_6 | 中 | 构图 |
+| 人物姿态(站/跪/侧/背) | cluster_1 | 高 | 姿态 |
+
+---
+
+## 筛选的多模态维度(共8个)
+
+### 维度1:人体姿态骨骼图(pose_skeleton)
+- **对应实质**: 女性人物(段落X.1.1系列)
+- **对应形式**: 人物姿态(cluster_1)
+- **表示形式**: PNG图像(骨骼关键点连线图)
+- **提取工具**: MediaPipe Pose(33关键点)
+- **可逆性**: 高 - 骨骼图直接作为ControlNet OpenPose的输入
+- **生成模型友好性**: 极高 - ControlNet标准输入格式
+- **泛化价值**: 高 - 骨骼姿态可复用于不同服装/场景/风格
+- **还原中的作用**: 控制人物的站立/跪姿/侧身/背影等姿态,保证多图一致性
+- **必要性**: 图片组中人物姿态多样(站立、跪姿、侧身),是最核心的控制维度
+
+### 维度2:全局色彩调色板(color_palette)
+- **对应实质**: 整体图像(图像级形式)
+- **对应形式**: 白绿配色(cluster_3)
+- **表示形式**: JSON(主色调列表,含HSL值和比例)+ PNG色块可视化
+- **提取工具**: scikit-learn KMeans聚类(K=6)
+- **可逆性**: 高 - 色彩调色板可直接作为ControlNet t2iaColor的输入
+- **生成模型友好性**: 高 - 可转化为Prompt色彩描述或T2I-Adapter颜色控制
+- **泛化价值**: 高 - 白绿配色是可复用的视觉风格基因
+- **还原中的作用**: 控制整体色调,确保白裙+绿背景的清新配色一致性
+- **必要性**: cluster_3是高权重亮点,白绿配色是图片组的核心视觉特征
+
+### 维度3:颜料质感色彩图(palette_texture_colors)
+- **对应实质**: 绘画工具(调色板上的颜料,段落X.1.3系列)
+- **对应形式**: 斑斓厚重的油画颜料(cluster_2_texture)
+- **表示形式**: JSON(颜料色块列表,含颜色和位置分布)+ PNG可视化
+- **提取工具**: KMeans聚类 + 区域分析
+- **可逆性**: 高 - 颜料色彩可作为局部色彩控制信号
+- **生成模型友好性**: 高 - 可描述为"impasto oil paint palette with vivid colors"
+- **泛化价值**: 高 - 颜料色彩组合可复用于其他艺术创作场景
+- **还原中的作用**: 控制调色板上颜料的色彩丰富度和分布,营造真实的艺术创作感
+- **必要性**: cluster_2_texture是独立亮点聚类,颜料质感是图片的核心视觉反差元素
+
+### 维度4:人物外观语义描述(person_appearance)
+- **对应实质**: 女性人物(段落X.1系列)
+- **对应形式**: 服装(白裙)、发型(棕色长发)、配饰(耳饰/项链/手镯)
+- **表示形式**: JSON(结构化外观描述)
+- **提取工具**: VLM(视觉语言模型,如Gemini)
+- **可逆性**: 高 - 自然语言描述直接作为Prompt输入
+- **生成模型友好性**: 极高 - 最直接的生成控制信号
+- **泛化价值**: 高 - 外观描述可复用于生成同一人物的不同场景
+- **还原中的作用**: 控制人物的服装颜色、发型、配饰等外观特征,保证跨图一致性
+- **必要性**: 白裙是cluster_1的核心特征,人物外观一致性是图片组的基本要求
+
+### 维度5:场景构图描述(composition_layout)
+- **对应实质**: 整体场景(图像级)
+- **对应形式**: 构图引导(cluster_6)、景别、拍摄角度
+- **表示形式**: JSON(构图参数:主体位置比例、景别类型、拍摄角度、视线引导方向)
+- **提取工具**: VLM分析 + 规则提取
+- **可逆性**: 中高 - 构图参数可转化为Prompt的构图描述
+- **生成模型友好性**: 高 - 构图描述是Prompt的重要组成部分
+- **泛化价值**: 高 - 过肩视角、视线引导等构图规律可复用于新内容创作
+- **还原中的作用**: 控制人物与画架的相对位置、拍摄角度、景别,保证构图一致性
+- **必要性**: 9张图片构图各异(背影/侧身/特写/远景),构图是区分各图的关键维度
+
+### 维度6:光影氛围描述(lighting_atmosphere)
+- **对应实质**: 整体图像(图像级形式)
+- **对应形式**: 逆光/散景/梦幻光影(cluster_4)
+- **表示形式**: JSON(光照类型、方向、散景程度、整体氛围)
+- **提取工具**: VLM分析
+- **可逆性**: 高 - 光影描述可直接作为Prompt的光照控制词
+- **生成模型友好性**: 极高 - "backlight, bokeh, dreamy atmosphere"等词汇是生成模型标准控制词
+- **泛化价值**: 高 - 逆光散景是可复用的摄影风格基因
+- **还原中的作用**: 控制光照方向(逆光/侧光)、背景虚化程度、整体氛围(梦幻/清新)
+- **必要性**: cluster_4是高权重亮点,光影是图片组的核心氛围特征
+
+### 维度7:背景环境色彩(background_color)
+- **对应实质**: 自然背景(段落X.3系列)
+- **对应形式**: 清新雅致的白绿配色(cluster_3的背景部分)
+- **表示形式**: JSON(背景主色调HSL值、饱和度、亮度范围)
+- **提取工具**: KMeans聚类(仅背景区域)
+- **可逆性**: 高 - 背景色彩可作为独立的色彩控制信号
+- **生成模型友好性**: 高 - 可转化为"lush green grass, natural outdoor background"等描述
+- **泛化价值**: 高 - 绿色自然背景是可复用的场景基因
+- **还原中的作用**: 控制背景的绿色调性,与白裙形成对比,营造清新户外感
+- **必要性**: 背景色彩是cluster_3的重要组成,与人物白裙的对比是核心视觉亮点
+
+### 维度8:画中画内容描述(painting_content)
+- **对应实质**: 画架与油画(段落X.2.1系列)
+- **对应形式**: 画中画结构(cluster_5)
+- **表示形式**: JSON(画布内容描述、与现实场景的对应关系)
+- **提取工具**: VLM分析
+- **可逆性**: 高 - 内容描述可直接作为Prompt输入
+- **生成模型友好性**: 高 - "painting within painting, canvas showing..."等描述
+- **泛化价值**: 中 - 画中画结构是独特的叙事形式,可复用于艺术创作场景
+- **还原中的作用**: 控制画布上的内容,实现现实与艺术的"镜像"呼应关系
+- **必要性**: cluster_5是独立亮点聚类,画中画是图片组的独特叙事亮点
+
+---
+
+## 排除的维度及原因
+
+| 排除维度 | 排除原因 |
+|----------|----------|
+| 深度图(Depth Map) | 与原图过于相似,缺乏泛化价值;且本图组景深效果已通过光影描述覆盖 |
+| 边缘检测图(Canny/Lineart) | 与原图过于相似,为了还原而还原,缺乏创造性价值 |
+| 语义分割图(Segmentation) | 信息量过大,与原图相似度高;已通过其他维度覆盖各区域特征 |
+| 面部特征(Face Embedding) | 图片组以背影为主,面部信息有限;且面部特征过于具体,泛化性差 |
+| 画面比例(Aspect Ratio) | 所有图片均为1080×1439(约3:4),固定值,无需单独提取 |

+ 100 - 0
examples/find knowledge/knowledge/restoration_experience/search_report.md

@@ -0,0 +1,100 @@
+# 还原经验搜索报告
+
+## 搜索策略与关键词记录
+
+### 第一轮搜索
+- **关键词**: "ControlNet 人物姿态 图像还原 生成模型" (知乎) → 0结果
+- **关键词**: "图像特征提取 生成模型控制信号 多模态" (知乎) → 0结果
+- **关键词**: "ControlNet 图像重建 特征提取" (知乎) → 0结果
+
+### 第二轮搜索(调整平台)
+- **关键词**: "stable diffusion 人物还原 姿态控制" (小红书) → 3结果 ✅
+- **关键词**: "色彩调色板提取 palette 图像生成控制" (小红书) → 2结果
+- **关键词**: "IP-Adapter 人物一致性 图像生成" (小红书) → 5结果 ✅
+
+### 第三轮搜索(工具研究)
+- **关键词**: "MediaPipe pose estimation body keypoints" (GitHub) → 5结果 ✅
+- **关键词**: "color palette extraction dominant colors image" (GitHub) → 5结果 ✅
+- **关键词**: "AI图像还原 特征提取 controlnet 实战教程" (小红书) → 5结果 ✅
+- **关键词**: "人物骨骼姿态 openpose 提取工具 python" (小红书) → 4结果 ✅
+- **关键词**: "图像生成 色彩调色板 HSL 色调控制 stable diffusion" (小红书) → 5结果 ✅
+- **关键词**: "人像分割 SAM segment anything 背景分离" (小红书) → 4结果 ✅
+- **关键词**: "mediapipe 人体姿态 关键点 python 教程" (小红书) → 1结果 ✅
+
+---
+
+## 核心发现
+
+### 1. 姿态控制(OpenPose/MediaPipe)
+**来源**: 小红书多篇教程
+**URL**: 
+- https://www.xiaohongshu.com/explore/66d79b89000000000c019bb1 (OpenPose教程)
+- https://www.xiaohongshu.com/explore/695a8d99000000001e0108d7 (姿势还原ControlNet)
+- https://www.xiaohongshu.com/explore/66ea1d7d000000001e019fb9 (姿态控制SD)
+- https://www.xiaohongshu.com/explore/67b5f029000000000903adbc (MediaPipe Python)
+
+**关键经验**:
+- OpenPose是最成熟的人体姿态提取工具,支持18个关键点(头、肩、手肘、膝盖等)
+- ControlNet的OpenPose模型(control_v11p_sd15_openpose.pth)是生成模型友好的控制信号
+- 有6种预处理器:openpose(基础)、openpose_face(含面部)、openpose_faceonly(仅面部)、openpose_hand(含手部)、openpose_full(全部)、dw_openpose_full(加强版)
+- **骨骼图是抽象的、可复用的**,不包含原始像素,适合作为多模态特征
+- MediaPipe也支持33个关键点的人体姿态估计,Python友好,无需GPU
+
+### 2. 颜色控制(Color Palette)
+**来源**: 小红书ControlNet教程
+**URL**: https://www.xiaohongshu.com/explore/67a487fa000000002902a16f
+
+**关键经验**:
+- ControlNet的t2iaColor模型能提取参考图的色彩分布并应用到生成图
+- KMeans聚类是提取主色调的标准方法
+- 色彩调色板(dominant colors + 比例)是生成模型友好的控制信号
+- 白绿配色是本图组的核心亮点,需要精确提取
+
+### 3. 人物一致性(IP-Adapter/Reference)
+**来源**: 小红书多篇教程
+**URL**:
+- https://www.xiaohongshu.com/explore/65f814b6000000000d00f30c (IP-Adapter)
+- https://www.xiaohongshu.com/explore/685cd182000000001203c90b (SD角色一致性)
+- https://www.xiaohongshu.com/explore/684a3bca0000000220015345 (即梦垫图人物一致性)
+
+**关键经验**:
+- IP-Adapter将图像作为"图像提示词",可复制参考图的风格、构图或人物特征
+- ip-adapter_clip_h 迁移性最强
+- Reference预处理器适合控制动漫或IP形象,对真人效果一般
+- ConsistentID使用face parsing(BiSeNet)分割人脸不同区域,实现细粒度控制
+
+### 4. 图像分割(SAM/Segment Anything)
+**来源**: 小红书多篇教程
+**URL**:
+- https://www.xiaohongshu.com/explore/68d65791000000001301c5ff (SAM2 ComfyUI)
+- https://www.xiaohongshu.com/explore/689c598b000000001c011bd5 (SAM介绍)
+- https://www.xiaohongshu.com/explore/6826dccb0000000021007dd4 (SAM2+ComfyUI)
+
+**关键经验**:
+- SAM2(Meta)是最先进的图像分割模型,支持零样本分割
+- 可以精确分割人物、道具、背景等区域
+- 分割蒙版可以作为区域控制信号
+
+### 5. ControlNet综合控制策略
+**来源**: 小红书教程
+**URL**: 
+- https://www.xiaohongshu.com/explore/6988252f0000000015039617 (SDXL双控)
+- https://www.xiaohongshu.com/explore/68c8c32e000000001d014a74 (ControlNet可控性)
+- https://www.xiaohongshu.com/explore/698dbf4a000000000c03686c (Qwen+ControlNet)
+
+**关键经验**:
+- 写实摄影用 Pose+Depth 双控就够了,不要开3个
+- ControlNet的核心价值:用结构约束解决姿态、构图、边缘、深度等关键维度
+- 图生图流程:参考图 → LLM反推语义 → ControlNet锁结构 → 重新生成
+- strength参数:0.4~0.6保留结构+允许创作(最常用)
+
+---
+
+## 关键结论
+
+1. **姿态骨骼图(OpenPose/MediaPipe)** 是人物还原最重要的控制信号,可逆性强,生成模型友好
+2. **色彩调色板(KMeans主色调)** 是色调控制的标准方法,JSON格式存储,生成模型可直接使用
+3. **语义分割蒙版(SAM2)** 可以精确分离人物/道具/背景,为分区域控制提供基础
+4. **自然语言描述(Prompt)** 是生成模型最直接的控制信号,应该精确、专业
+5. **深度图不适合作为特征**:与原图过于相似,缺乏泛化价值
+6. **避免直接使用原图**:特征应该是抽象的、可复用的控制信号

+ 151 - 0
examples/find knowledge/knowledge/tools/tools_research.md

@@ -0,0 +1,151 @@
+# 工具研究报告
+
+## 一、姿态提取工具
+
+### 1. MediaPipe Pose(选用)
+- **来源**: Google开发,Python友好
+- **版本**: 0.10.9(2024年更新)
+- **关键点数量**: 33个(比OpenPose的18个更精细)
+- **优势**: 无需GPU,安装简单,支持归一化坐标
+- **ControlNet兼容性**: 输出格式与ControlNet OpenPose兼容
+- **GitHub参考**:
+  - https://github.com/rohitshetty/pose-overlay (Python toolkit for video pose estimation with MediaPipe)
+  - https://github.com/HeleenaRobert/human-pose-estimation (Human pose estimation using MediaPipe Pose & OpenCV)
+  - https://github.com/venkatesh-madanwale/Skeleton-Detection (MediaPipe landmark detection)
+
+### 2. OpenPose(参考)
+- **来源**: CMU开发,SD WebUI内置
+- **关键点数量**: 18个(基础版)
+- **SD WebUI预处理器**:
+  - openpose:基础关键点(眼、鼻、脖子、肩、手腕、膝盖、脚踝)
+  - openpose_face:openpose + 面部细节
+  - openpose_faceonly:仅面部细节
+  - openpose_hand:openpose + 手和手指
+  - openpose_full:提取以上所有信息
+  - dw_openpose_full:openpose_full的加强版
+- **控制模型**: control_v11p_sd15_openpose.pth
+- **来源URL**: https://www.xiaohongshu.com/explore/66d79b89000000000c019bb1
+
+---
+
+## 二、色彩提取工具
+
+### 1. scikit-learn KMeans(选用)
+- **用途**: 提取图像主色调
+- **方法**: K均值聚类,K=6(全局)或K=4(背景)
+- **输出**: RGB/HEX/HSL多种格式 + 比例
+- **GitHub参考**:
+  - https://github.com/Niteshmeena9672/ColorExtraction-Using-KMeans-Clustering (Flask-based dominant color extraction)
+  - https://github.com/nehamehta2110/Dominant-Color-extraction-Kmeans (K-Means dominant palette colors)
+  - https://github.com/kwizatz-haderach/ImageColorExtraction (scikit-learn KMeans color extraction)
+
+### 2. T2I-Adapter Color(参考)
+- **用途**: ControlNet颜色控制
+- **预处理器**: t2iaColor(色彩像素化)
+- **功能**: 提取参考图的色彩分布并应用到生成图
+- **来源URL**: 
+  - https://www.xiaohongshu.com/explore/67a487fa000000002902a16f (ControlNet控制类型)
+  - https://www.xiaohongshu.com/explore/660a772a000000001a0173cd (T2I-Adapter用法)
+  - https://www.xiaohongshu.com/explore/68f5b3ec0000000005032e0c (T2I-Adapter-SDXL)
+
+---
+
+## 三、视觉语言模型(VLM)
+
+### 1. Google Gemini 2.0 Flash(选用)
+- **用途**: 图像语义分析,结构化JSON输出
+- **优势**: 2024年最新模型,多模态理解能力强,中文语境理解好
+- **调用方式**: OpenRouter API
+- **应用场景**: 人物外观描述、构图分析、光影分析、画中画内容分析
+
+### 2. 其他VLM参考
+- **VLM-FO1**: Om AI Lab发布,专注于精准物体识别和区域理解
+  - 来源: https://www.xiaohongshu.com/explore/68a2c34d000000001d01bde8
+- **ATPrompt**: 属性锚定提示,提升VLM泛化能力
+  - 来源: https://www.xiaohongshu.com/explore/687a4389000000000d0269b0
+
+---
+
+## 四、生成模型控制工具
+
+### 1. ControlNet(核心控制框架)
+- **用途**: 结构约束,控制姿态/构图/色彩
+- **关键经验**:
+  - 写实摄影用 Pose+Depth 双控就够了,不要开3个
+  - strength参数:0.4~0.6保留结构+允许创作(最常用)
+  - 图生图流程:参考图 → LLM反推语义 → ControlNet锁结构 → 重新生成
+- **来源URL**: https://www.xiaohongshu.com/explore/697081e0000000000c037f22 (ComfyUI ControlNet学习)
+
+### 2. Flux + ControlNet(最新方案,2025年)
+- **用途**: 高质量写实人像生成
+- **工作流**: Flux生成初始帧 → ControlNet Tile调整布局和姿势 → 姿势网格参考
+- **优势**: 比传统SD方案更精确地指定角色的姿势和布局,提升一致性
+- **适用场景**: 人物、动物、风景等多种主题
+- **来源URL**: 
+  - https://www.xiaohongshu.com/explore/66f8a185000000001902c68f (Flux+ControlNet一致帧)
+  - https://www.xiaohongshu.com/explore/68954436000000002501580b (Flux+CN人物一致性)
+
+### 3. Flux + Redux(人物一致性方案)
+- **用途**: 迁移人物服饰和面部特征
+- **工作流**: Flux+CN生图(控制姿态)→ Redux迁移人物特征 → 重绘细节
+- **效果**: 白底图效果最好,带背景的人物效果需要抽卡
+- **来源URL**: https://www.xiaohongshu.com/explore/68954436000000002501580b
+
+### 4. ComfyUI + OpenPose(姿态控制)
+- **用途**: 通过OpenPose控制人物姿态
+- **节点**: AIO Aux Preprocessor(通用预处理节点)
+- **关键经验**: 
+  - 提示词中不要出现跟姿态相冲突的内容
+  - 适用于人像摄影、IP角色设计、产品广告、布景、构图
+- **来源URL**: 
+  - https://www.xiaohongshu.com/explore/6731c5a7000000021b01a642 (ComfyUI姿态控制)
+  - https://www.xiaohongshu.com/explore/69610ae9000000001a026d9f (ComfyUI+Pose实战)
+
+### 5. FLUX模型 + 深度图+线稿双控(模特生成)
+- **用途**: 高级感产品模特图生成
+- **工作流**: 实拍姿势图 → 深度图+线稿同时启用 → 文生图 → 图生图融合(重绘幅度0.3-0.4)
+- **关键经验**: 
+  - 深度图+线稿同时启用效果最佳
+  - 图生图重绘幅度0.3-0.4最佳
+  - 建议先跑3-5组测试效果
+- **来源URL**: https://www.xiaohongshu.com/explore/67fbdf5e000000000903905b
+
+---
+
+## 五、人物一致性工具
+
+### 1. IP-Adapter(图像提示适配器)
+- **用途**: 使用图片作为生成图像的提示词,复制参考图的风格/构图/人物特征
+- **版本**: ip-adapter_clip_h 迁移性最强
+- **来源URL**: 
+  - https://www.xiaohongshu.com/explore/65f814b6000000000d00f30c (IP-Adapter教程)
+  - https://www.xiaohongshu.com/explore/66f6389200000002190261a5 (IP-Adapter图片风格提示)
+
+### 2. ACE + Redux(模特特征迁移)
+- **用途**: 迁移模特特征(服饰+面部)
+- **工作流**: OpenPose控制姿态 + ACE+Redux迁移特征 + 拼图参照
+- **来源URL**: https://www.xiaohongshu.com/explore/67c7ba710000000212015d0c
+
+### 3. 提示词人物一致性(最简单方案)
+- **用途**: 通过精确提示词保持人物一致性
+- **方法**: 图生图+提示词融合,在对话框内实现
+- **关键提示词结构**: 人物描述(发型/服装/配饰)+ 场景描述 + 质量词
+- **来源URL**: https://www.xiaohongshu.com/explore/6975c916000000001a036583
+
+---
+
+## 六、工具选择总结
+
+| 工具 | 用途 | 选用理由 | 状态 |
+|------|------|----------|------|
+| MediaPipe Pose | 姿态骨骼提取 | 33关键点,Python友好,无需GPU | ✅ 已选用 |
+| scikit-learn KMeans | 色彩调色板提取 | 标准方法,稳定可重复 | ✅ 已选用 |
+| Google Gemini 2.0 Flash | VLM语义分析 | 最新模型,结构化输出 | ✅ 已选用 |
+| ControlNet OpenPose | 生成时姿态控制 | 标准控制信号,直接可用 | 📋 还原时使用 |
+| T2I-Adapter Color | 生成时色彩控制 | 色彩分布控制 | 📋 还原时使用 |
+| Flux + Redux | 人物一致性 | 服饰+面部特征迁移 | 📋 还原时使用 |
+| ComfyUI | 工作流编排 | 节点化工作流,灵活组合 | 📋 还原时使用 |
+
+---
+
+*报告更新时间: 2026年3月4日*

+ 234 - 0
examples/find knowledge/scripts/extract_colors.py

@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+"""
+提取色彩调色板 - 使用KMeans聚类
+维度2: 全局色彩调色板 (color_palette)
+维度7: 背景环境色彩 (background_color)
+"""
+
+import numpy as np
+import json
+import os
+from PIL import Image, ImageDraw
+from sklearn.cluster import KMeans
+import colorsys
+
def rgb_to_hsl(r, g, b):
    """Convert 0-255 RGB components to an HSL dict (h in degrees, s/l in percent)."""
    # colorsys works on [0, 1] floats and returns (hue, lightness, saturation).
    hue, lightness, saturation = colorsys.rgb_to_hls(r / 255.0, g / 255.0, b / 255.0)
    return {
        "h": round(hue * 360, 1),
        "s": round(saturation * 100, 1),
        "l": round(lightness * 100, 1),
    }
+
def rgb_to_hex(r, g, b):
    """Format RGB components as a lowercase #rrggbb hex string (floats truncated)."""
    return "#%02x%02x%02x" % (int(r), int(g), int(b))
+
def extract_palette(pixels, n_colors=6, img_id="", label="global"):
    """Extract the dominant colors of an (N, 3) RGB pixel array via KMeans.

    Args:
        pixels: float array of shape (N, 3) with RGB values in [0, 255].
        n_colors: number of clusters / palette entries to produce.
        img_id, label: accepted for interface compatibility; currently unused.

    Returns:
        List of dicts with "rank"/"rgb"/"hex"/"hsl"/"proportion", sorted by
        descending proportion, rank being 1-based after the sort.
    """
    # Subsample for speed. Fix: the sample is now drawn from a seeded
    # Generator so repeated runs on the same image produce the same
    # palette (KMeans itself was already seeded, the sampling was not).
    if len(pixels) > 10000:
        rng = np.random.default_rng(42)
        idx = rng.choice(len(pixels), 10000, replace=False)
        pixels_sample = pixels[idx]
    else:
        pixels_sample = pixels

    kmeans = KMeans(n_clusters=n_colors, random_state=42, n_init=10)
    kmeans.fit(pixels_sample)

    # Share of sampled pixels assigned to each cluster.
    labels = kmeans.predict(pixels_sample)
    unique, counts = np.unique(labels, return_counts=True)
    total = len(labels)

    colors = []
    for cluster_id, count in zip(unique, counts):
        center = kmeans.cluster_centers_[cluster_id]
        r, g, b = int(center[0]), int(center[1]), int(center[2])
        colors.append({
            "rank": 0,  # placeholder; final rank assigned after sorting
            "rgb": {"r": r, "g": g, "b": b},
            "hex": rgb_to_hex(r, g, b),
            "hsl": rgb_to_hsl(r, g, b),
            "proportion": round(count / total, 3),
        })

    # Rank by descending share of the sampled pixels.
    colors.sort(key=lambda x: x["proportion"], reverse=True)
    for i, c in enumerate(colors):
        c["rank"] = i + 1

    return colors
+
def create_palette_image(colors, width=600, height=100, output_path=None):
    """Render a palette as proportional horizontal color blocks.

    Args:
        colors: list of color dicts (needs "rgb" and "proportion" keys).
        width, height: size of the output strip in pixels.
        output_path: if given, the strip is also saved there as PNG.

    Returns:
        The PIL Image object (also when saved to disk).
    """
    img = Image.new('RGB', (width, height), (255, 255, 255))
    draw = ImageDraw.Draw(img)

    x = 0
    last = len(colors) - 1
    for i, color in enumerate(colors):
        rgb = (color["rgb"]["r"], color["rgb"]["g"], color["rgb"]["b"])
        block_width = int(color["proportion"] * width)
        # Fix: truncating every block independently left an unpainted
        # white strip at the right edge; stretch the final block so the
        # palette always spans the full width.
        if i == last:
            block_width = width - x
        if block_width > 0:
            draw.rectangle([x, 0, x + block_width, height], fill=rgb)
            x += block_width

    if output_path:
        img.save(output_path)
    return img
+
def get_background_pixels(img_array, threshold_top=0.3, threshold_bottom=0.7):
    """Collect pixels from the assumed background regions of the image.

    Background is approximated as the top 40% of rows (sky/trees) plus
    the left 35% of columns (open scenery), per the production-sheet
    analysis referenced in the original comment.

    NOTE(review): threshold_top / threshold_bottom are accepted for
    interface compatibility but are not used -- the region fractions are
    hard-coded below; confirm whether the parameters should drive them.

    Args:
        img_array: (H, W, 3) RGB image array.

    Returns:
        (N, 3) array of background pixels, each pixel counted once.
    """
    h, w = img_array.shape[:2]

    top_rows = int(h * 0.4)    # top 40% of rows, full width
    left_cols = int(w * 0.35)  # left 35% of columns

    top_region = img_array[:top_rows, :, :]
    # Fix: exclude the rows already covered by the top strip so the
    # top-left corner is not double-counted (previously those pixels
    # appeared twice and biased the KMeans clustering toward them).
    left_region = img_array[top_rows:, :left_cols, :]

    bg_pixels = np.vstack([
        top_region.reshape(-1, 3),
        left_region.reshape(-1, 3),
    ])

    return bg_pixels
+
def main():
    """Extract color features for img_1..img_9.

    Dimension 2: global color palette (6 dominant colors, whole frame).
    Dimension 7: background color palette (4 dominant colors, top/left
    regions). For each image a JSON palette and a PNG visualization are
    written, followed by one mapping.json per dimension.
    """
    input_dir = "input"

    # Output roots for the two color dimensions.
    palette_dir = "output/features/color_palette"
    bg_dir = "output/features/background_color"
    # Fix: these directories were never created, so every json/png write
    # below raised FileNotFoundError on a fresh checkout.
    os.makedirs(palette_dir, exist_ok=True)
    os.makedirs(bg_dir, exist_ok=True)

    palette_mappings = []
    bg_mappings = []

    # Paragraph ids tying each image to the production-sheet analysis.
    # NOTE(review): the analysis JSON records background paragraphs such
    # as 段落2.2 / 段落3.2 for some images -- verify the 段落X.3 bg_seg
    # ids used here against that file.
    segment_map = {
        "img_1": {"global_seg": "段落1", "bg_seg": "段落1.3"},
        "img_2": {"global_seg": "段落2", "bg_seg": "段落2.3"},
        "img_3": {"global_seg": "段落3", "bg_seg": "段落3.3"},
        "img_4": {"global_seg": "段落4", "bg_seg": "段落4.3"},
        "img_5": {"global_seg": "段落5", "bg_seg": "段落5.3"},
        "img_6": {"global_seg": "段落6", "bg_seg": "段落6.3"},
        "img_7": {"global_seg": "段落7", "bg_seg": "段落7.3"},
        "img_8": {"global_seg": "段落8", "bg_seg": "段落8.3"},
        "img_9": {"global_seg": "段落9", "bg_seg": "段落9.2"},
    }

    for i in range(1, 10):
        img_id = f"img_{i}"
        image_path = os.path.join(input_dir, f"{img_id}.jpg")

        # Missing frames are skipped rather than treated as fatal.
        if not os.path.exists(image_path):
            continue

        print(f"处理 {img_id}...")

        img = Image.open(image_path).convert('RGB')
        img_array = np.array(img)
        all_pixels = img_array.reshape(-1, 3).astype(float)

        # === Dimension 2: global color palette ===
        global_colors = extract_palette(all_pixels, n_colors=6, img_id=img_id, label="global")

        # Persist the palette as JSON.
        global_json_path = os.path.join(palette_dir, f"{img_id}_color_palette.json")
        with open(global_json_path, 'w', encoding='utf-8') as f:
            json.dump({
                "image_id": img_id,
                "type": "global_color_palette",
                "n_colors": 6,
                "colors": global_colors,
                "description": "全局主色调,按比例排序"
            }, f, ensure_ascii=False, indent=2)

        # And as a PNG strip for quick visual inspection.
        global_img_path = os.path.join(palette_dir, f"{img_id}_color_palette.png")
        create_palette_image(global_colors, output_path=global_img_path)

        print(f"  ✓ 全局调色板: {[c['hex'] for c in global_colors[:3]]}")

        seg_info = segment_map.get(img_id, {})
        palette_mappings.append({
            "file": f"{img_id}_color_palette.png",
            "json_file": f"{img_id}_color_palette.json",
            "source_image": f"input/{img_id}.jpg",
            "segment": seg_info.get("global_seg", f"段落{i}"),
            "category": "形式",
            "feature": "整体色彩调色板(白绿配色)",
            "highlight_cluster": "cluster_3",
            "top_colors": [c["hex"] for c in global_colors[:3]]
        })

        # === Dimension 7: background environment color ===
        bg_pixels = get_background_pixels(img_array)
        bg_colors = extract_palette(bg_pixels.astype(float), n_colors=4, img_id=img_id, label="background")

        # Persist the background palette as JSON.
        bg_json_path = os.path.join(bg_dir, f"{img_id}_background_color.json")
        with open(bg_json_path, 'w', encoding='utf-8') as f:
            json.dump({
                "image_id": img_id,
                "type": "background_color_palette",
                "n_colors": 4,
                "colors": bg_colors,
                "extraction_region": "上40%区域 + 左35%区域",
                "description": "背景区域主色调(自然绿色调)"
            }, f, ensure_ascii=False, indent=2)

        # And as a PNG strip.
        bg_img_path = os.path.join(bg_dir, f"{img_id}_background_color.png")
        create_palette_image(bg_colors, width=400, output_path=bg_img_path)

        print(f"  ✓ 背景色彩: {[c['hex'] for c in bg_colors[:3]]}")

        bg_mappings.append({
            "file": f"{img_id}_background_color.png",
            "json_file": f"{img_id}_background_color.json",
            "source_image": f"input/{img_id}.jpg",
            "segment": seg_info.get("bg_seg", ""),
            "category": "实质",
            "feature": "自然背景色彩(草地/树木绿色调)",
            "element_id": "元素3",
            "highlight_cluster": "cluster_3",
            "top_colors": [c["hex"] for c in bg_colors[:3]]
        })

    # Dimension-level index for color_palette outputs.
    palette_mapping = {
        "dimension": "color_palette",
        "description": "全局色彩调色板,使用KMeans聚类提取6个主色调",
        "tool": "scikit-learn KMeans",
        "format": {
            "palette_image": "PNG,色块按比例排列,宽600px高100px",
            "palette_json": "JSON,包含6个主色调的RGB/HEX/HSL值和比例"
        },
        "mappings": palette_mappings
    }
    with open(os.path.join(palette_dir, "mapping.json"), 'w', encoding='utf-8') as f:
        json.dump(palette_mapping, f, ensure_ascii=False, indent=2)

    # Dimension-level index for background_color outputs.
    bg_mapping = {
        "dimension": "background_color",
        "description": "背景区域色彩调色板,提取图片上部和左侧区域的主色调",
        "tool": "scikit-learn KMeans",
        "format": {
            "bg_image": "PNG,色块按比例排列,宽400px高100px",
            "bg_json": "JSON,包含4个主色调的RGB/HEX/HSL值和比例"
        },
        "mappings": bg_mappings
    }
    with open(os.path.join(bg_dir, "mapping.json"), 'w', encoding='utf-8') as f:
        json.dump(bg_mapping, f, ensure_ascii=False, indent=2)

    print("\n✓ 色彩调色板提取完成")
+
+if __name__ == "__main__":
+    main()

+ 187 - 0
examples/find knowledge/scripts/extract_palette_texture.py

@@ -0,0 +1,187 @@
+#!/usr/bin/env python3
+"""
+提取调色板颜料色彩 - 维度3: palette_texture_colors
+针对img_1, img_5, img_6(cluster_2_texture聚类图片)
+"""
+
+import numpy as np
+import json
+import os
+from PIL import Image, ImageDraw
+from sklearn.cluster import KMeans
+import colorsys
+
def rgb_to_hsl(r, g, b):
    """Convert 0-255 RGB components to an HSL dict (h in degrees, s/l in percent)."""
    norm = (r / 255.0, g / 255.0, b / 255.0)
    # colorsys returns (hue, lightness, saturation) in [0, 1].
    h, l, s = colorsys.rgb_to_hls(*norm)
    return {"h": round(h * 360, 1), "s": round(s * 100, 1), "l": round(l * 100, 1)}
+
def rgb_to_hex(r, g, b):
    """Format RGB components as a lowercase #rrggbb hex string (floats truncated)."""
    return "#" + "".join("{:02x}".format(int(c)) for c in (r, g, b))
+
def is_vivid_color(r, g, b, min_saturation=0.2, min_value=0.15):
    """Return True for vivid colors, i.e. neither grayish nor near-black.

    Vivid means HSV saturation > min_saturation AND value > min_value,
    computed on RGB normalized to [0, 1].
    """
    _, sat, val = colorsys.rgb_to_hsv(r / 255.0, g / 255.0, b / 255.0)
    return sat > min_saturation and val > min_value
+
def extract_vivid_colors(pixels, n_colors=8):
    """Cluster the vivid (saturated, non-dark) pixels into a palette.

    Args:
        pixels: float array of shape (N, 3) with RGB values in [0, 255].
        n_colors: maximum number of palette entries.

    Returns:
        List of color dicts (rank/rgb/hex/hsl/proportion/is_vivid),
        sorted by descending proportion of the clustered pixels.
    """
    # Vectorized vividness mask, equivalent to calling is_vivid_color on
    # each pixel: HSV value = max(R,G,B)/255, saturation = (max-min)/max.
    # Fix: the previous per-pixel Python loop was pure interpreter work
    # and dominated runtime on full-resolution images.
    px = np.asarray(pixels, dtype=float)
    mx = px.max(axis=1)
    mn = px.min(axis=1)
    val = mx / 255.0
    sat = np.where(mx > 0, (mx - mn) / np.where(mx > 0, mx, 1.0), 0.0)
    vivid_pixels = px[(sat > 0.2) & (val > 0.15)]

    if len(vivid_pixels) < 100:
        # Too few vivid pixels to cluster meaningfully; fall back to all.
        vivid_pixels = px

    # Subsample for speed. Fix: seeded so repeated runs are reproducible.
    if len(vivid_pixels) > 5000:
        rng = np.random.default_rng(42)
        idx = rng.choice(len(vivid_pixels), 5000, replace=False)
        vivid_pixels = vivid_pixels[idx]

    kmeans = KMeans(n_clusters=min(n_colors, len(vivid_pixels)), random_state=42, n_init=10)
    kmeans.fit(vivid_pixels)

    # Share of pixels assigned to each cluster.
    labels = kmeans.predict(vivid_pixels)
    unique, counts = np.unique(labels, return_counts=True)
    total = len(labels)

    colors = []
    for cluster_id, count in zip(unique, counts):
        center = kmeans.cluster_centers_[cluster_id]
        r, g, b = int(center[0]), int(center[1]), int(center[2])
        colors.append({
            "rank": len(colors) + 1,
            "rgb": {"r": r, "g": g, "b": b},
            "hex": rgb_to_hex(r, g, b),
            "hsl": rgb_to_hsl(r, g, b),
            "proportion": round(count / total, 3),
            "is_vivid": is_vivid_color(r, g, b)
        })

    # Rank by descending proportion.
    colors.sort(key=lambda x: x["proportion"], reverse=True)
    for i, c in enumerate(colors):
        c["rank"] = i + 1

    return colors
+
def create_color_swatches(colors, swatch_size=80, output_path=None):
    """Draw each color as one square swatch in a horizontal strip.

    Args:
        colors: list of color dicts (needs the "rgb" key).
        swatch_size: side length of each square swatch in pixels.
        output_path: if given, the strip is also saved there as PNG.

    Returns:
        The PIL Image object (also when saved to disk).
    """
    strip = Image.new('RGB', (len(colors) * swatch_size, swatch_size), (240, 240, 240))
    painter = ImageDraw.Draw(strip)

    for idx, entry in enumerate(colors):
        fill = (entry["rgb"]["r"], entry["rgb"]["g"], entry["rgb"]["b"])
        left = idx * swatch_size
        painter.rectangle([left, 0, left + swatch_size, swatch_size], fill=fill)

    if output_path:
        strip.save(output_path)
    return strip
+
def extract_palette_region(img_array, img_id):
    """Approximate the paint-palette colors of one image.

    A true palette-region crop is not available, so the vivid colors of
    the whole frame serve as a proxy for the paint blobs on the palette.

    NOTE(review): img_id is accepted for interface compatibility but is
    unused; per the original comment the palette sits centre/right-bottom
    in img_1/5/6, so per-image cropping could be added here.

    Args:
        img_array: (H, W, 3) RGB image array.
        img_id: image identifier (currently unused).

    Returns:
        Palette list produced by extract_vivid_colors (up to 8 colors).
    """
    # Fix: removed unused locals h, w from the original implementation.
    all_pixels = img_array.reshape(-1, 3).astype(float)
    return extract_vivid_colors(all_pixels, n_colors=8)
+
def main():
    """Extract dimension-3 (palette_texture_colors) features.

    For every image that shows the paint palette, extract the vivid
    paint colors, write a JSON palette plus a PNG swatch strip, and
    finally write a mapping.json indexing all outputs.
    """
    input_dir = "input"
    output_dir = "output/features/palette_texture_colors"
    # Fix: the output directory was never created, so the json/png
    # writes below raised FileNotFoundError on a fresh checkout.
    os.makedirs(output_dir, exist_ok=True)

    # Images of the cluster_2_texture highlight cluster: img_1, img_5, img_6.
    target_images = {
        "img_1": {"segment": "段落1.1.2.3", "note": "调色板上的颜料(主要特写)"},
        "img_5": {"segment": "段落5.1.3.1", "note": "颜料特写(调色板主体)"},
        "img_6": {"segment": "段落6.1.3.3", "note": "调色板颜料(背部特写中可见)"},
    }

    # Other images where the palette is visible but not the cluster focus.
    other_images = {
        "img_2": {"segment": "段落2.1.2.3", "note": "调色板颜料"},
        "img_3": {"segment": "段落3.1.2.2", "note": "调色板颜料"},
        "img_4": {"segment": "段落4.1.2.3", "note": "调色板颜料"},
        "img_8": {"segment": "段落8.1.2.3", "note": "调色板颜料"},
    }

    all_images = {**target_images, **other_images}
    mappings = []

    for img_id, info in all_images.items():
        image_path = os.path.join(input_dir, f"{img_id}.jpg")
        # Missing frames are skipped rather than treated as fatal.
        if not os.path.exists(image_path):
            continue

        print(f"处理 {img_id}...")

        img = Image.open(image_path).convert('RGB')
        img_array = np.array(img)

        colors = extract_palette_region(img_array, img_id)

        # Persist the palette as JSON.
        json_path = os.path.join(output_dir, f"{img_id}_palette_texture.json")
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump({
                "image_id": img_id,
                "type": "palette_texture_colors",
                "n_colors": len(colors),
                "colors": colors,
                "description": "调色板颜料色彩(鲜艳色彩提取,过滤白色/黑色/灰色)",
                "note": info["note"]
            }, f, ensure_ascii=False, indent=2)

        # And as a swatch-strip PNG.
        img_path = os.path.join(output_dir, f"{img_id}_palette_texture.png")
        create_color_swatches(colors, output_path=img_path)

        vivid_colors = [c for c in colors if c.get("is_vivid", False)]
        print(f"  ✓ 提取 {len(colors)} 种颜色,其中 {len(vivid_colors)} 种鲜艳色")
        print(f"  颜色: {[c['hex'] for c in colors[:5]]}")

        is_primary = img_id in target_images
        mappings.append({
            "file": f"{img_id}_palette_texture.png",
            "json_file": f"{img_id}_palette_texture.json",
            "source_image": f"input/{img_id}.jpg",
            "segment": info["segment"],
            "category": "实质",
            "feature": "调色板颜料色彩(Impasto油画颜料)",
            "element_id": "元素2",
            "highlight_cluster": "cluster_2_texture" if is_primary else None,
            "is_primary_cluster_image": is_primary
        })

    # Dimension-level index of everything written above.
    mapping = {
        "dimension": "palette_texture_colors",
        "description": "调色板上油画颜料的色彩提取,使用KMeans聚类提取鲜艳颜色(过滤白/黑/灰)",
        "tool": "scikit-learn KMeans + HSV饱和度过滤",
        "format": {
            "texture_image": "PNG,色块展示图,每个色块80x80px",
            "texture_json": "JSON,包含颜料颜色的RGB/HEX/HSL值和比例"
        },
        "primary_images": ["img_1", "img_5", "img_6"],
        "mappings": mappings
    }

    with open(os.path.join(output_dir, "mapping.json"), 'w', encoding='utf-8') as f:
        json.dump(mapping, f, ensure_ascii=False, indent=2)

    print("\n✓ 颜料色彩提取完成")
+
+if __name__ == "__main__":
+    main()

+ 184 - 0
examples/find knowledge/scripts/extract_pose.py

@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+"""
+提取人体姿态骨骼图 - 使用MediaPipe Pose
+输出:每张图片的骨骼关键点图(PNG)+ 关键点坐标(JSON)
+"""
+
+import mediapipe as mp
+import cv2
+import numpy as np
+import json
+import os
+from PIL import Image
+
# Module-level MediaPipe handles shared by extract_pose(): the Pose solution,
# the landmark-drawing helpers, and the default drawing styles.
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
+
def extract_pose(image_path, output_dir, img_id):
    """Extract pose landmarks for one image with MediaPipe Pose.

    Writes two artifacts into ``output_dir``:
      * ``{img_id}_pose_skeleton.png`` — white skeleton drawn on a black
        canvas the same size as the source image;
      * ``{img_id}_pose_keypoints.json`` — per-landmark normalized (x, y, z),
        visibility, and pixel coordinates.

    Returns the landmarks dict on success, or None when the image cannot be
    read, no pose is detected, or the skeleton image cannot be written.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"无法读取图片: {image_path}")
        return None

    # Fix: output_dir was never created, so cv2.imwrite failed silently and
    # the JSON open() raised FileNotFoundError on a fresh checkout.
    os.makedirs(output_dir, exist_ok=True)

    h, w = img.shape[:2]
    # MediaPipe expects RGB; OpenCV loads BGR.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    with mp_pose.Pose(
        static_image_mode=True,
        model_complexity=2,
        enable_segmentation=False,
        min_detection_confidence=0.5
    ) as pose:
        results = pose.process(img_rgb)

        if not results.pose_landmarks:
            print(f"  未检测到姿态: {img_id}")
            return None

        # Draw the skeleton on a black canvas sized like the original image.
        skeleton_img = np.zeros((h, w, 3), dtype=np.uint8)

        mp_drawing.draw_landmarks(
            skeleton_img,
            results.pose_landmarks,
            mp_pose.POSE_CONNECTIONS,
            landmark_drawing_spec=mp_drawing.DrawingSpec(
                color=(255, 255, 255), thickness=3, circle_radius=5
            ),
            connection_drawing_spec=mp_drawing.DrawingSpec(
                color=(200, 200, 200), thickness=2
            )
        )

        # Fix: cv2.imwrite signals failure via its return value, not an
        # exception — check it instead of assuming success.
        skeleton_path = os.path.join(output_dir, f"{img_id}_pose_skeleton.png")
        if not cv2.imwrite(skeleton_path, skeleton_img):
            print(f"  无法保存骨骼图: {skeleton_path}")
            return None

        # Collect per-landmark data keyed by the PoseLandmark enum name.
        landmarks_data = {}
        landmark_names = [lm.name for lm in mp_pose.PoseLandmark]

        for i, landmark in enumerate(results.pose_landmarks.landmark):
            name = landmark_names[i] if i < len(landmark_names) else f"landmark_{i}"
            landmarks_data[name] = {
                "x": round(landmark.x, 4),  # normalized coordinates [0, 1]
                "y": round(landmark.y, 4),
                "z": round(landmark.z, 4),
                "visibility": round(landmark.visibility, 4),
                "pixel_x": int(landmark.x * w),
                "pixel_y": int(landmark.y * h)
            }

        # Persist the keypoints next to the skeleton image.
        json_path = os.path.join(output_dir, f"{img_id}_pose_keypoints.json")
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump({
                "image_id": img_id,
                "image_size": {"width": w, "height": h},
                "landmarks": landmarks_data,
                "skeleton_image": f"{img_id}_pose_skeleton.png"
            }, f, ensure_ascii=False, indent=2)

        print(f"  ✓ {img_id}: 骨骼图已保存 -> {skeleton_path}")
        return landmarks_data
+
def main():
    """Run pose extraction over input/img_1.jpg … img_9.jpg and write mapping.json."""
    input_dir = "input"
    output_dir = "output/features/pose_skeleton"
    # Fix: the output directory was never created; the first write failed on a
    # fresh checkout (cv2.imwrite silently, open() with FileNotFoundError).
    os.makedirs(output_dir, exist_ok=True)

    results_summary = []

    for i in range(1, 10):
        img_id = f"img_{i}"
        image_path = os.path.join(input_dir, f"{img_id}.jpg")

        if not os.path.exists(image_path):
            print(f"图片不存在: {image_path}")
            continue

        print(f"处理 {img_id}...")
        landmarks = extract_pose(image_path, output_dir, img_id)

        if landmarks:
            results_summary.append({
                "image_id": img_id,
                "detected": True,
                "keypoints_file": f"{img_id}_pose_keypoints.json",
                "skeleton_file": f"{img_id}_pose_skeleton.png"
            })
        else:
            results_summary.append({
                "image_id": img_id,
                "detected": False
            })

    # Skeleton of mapping.json; entries are appended below.
    mapping = {
        "dimension": "pose_skeleton",
        "description": "人体姿态骨骼关键点图,使用MediaPipe Pose提取33个关键点",
        "tool": "MediaPipe Pose v0.10.9",
        "format": {
            "skeleton_image": "PNG,黑色背景,白色骨骼连线",
            "keypoints_json": "JSON,包含33个关键点的归一化坐标和像素坐标"
        },
        "mappings": []
    }

    # Image -> segment/feature metadata, per the production-table structure.
    pose_segment_map = {
        "img_1": [
            {"segment": "段落1.1", "category": "实质", "feature": "女性人物姿态", "element": "元素1"},
        ],
        "img_2": [
            {"segment": "段落2.1", "category": "实质", "feature": "女性人物姿态", "element": "元素1"},
        ],
        "img_3": [
            {"segment": "段落3.1", "category": "实质", "feature": "女性人物姿态(跪姿)", "element": "元素1"},
        ],
        "img_4": [
            {"segment": "段落4.1", "category": "实质", "feature": "女性人物姿态(侧身)", "element": "元素1"},
        ],
        "img_5": [
            {"segment": "段落5.1", "category": "实质", "feature": "女性人物姿态(手臂特写)", "element": "元素1"},
        ],
        "img_6": [
            {"segment": "段落6.1", "category": "实质", "feature": "女性人物姿态(背部特写)", "element": "元素1"},
        ],
        "img_7": [
            {"segment": "段落7.1", "category": "实质", "feature": "女性人物姿态(侧颜/嗅花)", "element": "元素1"},
        ],
        "img_8": [
            {"segment": "段落8.1", "category": "实质", "feature": "女性人物姿态(侧身)", "element": "元素1"},
        ],
        "img_9": [
            {"segment": "段落9.1", "category": "实质", "feature": "女性人物姿态(背影远景)", "element": "元素1"},
        ],
    }

    # One mapping entry per (detected image, mapped segment).
    for result in results_summary:
        img_id = result["image_id"]
        if result["detected"]:
            segments = pose_segment_map.get(img_id, [])
            for seg in segments:
                mapping["mappings"].append({
                    "file": result["skeleton_file"],
                    "keypoints_file": result["keypoints_file"],
                    "source_image": f"input/{img_id}.jpg",
                    "segment": seg["segment"],
                    "category": seg["category"],
                    "feature": seg["feature"],
                    "element_id": seg["element"]
                })

    mapping_path = os.path.join(output_dir, "mapping.json")
    with open(mapping_path, 'w', encoding='utf-8') as f:
        json.dump(mapping, f, ensure_ascii=False, indent=2)

    print(f"\n✓ mapping.json 已保存: {mapping_path}")
    print(f"✓ 处理完成: {len([r for r in results_summary if r['detected']])} / {len(results_summary)} 张图片检测到姿态")
+
# Script entry point: run the pose-extraction pipeline.
if __name__ == "__main__":
    main()

+ 192 - 0
examples/find knowledge/scripts/extract_pose_v2.py

@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+"""
+提取人体姿态骨骼图 v2 - 使用MediaPipe Pose
+针对不同图片使用不同参数
+"""
+
+import mediapipe as mp
+import cv2
+import numpy as np
+import json
+import os
+
# Module-level MediaPipe handles used by extract_pose().
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils
+
def extract_pose(image_path, output_dir, img_id, complexity=2, conf=0.5):
    """Extract pose landmarks for one image with per-image MediaPipe settings.

    Parameters:
        image_path: path of the source image (read with OpenCV).
        output_dir: directory receiving the PNG/JSON artifacts.
        img_id: identifier used in the output file names.
        complexity: MediaPipe ``model_complexity`` (0 fastest … 2 most accurate).
        conf: minimum detection confidence.

    Writes ``{img_id}_pose_skeleton.png`` and ``{img_id}_pose_keypoints.json``
    into ``output_dir``. Returns the landmarks dict, or None when the image is
    unreadable or no pose is detected.
    """
    img = cv2.imread(image_path)
    if img is None:
        return None

    # Fix: create the output directory up front — cv2.imwrite fails silently
    # and open() raises FileNotFoundError when it is missing.
    os.makedirs(output_dir, exist_ok=True)

    h, w = img.shape[:2]
    # MediaPipe expects RGB; OpenCV loads BGR.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    with mp_pose.Pose(
        static_image_mode=True,
        model_complexity=complexity,
        enable_segmentation=False,
        min_detection_confidence=conf
    ) as pose:
        results = pose.process(img_rgb)

        if not results.pose_landmarks:
            return None

        # White skeleton on a black canvas, same size as the source image.
        skeleton_img = np.zeros((h, w, 3), dtype=np.uint8)

        mp_drawing.draw_landmarks(
            skeleton_img,
            results.pose_landmarks,
            mp_pose.POSE_CONNECTIONS,
            landmark_drawing_spec=mp_drawing.DrawingSpec(
                color=(255, 255, 255), thickness=4, circle_radius=6
            ),
            connection_drawing_spec=mp_drawing.DrawingSpec(
                color=(180, 180, 180), thickness=3
            )
        )

        # Fix: check cv2.imwrite's boolean result instead of assuming success.
        skeleton_path = os.path.join(output_dir, f"{img_id}_pose_skeleton.png")
        if not cv2.imwrite(skeleton_path, skeleton_img):
            return None

        # Collect per-landmark data keyed by the PoseLandmark enum name.
        landmark_names = [lm.name for lm in mp_pose.PoseLandmark]
        landmarks_data = {}

        for i, landmark in enumerate(results.pose_landmarks.landmark):
            name = landmark_names[i] if i < len(landmark_names) else f"landmark_{i}"
            landmarks_data[name] = {
                "x": round(landmark.x, 4),  # normalized coordinates [0, 1]
                "y": round(landmark.y, 4),
                "z": round(landmark.z, 4),
                "visibility": round(landmark.visibility, 4),
                "pixel_x": int(landmark.x * w),
                "pixel_y": int(landmark.y * h)
            }

        # Persist the keypoints plus the parameters used for this image.
        json_path = os.path.join(output_dir, f"{img_id}_pose_keypoints.json")
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump({
                "image_id": img_id,
                "image_size": {"width": w, "height": h},
                "model_complexity": complexity,
                "detection_confidence": conf,
                "landmarks": landmarks_data,
                "skeleton_image": f"{img_id}_pose_skeleton.png"
            }, f, ensure_ascii=False, indent=2)

        return landmarks_data
+
def main():
    """Run per-image-tuned pose extraction over input/img_1.jpg … img_9.jpg."""
    input_dir = "input"
    output_dir = "output/features/pose_skeleton"
    # Fix: the output directory was never created before the first write.
    os.makedirs(output_dir, exist_ok=True)

    # Per-image MediaPipe settings.
    # img_5 is an arm close-up: full-body pose cannot be detected, record it
    # as a partial view instead of running detection.
    configs = {
        "img_1": {"complexity": 2, "conf": 0.5},
        "img_2": {"complexity": 0, "conf": 0.1},
        "img_3": {"complexity": 2, "conf": 0.5},
        "img_4": {"complexity": 2, "conf": 0.5},
        "img_5": None,  # 手臂特写,无法检测全身
        "img_6": {"complexity": 0, "conf": 0.1},
        "img_7": {"complexity": 0, "conf": 0.1},
        "img_8": {"complexity": 2, "conf": 0.5},
        "img_9": {"complexity": 0, "conf": 0.1},
    }

    results_summary = []

    for i in range(1, 10):
        img_id = f"img_{i}"
        image_path = os.path.join(input_dir, f"{img_id}.jpg")

        if not os.path.exists(image_path):
            continue

        config = configs.get(img_id)
        print(f"处理 {img_id}...", end=" ")

        if config is None:
            # Known partial view — skip detection entirely.
            print("跳过(局部特写,无法检测全身姿态)")
            results_summary.append({"image_id": img_id, "detected": False, "reason": "partial_view"})
            continue

        landmarks = extract_pose(image_path, output_dir, img_id, 
                                  config["complexity"], config["conf"])

        if landmarks:
            print(f"✓ 骨骼图已保存")
            results_summary.append({
                "image_id": img_id,
                "detected": True,
                "keypoints_file": f"{img_id}_pose_keypoints.json",
                "skeleton_file": f"{img_id}_pose_skeleton.png"
            })
        else:
            print("✗ 未检测到姿态")
            results_summary.append({"image_id": img_id, "detected": False, "reason": "not_detected"})

    # Image -> segment/feature metadata used when building mapping.json.
    pose_segment_map = {
        "img_1": {"segment": "段落1.1", "feature": "女性人物姿态(站立侧身作画)", "element": "元素1"},
        "img_2": {"segment": "段落2.1", "feature": "女性人物姿态(背对镜头作画)", "element": "元素1"},
        "img_3": {"segment": "段落3.1", "feature": "女性人物姿态(跪姿作画)", "element": "元素1"},
        "img_4": {"segment": "段落4.1", "feature": "女性人物姿态(侧身面对画架)", "element": "元素1"},
        "img_6": {"segment": "段落6.1", "feature": "女性人物姿态(背部特写作画)", "element": "元素1"},
        "img_7": {"segment": "段落7.1", "feature": "女性人物姿态(侧颜嗅花)", "element": "元素1"},
        "img_8": {"segment": "段落8.1", "feature": "女性人物姿态(侧身面对画架)", "element": "元素1"},
        "img_9": {"segment": "段落9.1", "feature": "女性人物姿态(背影远景)", "element": "元素1"},
    }

    mapping = {
        "dimension": "pose_skeleton",
        "description": "人体姿态骨骼关键点图,使用MediaPipe Pose提取33个关键点",
        "tool": "MediaPipe Pose v0.10.9",
        "format": {
            "skeleton_image": "PNG,黑色背景,白色骨骼连线,尺寸与原图相同",
            "keypoints_json": "JSON,包含33个关键点的归一化坐标(x,y,z)和像素坐标"
        },
        "mappings": []
    }

    # One entry per image: full record when detected, a stub with the failure
    # reason otherwise (so every input image is accounted for downstream).
    for result in results_summary:
        img_id = result["image_id"]
        if result["detected"]:
            seg_info = pose_segment_map.get(img_id, {})
            mapping["mappings"].append({
                "file": result["skeleton_file"],
                "keypoints_file": result["keypoints_file"],
                "source_image": f"input/{img_id}.jpg",
                "segment": seg_info.get("segment", ""),
                "category": "实质",
                "feature": seg_info.get("feature", ""),
                "element_id": seg_info.get("element", "元素1"),
                "highlight_cluster": "cluster_1"
            })
        else:
            mapping["mappings"].append({
                "file": None,
                "source_image": f"input/{img_id}.jpg",
                "segment": pose_segment_map.get(img_id, {}).get("segment", ""),
                "category": "实质",
                "feature": "局部特写,无法提取全身姿态",
                "element_id": "元素1",
                "note": result.get("reason", "")
            })

    mapping_path = os.path.join(output_dir, "mapping.json")
    with open(mapping_path, 'w', encoding='utf-8') as f:
        json.dump(mapping, f, ensure_ascii=False, indent=2)

    detected = len([r for r in results_summary if r['detected']])
    print(f"\n✓ 完成: {detected}/{len(results_summary)} 张图片检测到姿态")
    print(f"✓ mapping.json 已保存")
+
# Script entry point: run the v2 pose-extraction pipeline.
if __name__ == "__main__":
    main()

+ 281 - 0
examples/find knowledge/scripts/extract_vlm_features.py

@@ -0,0 +1,281 @@
+#!/usr/bin/env python3
+"""
+使用VLM提取语义特征
+维度4: 人物外观语义描述 (person_appearance)
+维度5: 场景构图描述 (composition_layout)
+维度6: 光影氛围描述 (lighting_atmosphere)
+维度8: 画中画内容描述 (painting_content)
+"""
+
+import openai
+import base64
+import json
+import os
+import time
+
# OpenRouter-backed OpenAI client used by query_vlm(); reads the API key from
# the OPEN_ROUTER_API_KEY environment variable (None when unset).
client = openai.OpenAI(
    api_key=os.environ.get('OPEN_ROUTER_API_KEY'),
    base_url='https://openrouter.ai/api/v1'
)
+
def encode_image(image_path):
    """Return the base64-encoded contents of the file at ``image_path``."""
    with open(image_path, 'rb') as fh:
        payload = fh.read()
    return base64.b64encode(payload).decode()
+
def query_vlm(image_path, prompt, max_tokens=800):
    """Send one image plus a text prompt to the VLM and return the reply text."""
    encoded = encode_image(image_path)
    image_part = {
        'type': 'image_url',
        'image_url': {'url': f'data:image/jpeg;base64,{encoded}'}
    }
    text_part = {'type': 'text', 'text': prompt}
    completion = client.chat.completions.create(
        model='google/gemini-2.0-flash-001',
        max_tokens=max_tokens,
        messages=[{'role': 'user', 'content': [image_part, text_part]}]
    )
    return completion.choices[0].message.content
+
def extract_json_from_response(text):
    """Best-effort extraction of the first {...} JSON object in a VLM reply.

    Takes the span from the first '{' to the last '}' and tries to parse it.
    Returns the parsed object on success; otherwise wraps the unmodified reply
    as {"raw_text": text} so callers always receive a dict.
    """
    start = text.find('{')
    end = text.rfind('}') + 1
    if start >= 0 and end > start:
        try:
            return json.loads(text[start:end])
        except ValueError:
            # Fix: was a bare `except:`, which also swallowed KeyboardInterrupt
            # and SystemExit. json.JSONDecodeError subclasses ValueError.
            pass
    return {"raw_text": text}
+
# ============================================================
# Dimension 4: semantic description of the person's appearance.
# Asks the VLM for clothing/hair/accessories/skin plus an English
# generation prompt; JSON-only reply requested.
# ============================================================
PERSON_APPEARANCE_PROMPT = """请分析图片中女性人物的外观特征,以JSON格式返回:
{
  "clothing": {
    "type": "服装类型(如:白色长裙/白色上衣等)",
    "color": "颜色",
    "style": "风格描述",
    "details": "细节描述(如:飘逸、垂坠感等)"
  },
  "hair": {
    "color": "发色",
    "length": "发长(长/中/短)",
    "style": "发型(直发/卷发/盘发等)",
    "details": "细节(如:散落肩上、发梢微卷等)"
  },
  "accessories": {
    "earrings": "耳饰描述(无则填null)",
    "necklace": "项链描述(无则填null)",
    "bracelet": "手镯描述(无则填null)",
    "other": "其他配饰"
  },
  "skin": "肤色描述",
  "overall_impression": "整体气质描述(50字以内)",
  "generation_prompt": "用于AI生成的英文提示词(描述人物外观,50词以内)"
}
只返回JSON,不要其他文字。"""

# ============================================================
# Dimension 5: scene composition (shot type, camera angle, subject
# placement, depth layers) plus an English generation prompt.
# ============================================================
COMPOSITION_PROMPT = """请分析图片的构图特征,以JSON格式返回:
{
  "shot_type": "景别(特写/近景/中景/全景/远景)",
  "camera_angle": "拍摄角度(正面/侧面/背面/俯视/仰视)",
  "camera_position": "相机位置描述(如:人物右后方、侧前方等)",
  "subject_position": {
    "horizontal": "主体水平位置(左/中/右,及大致比例)",
    "vertical": "主体垂直位置(上/中/下,及大致比例)"
  },
  "composition_type": "构图类型(如:三分法/对角线/引导线等)",
  "visual_flow": "视线引导方向描述",
  "depth_layers": "景深层次(前景/中景/背景的内容)",
  "aspect_ratio": "画面比例",
  "generation_prompt": "用于AI生成的英文构图提示词(30词以内)"
}
只返回JSON,不要其他文字。"""

# ============================================================
# Dimension 6: lighting and atmosphere (light type/direction/quality,
# bokeh, color temperature, mood) plus an English generation prompt.
# ============================================================
LIGHTING_PROMPT = """请分析图片的光影和氛围特征,以JSON格式返回:
{
  "light_type": "光照类型(自然光/逆光/侧光/散射光等)",
  "light_direction": "光线方向(从哪个方向照射)",
  "light_quality": "光线质感(柔和/硬朗/温暖/冷调等)",
  "bokeh": {
    "present": true或false,
    "intensity": "虚化程度(轻微/中等/强烈)",
    "description": "散景描述"
  },
  "color_temperature": "色温(暖/中性/冷)",
  "overall_atmosphere": "整体氛围(如:梦幻/清新/温暖/浪漫等)",
  "mood": "情绪感受(50字以内)",
  "generation_prompt": "用于AI生成的英文光影提示词(如:soft natural backlight, bokeh background, dreamy atmosphere,30词以内)"
}
只返回JSON,不要其他文字。"""

# ============================================================
# Dimension 8: painting-within-the-picture — describes the canvas on the
# easel when visible (subject, style, completion) and its narrative role.
# ============================================================
PAINTING_CONTENT_PROMPT = """请分析图片中画架上的画布内容(如果可见),以JSON格式返回:
{
  "canvas_visible": true或false,
  "canvas_content": {
    "subject": "画布上描绘的主题(如:人物/风景/空白等)",
    "style": "绘画风格(如:油画/写实/印象派等)",
    "colors": "主要颜色",
    "completion": "完成程度(空白/草稿/半完成/完成)",
    "description": "详细描述(50字以内)"
  },
  "reality_art_relationship": "现实场景与画作的关系(如:画中画/镜像/互文等)",
  "narrative_value": "叙事价值描述(这个画中画结构如何增强画面叙事)",
  "generation_prompt": "用于AI生成的英文提示词(描述画布内容,30词以内,如果画布不可见则填null)"
}
只返回JSON,不要其他文字。"""
+
def main():
    """Run the four VLM dimensions over input/img_1.jpg … img_9.jpg.

    For every (image, dimension) pair this saves one structured feature JSON,
    then writes one mapping.json per dimension directory.
    """
    input_dir = "input"

    # Output directory per dimension.
    dirs = {
        "person_appearance": "output/features/person_appearance",
        "composition_layout": "output/features/composition_layout",
        "lighting_atmosphere": "output/features/lighting_atmosphere",
        "painting_content": "output/features/painting_content",
    }
    # Fix: the output directories were never created, so the first
    # open(..., 'w') raised FileNotFoundError on a fresh checkout.
    for d in dirs.values():
        os.makedirs(d, exist_ok=True)

    # Image -> segment id, per dimension.
    segment_maps = {
        "person_appearance": {
            "img_1": "段落1.1", "img_2": "段落2.1", "img_3": "段落3.1",
            "img_4": "段落4.1", "img_5": "段落5.1", "img_6": "段落6.1",
            "img_7": "段落7.1", "img_8": "段落8.1", "img_9": "段落9.1"
        },
        "composition_layout": {
            "img_1": "段落1", "img_2": "段落2", "img_3": "段落3",
            "img_4": "段落4", "img_5": "段落5", "img_6": "段落6",
            "img_7": "段落7", "img_8": "段落8", "img_9": "段落9"
        },
        "lighting_atmosphere": {
            "img_1": "段落1", "img_2": "段落2", "img_3": "段落3",
            "img_4": "段落4", "img_5": "段落5", "img_6": "段落6",
            "img_7": "段落7", "img_8": "段落8", "img_9": "段落9"
        },
        "painting_content": {
            "img_1": "段落1.2.1", "img_2": "段落2.2.1", "img_3": "段落3.2.1",
            "img_4": "段落4.2.1", "img_5": "段落5.2", "img_6": "段落6.2.1",
            "img_7": None,  # img_7无画架
            "img_8": "段落8.2.1", "img_9": "段落9.2.1"
        }
    }

    # Highlight-cluster id per dimension.
    highlight_clusters = {
        "person_appearance": "cluster_1",
        "composition_layout": "cluster_6",
        "lighting_atmosphere": "cluster_4",
        "painting_content": "cluster_5"
    }

    # Human-readable feature label per dimension.
    feature_names = {
        "person_appearance": "女性人物外观(服装/发型/配饰)",
        "composition_layout": "场景构图(景别/角度/视线引导)",
        "lighting_atmosphere": "光影氛围(逆光/散景/梦幻)",
        "painting_content": "画中画内容(画布上的油画)"
    }

    prompts = {
        "person_appearance": PERSON_APPEARANCE_PROMPT,
        "composition_layout": COMPOSITION_PROMPT,
        "lighting_atmosphere": LIGHTING_PROMPT,
        "painting_content": PAINTING_CONTENT_PROMPT,
    }

    # Accumulate mapping entries per dimension.
    all_mappings = {dim: [] for dim in dirs.keys()}

    for i in range(1, 10):
        img_id = f"img_{i}"
        image_path = os.path.join(input_dir, f"{img_id}.jpg")

        if not os.path.exists(image_path):
            continue

        print(f"\n处理 {img_id}...")

        for dim, prompt in prompts.items():
            # NOTE(review): seg can be None (e.g. img_7 has no easel for
            # painting_content) yet the image is still analyzed and recorded
            # with segment "N/A" — confirm whether a skip was intended here.
            seg = segment_maps[dim].get(img_id)

            print(f"  提取 {dim}...", end=" ")

            try:
                response = query_vlm(image_path, prompt)
                data = extract_json_from_response(response)

                # Save the per-image feature JSON.
                json_path = os.path.join(dirs[dim], f"{img_id}_{dim}.json")
                with open(json_path, 'w', encoding='utf-8') as f:
                    json.dump({
                        "image_id": img_id,
                        "dimension": dim,
                        "segment": seg,
                        "data": data
                    }, f, ensure_ascii=False, indent=2)

                print(f"✓")

                # Record the mapping entry for this dimension; only the two
                # substance dimensions carry an element_id.
                mapping_entry = {
                    "file": f"{img_id}_{dim}.json",
                    "source_image": f"input/{img_id}.jpg",
                    "segment": seg or "N/A",
                    "category": "实质" if dim == "person_appearance" else "形式",
                    "feature": feature_names[dim],
                    "highlight_cluster": highlight_clusters[dim]
                }
                if dim == "person_appearance":
                    mapping_entry["element_id"] = "元素1"
                elif dim == "painting_content":
                    mapping_entry["element_id"] = "元素5"

                all_mappings[dim].append(mapping_entry)

                time.sleep(0.5)  # throttle to avoid API rate limits

            except Exception as e:
                # Best-effort per (image, dimension): log and continue.
                print(f"✗ 错误: {e}")

    # Write one mapping.json per dimension directory.
    dim_descriptions = {
        "person_appearance": "人物外观语义描述,使用VLM分析服装/发型/配饰/气质",
        "composition_layout": "场景构图描述,使用VLM分析景别/角度/视线引导/空间布局",
        "lighting_atmosphere": "光影氛围描述,使用VLM分析光照类型/散景/色温/整体氛围",
        "painting_content": "画中画内容描述,使用VLM分析画布上的油画内容及叙事关系"
    }

    for dim, mappings in all_mappings.items():
        mapping = {
            "dimension": dim,
            "description": dim_descriptions[dim],
            "tool": "Google Gemini 2.0 Flash (via OpenRouter)",
            "format": {
                "json": "JSON,包含结构化语义描述和生成提示词"
            },
            "mappings": mappings
        }

        mapping_path = os.path.join(dirs[dim], "mapping.json")
        with open(mapping_path, 'w', encoding='utf-8') as f:
            json.dump(mapping, f, ensure_ascii=False, indent=2)

    print("\n✓ VLM语义特征提取完成")
+
# Script entry point: run the VLM feature-extraction pipeline.
if __name__ == "__main__":
    main()