刘立冬 3 tygodni temu
rodzic
commit
4fb5789ba0
2 zmienionych plików z 603 dodań i 9 usunięć
  1. 2 2
      run_enhanced_search.py
  2. 601 7
      visualize_stage78_with_deconstruction.py

+ 2 - 2
run_enhanced_search.py

@@ -25,7 +25,7 @@ logger = logging.getLogger(__name__)
 def check_files_exist():
     """检查必需文件是否存在"""
     required_files = [
-        '69114f150000000007001f30_how copy.json',
+        '690d977d0000000007036331_how.json',
         'dimension_associations_analysis.json',
         'optimized_clustered_data_gemini-3-pro-preview.json'
     ]
@@ -81,7 +81,7 @@ def main():
     logger.info("初始化搜索系统...")
     try:
         system = EnhancedSearchV2(
-            how_json_path='69114f150000000007001f30_how copy.json',
+            how_json_path='690d977d0000000007036331_how.json',
             dimension_associations_path='dimension_associations_analysis.json',
             optimized_clustered_data_path='optimized_clustered_data_gemini-3-pro-preview.json',
             output_dir='output_v2'

+ 601 - 7
visualize_stage78_with_deconstruction.py

@@ -47,6 +47,361 @@ def load_stage8_data(json_path: str) -> Dict[str, Any]:
     return mapping
 
 
+def load_persona_library(json_path: str) -> Dict[str, Any]:
+    """Read the persona feature library from a UTF-8 encoded JSON file.
+
+    Args:
+        json_path: Path of the JSON file to parse.
+
+    Returns:
+        The parsed JSON document.
+    """
+    with open(json_path, mode='r', encoding='utf-8') as library_file:
+        library = json.load(library_file)
+    return library
+
+
+def is_category_or_feature(persona_name: str, persona_data: Dict[str, Any]) -> str:
+    """
+    Classify a persona item as a feature or a category by walking the library.
+
+    The three dimension subtrees ('灵感点列表', '目的点列表', '关键点列表')
+    are searched depth-first, in that order. Within a dict node, the entries
+    of its own '特征列表' are checked first (a '特征名称' equal to the name
+    means 'feature'); then every key is checked (a key equal to the name whose
+    value is a dict means 'category') before descending into its value. Keys
+    named '_meta' are skipped, and only dict values are descended into.
+
+    Args:
+        persona_name: Name of the persona item to classify.
+        persona_data: Persona library data.
+
+    Returns:
+        'feature' or 'category'; 'feature' when the name is not found.
+    """
+    def search_in_dict(data, name):
+        """Depth-first search of one subtree; None when the name is absent."""
+        if not isinstance(data, dict):
+            return None
+
+        # A node may carry its own feature list. Malformed content (a
+        # non-list value, or non-dict entries) is ignored instead of raising.
+        features = data.get('特征列表')
+        if isinstance(features, list):
+            for feature in features:
+                if isinstance(feature, dict) and feature.get('特征名称') == name:
+                    return 'feature'
+
+        # Then look at the child nodes.
+        for key, value in data.items():
+            if key == '_meta':
+                continue
+
+            # A matching key whose value is a dict is a category node.
+            if key == name and isinstance(value, dict):
+                return 'category'
+
+            result = search_in_dict(value, name)
+            if result:
+                return result
+
+        return None
+
+    # Search the three main dimensions in a fixed order.
+    for dimension in ('灵感点列表', '目的点列表', '关键点列表'):
+        if dimension in persona_data:
+            result = search_in_dict(persona_data[dimension], persona_name)
+            if result:
+                return result
+
+    # Not found anywhere: default to feature.
+    return 'feature'
+
+
+def extract_relationship_data(how_json_path: str, persona_data: Dict[str, Any]) -> List[Dict[str, Any]]:
+    """
+    Extract post-feature/persona-item relationships from a "how" deconstruction file.
+
+    For every feature listed on a point, the point's 'how步骤列表' is scanned
+    for step features with the same '特征名称', and the entry with the highest
+    '相似度' among their '匹配结果' lists is kept. Features without any
+    usable match produce no relationship. Missing or null containers are
+    treated as empty, and matches whose similarity is not a number are skipped.
+
+    Args:
+        how_json_path: Path of the UTF-8 JSON file containing 'how解构结果'.
+        persona_data: Persona library data, passed to is_category_or_feature.
+
+    Returns:
+        List of relationship data with format:
+        {
+            'post_feature': str,  # post feature name
+            'dimension': str,     # dimension key with the '列表' suffix removed
+            'weight': float,      # weight ('权重', 0 when absent)
+            'persona_item': str,  # persona feature/category name
+            'similarity': float,  # similarity of the best match
+            'item_type': str,     # 'feature' or 'category'
+        }
+    """
+    with open(how_json_path, 'r', encoding='utf-8') as f:
+        how_data = json.load(f)
+
+    # `or {}` / `or []` also cover keys that are present but null in the JSON.
+    how_result = how_data.get('how解构结果') or {}
+
+    relationships = []
+    # persona name -> 'feature' / 'category'; avoids re-walking the persona
+    # library for names that are matched by several post features.
+    item_type_cache = {}
+
+    # Walk the how deconstruction result, one dimension at a time.
+    for dimension_key in ['灵感点列表', '目的点列表', '关键点列表']:
+        dimension_list = how_result.get(dimension_key) or []
+        dimension_name = dimension_key.replace('列表', '')
+
+        for point in dimension_list:
+            # The step list belongs to the point, not to a single feature.
+            how_steps = point.get('how步骤列表') or []
+
+            for feature in point.get('特征列表') or []:
+                feature_name = feature.get('特征名称', '')
+                weight = feature.get('权重', 0)
+
+                # Find the match with the highest similarity.
+                max_match = None
+                max_similarity = -1
+
+                for step in how_steps:
+                    for step_feature in step.get('特征列表') or []:
+                        if step_feature.get('特征名称') != feature_name:
+                            continue
+                        for match in step_feature.get('匹配结果') or []:
+                            similarity = (match.get('匹配结果') or {}).get('相似度', 0)
+                            # A null / non-numeric similarity cannot be ranked.
+                            if not isinstance(similarity, (int, float)):
+                                continue
+                            if similarity > max_similarity:
+                                max_similarity = similarity
+                                max_match = match
+
+                # Record the relationship when a match was found.
+                if max_match:
+                    persona_name = max_match.get('人设特征名称', '')
+                    if persona_name not in item_type_cache:
+                        item_type_cache[persona_name] = is_category_or_feature(persona_name, persona_data)
+
+                    relationships.append({
+                        'post_feature': feature_name,
+                        'dimension': dimension_name,
+                        'weight': weight,
+                        'persona_item': persona_name,
+                        'similarity': max_similarity,
+                        'item_type': item_type_cache[persona_name]
+                    })
+
+    return relationships
+
+
+def generate_relationship_graph_html(relationships: List[Dict[str, Any]]) -> str:
+    """
+    Generate the SVG HTML markup of the relationship graph.
+
+    Persona items are drawn in a left column (squares for 'category', circles
+    otherwise) and post features in a right column (blue circles). Every
+    relationship becomes a Bezier curve between its two nodes with the
+    similarity printed as a label. Each node appears once; both columns are
+    ordered by the highest similarity among the node's relationships,
+    descending. Colours: similarity > 0.8 green, > 0.4 yellow, otherwise red;
+    lines with similarity <= 0.8 are dashed.
+
+    Names taken from the data are HTML-escaped before being embedded.
+
+    Args:
+        relationships: Relationship list as returned by extract_relationship_data.
+
+    Returns:
+        HTML string (containing the SVG), or a placeholder <div> when
+        relationships is empty.
+    """
+    # Local import so the block does not depend on the file's import header.
+    from html import escape
+
+    if not relationships:
+        return '<div style="padding: 40px; text-align: center; color: #6b7280;">暂无关系数据</div>'
+
+    # One node per persona item, carrying its highest similarity. Keying on
+    # the name alone keeps a persona item that matches several post features
+    # from taking several layout slots.
+    persona_item_map = {}
+    for r in relationships:
+        item = r['persona_item']
+        if item not in persona_item_map or r['similarity'] > persona_item_map[item][2]:
+            persona_item_map[item] = (item, r['item_type'], r['similarity'])
+
+    persona_items = list(persona_item_map.values())
+
+    # One node per post feature, carrying its highest similarity.
+    post_feature_map = {}
+    for r in relationships:
+        feature = r['post_feature']
+        if feature not in post_feature_map or r['similarity'] > post_feature_map[feature][3]:
+            post_feature_map[feature] = (feature, r['dimension'], r['weight'], r['similarity'])
+
+    post_features = list(post_feature_map.values())
+
+    # Both columns are sorted by similarity, descending. The sort is stable,
+    # so ties keep their order of first appearance in `relationships`.
+    persona_items.sort(key=lambda x: x[2], reverse=True)
+    post_features.sort(key=lambda x: x[3], reverse=True)
+
+    # Layout parameters
+    node_spacing = 60  # vertical distance between nodes
+    node_radius = 20   # circle radius
+    node_size = 40     # square side length
+    left_margin = 250  # left margin
+    right_margin = 250 # right margin
+    middle_space = 400 # space between the two columns
+    top_margin = 50    # top margin
+
+    # SVG dimensions
+    max_nodes = max(len(persona_items), len(post_features))
+    svg_height = max_nodes * node_spacing + top_margin * 2
+    svg_width = left_margin + middle_space + right_margin
+
+    # Helper: colour for a similarity value
+    def get_color(similarity):
+        if similarity > 0.8:
+            return '#10b981'  # green
+        elif similarity > 0.4:
+            return '#f59e0b'  # yellow
+        else:
+            return '#ef4444'  # red
+
+    # Helper: line style for a similarity value
+    def get_stroke_dasharray(similarity):
+        if similarity > 0.8:
+            return 'none'  # solid line
+        else:
+            return '5,5'  # dashed line
+
+    # Node name -> position/attributes
+    persona_positions = {}
+    post_positions = {}
+
+    for idx, (item, item_type, similarity) in enumerate(persona_items):
+        y = top_margin + idx * node_spacing
+        persona_positions[item] = {
+            'x': left_margin,
+            'y': y,
+            'type': item_type,
+            'similarity': similarity,
+            'idx': idx  # index used to build the node ID
+        }
+
+    for idx, (feature, dimension, weight, similarity) in enumerate(post_features):
+        y = top_margin + idx * node_spacing
+        post_positions[feature] = {
+            'x': left_margin + middle_space,
+            'y': y,
+            'dimension': dimension,
+            'weight': weight,
+            'similarity': similarity,
+            'idx': idx  # index used to build the node ID
+        }
+
+    # Start of the SVG
+    svg_parts = [f'''
+    <div class="relationship-graph-container">
+        <svg width="{svg_width}" height="{svg_height}" xmlns="http://www.w3.org/2000/svg">
+            <!-- 定义箭头标记 -->
+            <defs>
+                <marker id="arrowhead-green" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto">
+                    <polygon points="0 0, 10 3, 0 6" fill="#10b981" />
+                </marker>
+                <marker id="arrowhead-yellow" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto">
+                    <polygon points="0 0, 10 3, 0 6" fill="#f59e0b" />
+                </marker>
+                <marker id="arrowhead-red" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto">
+                    <polygon points="0 0, 10 3, 0 6" fill="#ef4444" />
+                </marker>
+            </defs>
+
+            <!-- 连接线 -->
+            <g class="connections">
+    ''']
+
+    # Draw the connection lines
+    line_idx = 0  # counter used to stagger the labels
+    for rel in relationships:
+        persona_pos = persona_positions.get(rel['persona_item'])
+        post_pos = post_positions.get(rel['post_feature'])
+
+        if persona_pos and post_pos:
+            x1 = persona_pos['x'] + (node_size if persona_pos['type'] == 'category' else node_radius)
+            y1 = persona_pos['y']
+            x2 = post_pos['x'] - node_radius
+            y2 = post_pos['y']
+
+            # Bezier control points
+            cx1 = x1 + (x2 - x1) * 0.3
+            cx2 = x1 + (x2 - x1) * 0.7
+
+            color = get_color(rel['similarity'])
+            dasharray = get_stroke_dasharray(rel['similarity'])
+
+            # Node IDs the line connects (read by the click-highlight script)
+            persona_id = f"persona-{persona_pos['idx']}"
+            post_id = f"post-{post_pos['idx']}"
+
+            # Label position: alternate left / right so labels overlap less
+            if line_idx % 2 == 0:
+                label_x = x1 + (x2 - x1) * 0.35  # left position
+            else:
+                label_x = x1 + (x2 - x1) * 0.65  # right position
+            line_idx += 1
+
+            svg_parts.append(f'''
+                <path class="connection-line"
+                      data-from="{persona_id}"
+                      data-to="{post_id}"
+                      d="M {x1},{y1} C {cx1},{y1} {cx2},{y2} {x2},{y2}"
+                      stroke="{color}"
+                      stroke-width="2"
+                      stroke-dasharray="{dasharray}"
+                      fill="none"
+                      opacity="0.7" />
+
+                <!-- 相似度标签 -->
+                <text x="{label_x}" y="{(y1 + y2) / 2 - 5}"
+                      font-size="12"
+                      fill="{color}"
+                      text-anchor="middle"
+                      font-weight="600"
+                      pointer-events="none">
+                    {rel['similarity']:.2f}
+                </text>
+            ''')
+
+    svg_parts.append('</g>\n\n<!-- 左侧:人设特征/分类节点 -->\n<g class="persona-nodes">')
+
+    # Draw the persona nodes on the left
+    for item, pos in persona_positions.items():
+        color = get_color(pos['similarity'])
+        node_id = f"persona-{pos['idx']}"
+        # The name comes from data files; escape it before embedding in markup.
+        item_label = escape(str(item))
+
+        if pos['type'] == 'category':
+            # Square
+            half_size = node_size / 2
+            svg_parts.append(f'''
+                <g class="clickable" data-node-id="{node_id}" onclick="handleNodeClick('{node_id}', 'persona')">
+                    <rect x="{pos['x'] - half_size}" y="{pos['y'] - half_size}"
+                          width="{node_size}" height="{node_size}"
+                          fill="{color}"
+                          stroke="white"
+                          stroke-width="2"
+                          rx="4" />
+                    <text x="{pos['x'] - half_size - 10}" y="{pos['y'] + 5}"
+                          font-size="14"
+                          fill="#374151"
+                          text-anchor="end"
+                          font-weight="500"
+                          pointer-events="none">⬜ {item_label}</text>
+                </g>
+            ''')
+        else:
+            # Circle
+            svg_parts.append(f'''
+                <g class="clickable" data-node-id="{node_id}" onclick="handleNodeClick('{node_id}', 'persona')">
+                    <circle cx="{pos['x']}" cy="{pos['y']}"
+                            r="{node_radius}"
+                            fill="{color}"
+                            stroke="white"
+                            stroke-width="2" />
+                    <text x="{pos['x'] - node_radius - 10}" y="{pos['y'] + 5}"
+                          font-size="14"
+                          fill="#374151"
+                          text-anchor="end"
+                          font-weight="500"
+                          pointer-events="none">⚪ {item_label}</text>
+                </g>
+            ''')
+
+    svg_parts.append('</g>\n\n<!-- 右侧:帖子特征节点 -->\n<g class="post-nodes">')
+
+    # Draw the post nodes on the right
+    for feature, pos in post_positions.items():
+        # Post features are always blue circles
+        node_id = f"post-{pos['idx']}"
+        feature_label = escape(str(feature))
+        dimension_label = escape(str(pos['dimension']))
+        # Weights are expected to be numbers; fall back to the raw value
+        # rather than failing the whole graph on an unexpected one.
+        try:
+            weight_label = f"{float(pos['weight']):.1f}"
+        except (TypeError, ValueError):
+            weight_label = escape(str(pos['weight']))
+
+        svg_parts.append(f'''
+            <g class="clickable" data-node-id="{node_id}" onclick="handleNodeClick('{node_id}', 'post')">
+                <circle cx="{pos['x']}" cy="{pos['y']}"
+                        r="{node_radius}"
+                        fill="#3b82f6"
+                        stroke="white"
+                        stroke-width="2" />
+                <text x="{pos['x'] + node_radius + 10}" y="{pos['y'] + 5}"
+                      font-size="14"
+                      fill="#374151"
+                      text-anchor="start"
+                      font-weight="500"
+                      pointer-events="none">📝 {feature_label}</text>
+                <text x="{pos['x'] + node_radius + 10}" y="{pos['y'] + 20}"
+                      font-size="11"
+                      fill="#6b7280"
+                      text-anchor="start"
+                      pointer-events="none">({dimension_label} · 权重{weight_label})</text>
+            </g>
+        ''')
+
+    svg_parts.append('''
+            </g>
+        </svg>
+    </div>
+    ''')
+
+    return ''.join(svg_parts)
+
+
 def calculate_statistics(data: List[Dict[str, Any]]) -> Dict[str, Any]:
     """计算统计数据(包括评估结果)"""
     total_features = len(data)
@@ -128,6 +483,7 @@ def calculate_statistics(data: List[Dict[str, Any]]) -> Dict[str, Any]:
 
 def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any],
                   stage7_mapping: Dict[str, Any], stage8_mapping: Dict[str, Any],
+                  relationship_graph_html: str,
                   output_path: str):
     """生成HTML可视化页面"""
 
@@ -1685,6 +2041,124 @@ def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any],
         .hidden {{
             display: none !important;
         }}
+
+        /* 关系图样式 */
+        .relationship-section {{
+            margin: 20px;
+            padding: 20px;
+            background: white;
+            border-radius: 8px;
+            box-shadow: 0 1px 3px rgba(0,0,0,0.1);
+        }}
+
+        .relationship-section .section-header {{
+            font-size: 18px;
+            font-weight: 600;
+            color: #1f2937;
+            margin-bottom: 20px;
+            padding-bottom: 10px;
+            border-bottom: 2px solid #e5e7eb;
+        }}
+
+        .relationship-graph-container {{
+            display: flex;
+            justify-content: center;
+            align-items: center;
+            padding: 20px;
+            background: #f9fafb;
+            border-radius: 8px;
+            overflow-x: auto;
+        }}
+
+        .relationship-graph-container svg {{
+            max-width: 100%;
+            height: auto;
+        }}
+
+        /* Tab导航样式 */
+        .tab-navigation {{
+            display: flex;
+            gap: 4px;
+            background: white;
+            padding: 10px 20px 0;
+            margin: 0 20px;
+            border-bottom: 2px solid #e5e7eb;
+        }}
+
+        .tab-button {{
+            padding: 12px 24px;
+            background: none;
+            border: none;
+            border-bottom: 3px solid transparent;
+            cursor: pointer;
+            font-size: 15px;
+            font-weight: 500;
+            color: #6b7280;
+            transition: all 0.2s;
+            position: relative;
+            top: 2px;
+        }}
+
+        .tab-button:hover {{
+            color: #374151;
+            background: #f9fafb;
+            border-radius: 6px 6px 0 0;
+        }}
+
+        .tab-button.active {{
+            color: #2563eb;
+            border-bottom-color: #2563eb;
+            font-weight: 600;
+        }}
+
+        /* Tab内容区域 */
+        .tab-content {{
+            min-height: 600px;
+        }}
+
+        .tab-pane {{
+            display: none;
+        }}
+
+        .tab-pane.active {{
+            display: block;
+        }}
+
+        /* 交互高亮样式 */
+        .clickable {{
+            cursor: pointer;
+            transition: opacity 0.3s ease, filter 0.3s ease;
+        }}
+
+        .clickable:hover {{
+            filter: brightness(1.15);
+        }}
+
+        .highlighted {{
+            filter: drop-shadow(0 0 8px currentColor);
+        }}
+
+        .highlighted circle,
+        .highlighted rect {{
+            stroke-width: 3;
+        }}
+
+        .dimmed {{
+            opacity: 0.15;
+        }}
+
+        .connection-line {{
+            transition: opacity 0.3s ease, stroke-width 0.3s ease;
+        }}
+
+        .connection-line.highlighted {{
+            stroke-width: 4;
+            opacity: 1 !important;
+        }}
+
+        .connection-line.dimmed {{
+            opacity: 0.1 !important;
+        }}
     </style>
 </head>
 <body>
@@ -1764,13 +2238,33 @@ def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any],
         </div>
     </div>
 
-    <!-- 主容器 -->
-    <div class="main-container">
-        <!-- 左侧导航 -->
-        <div class="left-sidebar" id="leftSidebar"></div>
+    <!-- Tab导航 -->
+    <div class="tab-navigation">
+        <button class="tab-button active" onclick="switchTab('search')">📋 搜索结果</button>
+        <button class="tab-button" onclick="switchTab('relationship')">📊 关系图</button>
+    </div>
+
+    <!-- Tab内容区域 -->
+    <div class="tab-content">
+        <!-- Tab1: 搜索结果 -->
+        <div class="tab-pane active" id="tab-search">
+            <!-- 主容器 -->
+            <div class="main-container">
+                <!-- 左侧导航 -->
+                <div class="left-sidebar" id="leftSidebar"></div>
+
+                <!-- 右侧结果区 -->
+                <div class="right-content" id="rightContent"></div>
+            </div>
+        </div>
 
-        <!-- 右侧结果区 -->
-        <div class="right-content" id="rightContent"></div>
+        <!-- Tab2: 关系图 -->
+        <div class="tab-pane" id="tab-relationship">
+            <div class="relationship-section">
+                <div class="section-header">📊 帖子-人设关系图</div>
+                {relationship_graph_html}
+            </div>
+        </div>
     </div>
 
     <!-- 解构结果模态窗口 -->
@@ -1818,6 +2312,83 @@ def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any],
         const stage8Data = {stage8_json};
         let currentFilter = 'all';
 
+        // Tab切换功能
+        function switchTab(tabName) {{
+            // 更新Tab按钮状态
+            document.querySelectorAll('.tab-button').forEach(btn => {{
+                btn.classList.remove('active');
+            }});
+            event.target.classList.add('active');
+
+            // 更新Tab内容
+            document.querySelectorAll('.tab-pane').forEach(pane => {{
+                pane.classList.remove('active');
+            }});
+            document.getElementById('tab-' + tabName).classList.add('active');
+        }}
+
+        // 关系图节点点击高亮功能
+        let selectedNodeId = null;
+
+        function handleNodeClick(nodeId, nodeType) {{
+            const svg = document.querySelector('.relationship-graph-container svg');
+            if (!svg) return;
+
+            // 如果点击的是已选中节点,取消高亮
+            if (selectedNodeId === nodeId) {{
+                clearHighlight();
+                selectedNodeId = null;
+                return;
+            }}
+
+            selectedNodeId = nodeId;
+
+            // 找到所有相关的连接线
+            const connections = svg.querySelectorAll('.connection-line');
+            const relatedNodes = new Set([nodeId]);
+
+            connections.forEach(conn => {{
+                const from = conn.getAttribute('data-from');
+                const to = conn.getAttribute('data-to');
+
+                if (from === nodeId || to === nodeId) {{
+                    // 这是相关的连接线
+                    conn.classList.add('highlighted');
+                    conn.classList.remove('dimmed');
+
+                    // 收集相关节点
+                    relatedNodes.add(from);
+                    relatedNodes.add(to);
+                }} else {{
+                    // 非相关连接线变暗
+                    conn.classList.add('dimmed');
+                    conn.classList.remove('highlighted');
+                }}
+            }});
+
+            // 处理所有节点
+            const allNodes = svg.querySelectorAll('.clickable');
+            allNodes.forEach(node => {{
+                const nId = node.getAttribute('data-node-id');
+                if (relatedNodes.has(nId)) {{
+                    node.classList.add('highlighted');
+                    node.classList.remove('dimmed');
+                }} else {{
+                    node.classList.add('dimmed');
+                    node.classList.remove('highlighted');
+                }}
+            }});
+        }}
+
+        function clearHighlight() {{
+            const svg = document.querySelector('.relationship-graph-container svg');
+            if (!svg) return;
+
+            svg.querySelectorAll('.highlighted, .dimmed').forEach(el => {{
+                el.classList.remove('highlighted', 'dimmed');
+            }});
+        }}
+
         // 创建评估映射
         const noteEvaluations = {{}};
         data.forEach((feature, fIdx) => {{
@@ -2858,6 +3429,8 @@ def main():
     stage6_path = os.path.join(script_dir, 'output_v2', 'stage6_with_evaluations.json')
     stage7_path = os.path.join(script_dir, 'output_v2', 'stage7_with_deconstruction.json')
     stage8_path = os.path.join(script_dir, 'output_v2', 'stage8_similarity_scores.json')
+    persona_library_path = os.path.join(script_dir, 'optimized_clustered_data_gemini-3-pro-preview.json')
+    how_json_path = os.path.join(script_dir, '690d977d0000000007036331_how.json')
 
     output_dir = os.path.join(script_dir, 'visualization')
     os.makedirs(output_dir, exist_ok=True)
@@ -2885,8 +3458,29 @@ def main():
     print(f"  - 帖子总数: {stats['total_notes']}")
     print(f"  - 完全匹配: {stats['match_complete']} ({stats['complete_rate']}%)")
 
+    # 生成关系图
+    print(f"\n📊 生成帖子-人设关系图...")
+    relationship_graph_html = ""
+    try:
+        print(f"📖 加载人设库: {persona_library_path}")
+        persona_data = load_persona_library(persona_library_path)
+        print(f"✓ 加载人设库完成")
+
+        print(f"📖 提取关系数据: {how_json_path}")
+        relationships = extract_relationship_data(how_json_path, persona_data)
+        print(f"✓ 提取了 {len(relationships)} 个关系")
+
+        relationship_graph_html = generate_relationship_graph_html(relationships)
+        print(f"✓ 关系图生成完成")
+    except FileNotFoundError as e:
+        print(f"⚠️ 警告: 无法生成关系图 - {e}")
+        relationship_graph_html = '<div style="padding: 40px; text-align: center; color: #ef4444;">关系图数据文件未找到</div>'
+    except Exception as e:
+        print(f"⚠️ 警告: 生成关系图时出错 - {e}")
+        relationship_graph_html = f'<div style="padding: 40px; text-align: center; color: #ef4444;">关系图生成失败: {e}</div>'
+
     print(f"\n🎨 生成可视化页面...")
-    generate_html(data, stats, stage7_mapping, stage8_mapping, output_path)
+    generate_html(data, stats, stage7_mapping, stage8_mapping, relationship_graph_html, output_path)
     print(f"✓ 生成完成: {output_path}")
 
     print(f"\n🌐 在浏览器中打开查看:")