刘立冬 2 meses atrás
pai
commit
7041ab7d75
2 arquivos alterados com 699 adições e 11 exclusões
  1. 322 10
      extract_topn_multimodal.py
  2. 377 1
      visualization/knowledge_search_traverse/index.js

+ 322 - 10
extract_topn_multimodal.py

@@ -1,15 +1,17 @@
 """
-从 run_context_v3.json 中提取 topN 帖子并进行多模态解析
+从 run_context_v3.json 中提取 topN 帖子并进行多模态解析和清洗
 
 功能:
 1. 读取 run_context_v3.json
 2. 提取所有帖子,按 final_score 排序,取 topN
 3. 使用 multimodal_extractor 进行图片内容解析
-4. 保存结果到独立的 JSON 文件
+4. 自动进行数据清洗和结构化
+5. 输出清洗后的 JSON 文件(默认不保留原始文件)
 
 参数化配置:
 - top_n: 提取前N个帖子(默认10)
 - max_concurrent: 最大并发数(默认5)
+- keep_raw: 是否保留原始提取结果(默认False)
 """
 
 import argparse
@@ -19,12 +21,283 @@ import os
 import sys
 from pathlib import Path
 from typing import Optional
+import requests
 
 # 导入必要的模块
 from knowledge_search_traverse import Post
 from multimodal_extractor import extract_all_posts
 
 
+# ============================================================================
+# 清洗模块 - 整合自 clean_multimodal_data.py
+# ============================================================================
+
+# Model identifier sent in the "model" field of every cleaning request.
+MODEL_NAME = "google/gemini-2.5-flash"
+API_TIMEOUT = 60  # API timeout in seconds
+
+# Prompt template for the cleaning request. The "{extract_text}" placeholder is
+# filled in with str.format using the raw text extracted from a single image.
+CLEAN_TEXT_PROMPT = """
+请清洗以下图片文本,要求:
+
+1. 去除品牌标识和装饰性文字(如"Blank Plan 计划留白"、"品牌诊断|战略定位|创意内容|VI设计|爆品传播"等)
+2. 去除多余换行符,整理成连贯文本
+3. **完整保留所有核心内容**,不要概括或删减
+4. 保持原文表达和语气
+5. 将内容整理成流畅的段落
+
+图片文本:
+{extract_text}
+
+请直接输出清洗后的文本(纯文本,不要任何格式标记)。
+"""
+
+
+async def call_llm_for_text_cleaning(extract_text: str) -> str:
+    """
+    Ask the OpenRouter chat-completions API to clean one image's raw text.
+
+    Args:
+        extract_text: Raw text extracted from an image.
+
+    Returns:
+        The model's reply with surrounding whitespace stripped.
+
+    Raises:
+        ValueError: If OPENROUTER_API_KEY is not set in the environment.
+        RuntimeError: If the API answers with a non-200 status, or the
+            response body does not carry a text completion.
+    """
+    api_key = os.getenv("OPENROUTER_API_KEY")
+    if not api_key:
+        raise ValueError("OPENROUTER_API_KEY environment variable not set")
+
+    payload = {
+        "model": MODEL_NAME,
+        "messages": [
+            {
+                "role": "user",
+                "content": CLEAN_TEXT_PROMPT.format(extract_text=extract_text),
+            }
+        ],
+    }
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json",
+    }
+
+    # requests.post blocks, so hand it to a worker thread; this keeps the event
+    # loop free for the other cleaning coroutines that run concurrently.
+    response = await asyncio.to_thread(
+        requests.post,
+        "https://openrouter.ai/api/v1/chat/completions",
+        headers=headers,
+        json=payload,
+        timeout=API_TIMEOUT,
+    )
+
+    if response.status_code != 200:
+        raise RuntimeError(
+            f"OpenRouter API error: {response.status_code} - {response.text[:200]}"
+        )
+
+    # A 200 reply can still lack a usable completion (invalid JSON, empty
+    # "choices", null "content"); report that explicitly instead of leaking a
+    # KeyError/AttributeError from the lookup chain.
+    try:
+        content = response.json()["choices"][0]["message"]["content"]
+    except (KeyError, IndexError, TypeError, ValueError) as err:
+        raise RuntimeError(
+            f"OpenRouter API returned an unexpected body: {response.text[:200]}"
+        ) from err
+    if not isinstance(content, str):
+        raise RuntimeError("OpenRouter API returned no text content")
+
+    return content.strip()
+
+
+async def clean_single_image_text(
+    extract_text: str,
+    semaphore: Optional[asyncio.Semaphore] = None
+) -> str:
+    """
+    Clean the text of a single image, never raising on failure.
+
+    Args:
+        extract_text: Raw text extracted from the image.
+        semaphore: Optional semaphore held while the LLM call is in flight.
+
+    Returns:
+        The LLM-cleaned text, or, if the call fails for any reason, the raw
+        text with newlines replaced by spaces and surrounding whitespace
+        stripped.
+    """
+    try:
+        if not semaphore:
+            return await call_llm_for_text_cleaning(extract_text)
+        # Hold a slot only for the duration of the LLM call.
+        async with semaphore:
+            return await call_llm_for_text_cleaning(extract_text)
+    except Exception as err:
+        reason = str(err)[:100]
+        print(f"      ⚠️  清洗失败,保留原文: {reason}")
+        # Best-effort fallback: flatten the raw text instead of failing.
+        flattened = extract_text.replace('\n', ' ')
+        return flattened.strip()
+
+
+async def structure_post_content(
+    post: dict,
+    max_concurrent: int = 5
+) -> dict:
+    """
+    Clean every image text of one post and attach a structured summary.
+
+    The post dict is modified in place: each image gains an
+    'extract_text_cleaned' entry and the post gains 'content_structured'.
+
+    Args:
+        post: Post data; its 'images' list is read if present.
+        max_concurrent: Maximum number of concurrent cleaning calls.
+
+    Returns:
+        The same post dict, now carrying 'content_structured'.
+    """
+    image_list = post.get('images', [])
+
+    # Nothing to clean: record an empty structure and stop.
+    if not image_list:
+        post['content_structured'] = {
+            "total_images": 0,
+            "points": [],
+            "formatted_text": ""
+        }
+        return post
+
+    print(f"      🧹 清洗帖子: {post.get('note_id')} ({len(image_list)}张图片)")
+
+    limiter = asyncio.Semaphore(max_concurrent)
+
+    def _job_for(image):
+        # Images without text get a placeholder awaitable that yields '' so
+        # the gathered results stay aligned with image_list.
+        raw_text = image.get('extract_text', '')
+        if raw_text:
+            return clean_single_image_text(raw_text, limiter)
+        return asyncio.sleep(0, result='')
+
+    results = await asyncio.gather(*[_job_for(image) for image in image_list])
+
+    points = []
+    for position, (image, text) in enumerate(zip(image_list, results)):
+        image['extract_text_cleaned'] = text
+        if not text:
+            continue
+        # "index" is 1-based for display, "source_image" is the 0-based slot.
+        points.append({
+            "index": position + 1,
+            "source_image": position,
+            "content": text
+        })
+
+    post['content_structured'] = {
+        "total_images": len(image_list),
+        "points": points,
+        "formatted_text": "\n".join(
+            f"{point['index']}. {point['content']}" for point in points
+        )
+    }
+
+    print(f"      ✅ 清洗完成: {post.get('note_id')}")
+
+    return post
+
+
+async def clean_all_posts(
+    posts: list[dict],
+    max_concurrent: int = 5
+) -> list[dict]:
+    """
+    Clean a batch of posts one after another.
+
+    Args:
+        posts: Posts to clean.
+        max_concurrent: Maximum concurrency passed on for each post's images.
+
+    Returns:
+        The cleaned posts, in input order.
+    """
+    print(f"\n   开始清洗 {len(posts)} 个帖子...")
+
+    # Posts are awaited sequentially; the images inside each post are the
+    # part that is cleaned concurrently.
+    cleaned_posts = [
+        await structure_post_content(entry, max_concurrent)
+        for entry in posts
+    ]
+
+    print(f"   清洗完成: {len(cleaned_posts)} 个帖子")
+
+    return cleaned_posts
+
+
+async def clean_and_merge_to_context(
+    context_file_path: str,
+    extraction_file_path: str,
+    max_concurrent: int = 5
+) -> list[dict]:
+    """
+    Clean the extracted posts and store them in the run-context JSON file.
+
+    The cleaned posts are written under the 'multimodal_cleaned_posts' key of
+    the context file. The file is replaced atomically, so a failure while
+    serializing cannot leave a truncated context file behind.
+
+    Args:
+        context_file_path: Path of the run_context_v3.json file to update.
+        extraction_file_path: Path of the temporary raw extraction file.
+        max_concurrent: Maximum concurrency used while cleaning.
+
+    Returns:
+        The cleaned posts, or an empty list when the extraction file holds no
+        'extraction_results' (the context file is left untouched then).
+    """
+    from datetime import datetime
+
+    # Step 1: load the temporary extraction data.
+    print(f"\n   📂 加载临时提取数据: {extraction_file_path}")
+    with open(extraction_file_path, 'r', encoding='utf-8') as f:
+        extraction_data = json.load(f)
+
+    posts = extraction_data.get('extraction_results', [])
+
+    if not posts:
+        print("   ⚠️  没有找到需要清洗的帖子")
+        return []
+
+    # Step 2: clean every post through the LLM.
+    cleaned_posts = await clean_all_posts(posts, max_concurrent)
+
+    # Step 3: read the current run context.
+    print(f"\n   📂 读取 run_context: {context_file_path}")
+    with open(context_file_path, 'r', encoding='utf-8') as f:
+        context_data = json.load(f)
+
+    # Step 4: attach the cleaned posts.
+    context_data['multimodal_cleaned_posts'] = {
+        'total_posts': len(cleaned_posts),
+        'posts': cleaned_posts,
+        'extraction_time': datetime.now().isoformat(),
+        'version': 'v1.0'
+    }
+
+    # Step 5: write to a sibling temp file first and swap it in with
+    # os.replace, so an error during json.dump never corrupts the original.
+    print("\n   💾 保存回 run_context_v3.json...")
+    tmp_path = f"{context_file_path}.tmp"
+    try:
+        with open(tmp_path, 'w', encoding='utf-8') as f:
+            json.dump(context_data, f, ensure_ascii=False, indent=2)
+        os.replace(tmp_path, context_file_path)
+    finally:
+        # After a successful replace the temp file is gone; this only fires
+        # when writing or replacing failed part-way.
+        if os.path.exists(tmp_path):
+            os.remove(tmp_path)
+
+    print("   ✅ 清洗结果已写入 multimodal_cleaned_posts 字段")
+
+    return cleaned_posts
+
+
+# ============================================================================
+# 原有函数
+# ============================================================================
+
+
 def load_run_context(json_path: str) -> dict:
     """加载 run_context_v3.json 文件"""
     with open(json_path, 'r', encoding='utf-8') as f:
@@ -144,7 +417,7 @@ def save_extraction_results(results: dict, output_path: str, topn_posts: list[di
 
 
 async def main(context_file_path: str, output_file_path: str, top_n: int = 10,
-               max_concurrent: int = 5):
+               max_concurrent: int = 5, keep_raw: bool = False):
     """主函数
 
     Args:
@@ -152,6 +425,7 @@ async def main(context_file_path: str, output_file_path: str, top_n: int = 10,
         output_file_path: 输出文件路径
         top_n: 提取前N个帖子(默认10)
         max_concurrent: 最大并发数(默认5)
+        keep_raw: 是否保留原始提取结果文件(默认False)
     """
     print("=" * 80)
     print(f"多模态解析 - Top{top_n} 帖子")
@@ -194,9 +468,36 @@ async def main(context_file_path: str, output_file_path: str, top_n: int = 10,
         max_concurrent=max_concurrent
     )
 
-    # 6. 保存结果
-    print(f"\n💾 保存解析结果...")
-    save_extraction_results(extraction_results, output_file_path, topn_posts)
+    # 6. 保存原始提取结果到临时文件
+    print(f"\n💾 保存原始提取结果到临时文件...")
+    temp_output_path = output_file_path.replace('.json', '_temp_raw.json')
+    save_extraction_results(extraction_results, temp_output_path, topn_posts)
+
+    # 7. 数据清洗并写回到 run_context_v3.json
+    print(f"\n🧹 开始数据清洗并写回到 run_context...")
+    cleaned_posts = await clean_and_merge_to_context(
+        context_file_path,      # 写回到原始context文件
+        temp_output_path,       # 从临时文件读取
+        max_concurrent=max_concurrent
+    )
+
+    # 8. 可选:同时保存一份独立的清洗结果文件(方便查看)
+    if keep_raw:
+        output_data = {
+            'total_extracted': len(cleaned_posts),
+            'extraction_results': cleaned_posts
+        }
+        print(f"\n💾 保存独立清洗结果文件...")
+        with open(output_file_path, 'w', encoding='utf-8') as f:
+            json.dump(output_data, f, ensure_ascii=False, indent=2)
+        print(f"   ✅ 独立清洗结果已保存到: {output_file_path}")
+
+    # 9. 清理临时文件
+    if os.path.exists(temp_output_path):
+        os.remove(temp_output_path)
+        print(f"\n🗑️  已清理临时文件")
+
+    print(f"\n✅ 完成!清洗结果已写入 {context_file_path} 的 multimodal_cleaned_posts 字段")
 
     print("\n" + "=" * 80)
     print("✅ 处理完成!")
@@ -210,7 +511,7 @@ if __name__ == "__main__":
         formatter_class=argparse.RawDescriptionHelpFormatter,
         epilog='''
 示例用法:
-  # 使用默认参数 (top10, 并发5)
+  # 使用默认参数 (top10, 并发5, 只输出清洗后结果)
   python3 extract_topn_multimodal.py
 
   # 提取前20个帖子
@@ -219,14 +520,17 @@ if __name__ == "__main__":
   # 自定义并发数
   python3 extract_topn_multimodal.py --top-n 15 --max-concurrent 10
 
+  # 保留原始提取结果(会生成 *_raw.json 文件)
+  python3 extract_topn_multimodal.py --keep-raw
+
   # 指定输入输出文件
   python3 extract_topn_multimodal.py -i input.json -o output.json --top-n 30
         '''
     )
 
     # 默认路径配置
-    DEFAULT_CONTEXT_FILE = "input/test_case/output/knowledge_search_traverse/20251114/005215_b1/run_context_v3.json"
-    DEFAULT_OUTPUT_FILE = "input/test_case/output/knowledge_search_traverse/20251114/005215_b1/multimodal_extraction_topn.json"
+    DEFAULT_CONTEXT_FILE = "input/test_case/output/knowledge_search_traverse/20251118/194351_e3/run_context_v3.json"
+    DEFAULT_OUTPUT_FILE = "input/test_case/output/knowledge_search_traverse/20251118/194351_e3/multimodal_extraction_topn_cleaned.json"
 
     # 添加参数
     parser.add_argument(
@@ -255,6 +559,12 @@ if __name__ == "__main__":
         default=5,
         help='最大并发数 (默认: 5)'
     )
+    parser.add_argument(
+        '--keep-raw',
+        dest='keep_raw',
+        action='store_true',
+        help='保留原始提取结果文件(默认只保留清洗后的结果)'
+    )
 
     # 解析参数
     args = parser.parse_args()
@@ -270,6 +580,7 @@ if __name__ == "__main__":
     print(f"   输出文件: {args.output_file}")
     print(f"   提取数量: Top{args.top_n}")
     print(f"   最大并发: {args.max_concurrent}")
+    print(f"   保留原始: {'是' if args.keep_raw else '否'}")
     print()
 
     # 运行主函数
@@ -277,5 +588,6 @@ if __name__ == "__main__":
         args.context_file,
         args.output_file,
         args.top_n,
-        args.max_concurrent
+        args.max_concurrent,
+        keep_raw=args.keep_raw
     ))

+ 377 - 1
visualization/knowledge_search_traverse/index.js

@@ -20,6 +20,11 @@ const useSimplified = args.includes('--simplified');
 // 读取输入数据
 const inputData = JSON.parse(fs.readFileSync(inputFile, 'utf-8'));
 
+// 提取清洗后的数据
+const cleanedData = inputData.multimodal_cleaned_posts || null;
+const cleanedPosts = cleanedData ? cleanedData.posts : [];
+console.log(`📊 发现清洗数据: ${cleanedPosts.length} 个帖子`);
+
 // 检测数据格式并转换
 let data;
 if (inputData.rounds && inputData.o) {
@@ -103,6 +108,7 @@ import {
 import '@xyflow/react/dist/style.css';
 
 const data = ${JSON.stringify(data, null, 2)};
+const cleanedPosts = ${JSON.stringify(cleanedPosts, null, 2)};
 
 // 根据节点类型获取边框颜色
 function getNodeTypeColor(type) {
@@ -1998,6 +2004,8 @@ function FlowContent() {
   const [focusedNodeId, setFocusedNodeId] = useState(null); // 单独聚焦的节点ID
   const [sidebarWidth, setSidebarWidth] = useState(400); // 左侧目录宽度
   const [isResizing, setIsResizing] = useState(false); // 是否正在拖拽调整宽度
+  const [activeTab, setActiveTab] = useState('directory'); // Tab切换: 'directory' | 'cleaned'
+  const [selectedPost, setSelectedPost] = useState(null); // 选中的清洗帖子
 
   // 拖拽调整侧边栏宽度的处理逻辑
   const handleMouseDown = useCallback(() => {
@@ -2856,7 +2864,7 @@ function FlowContent() {
         cursor: isResizing ? 'col-resize' : 'default',
         userSelect: isResizing ? 'none' : 'auto',
       }}>
-        {/* 左侧目录树 */}
+        {/* 左侧目录树/清洗结果 */}
         <div style={{
           width: \`\${sidebarWidth}px\`,
           background: 'white',
@@ -2865,6 +2873,51 @@ function FlowContent() {
           flexDirection: 'column',
           flexShrink: 0,
         }}>
+          {/* Tab切换 */}
+          <div style={{
+            display: 'flex',
+            borderBottom: '1px solid #e5e7eb',
+            background: '#f9fafb',
+          }}>
+            <button
+              onClick={() => setActiveTab('directory')}
+              style={{
+                flex: 1,
+                padding: '12px 16px',
+                border: 'none',
+                background: activeTab === 'directory' ? 'white' : 'transparent',
+                borderBottom: activeTab === 'directory' ? '2px solid #3b82f6' : '2px solid transparent',
+                color: activeTab === 'directory' ? '#111827' : '#6b7280',
+                fontWeight: activeTab === 'directory' ? '600' : '400',
+                fontSize: '14px',
+                cursor: 'pointer',
+                transition: 'all 0.2s',
+              }}
+            >
+              节点目录
+            </button>
+            <button
+              onClick={() => setActiveTab('cleaned')}
+              style={{
+                flex: 1,
+                padding: '12px 16px',
+                border: 'none',
+                background: activeTab === 'cleaned' ? 'white' : 'transparent',
+                borderBottom: activeTab === 'cleaned' ? '2px solid #3b82f6' : '2px solid transparent',
+                color: activeTab === 'cleaned' ? '#111827' : '#6b7280',
+                fontWeight: activeTab === 'cleaned' ? '600' : '400',
+                fontSize: '14px',
+                cursor: 'pointer',
+                transition: 'all 0.2s',
+              }}
+            >
+              排序&清洗结果 ({cleanedPosts.length})
+            </button>
+          </div>
+
+          {/* Tab内容区 */}
+          {activeTab === 'directory' && (
+          <>
           <div style={{
             padding: '12px 16px',
             borderBottom: '1px solid #e5e7eb',
@@ -2958,6 +3011,105 @@ function FlowContent() {
               {renderTree(treeRoots)}
             </div>
           </div>
+          </>
+          )}
+
+          {/* 清洗结果列表 */}
+          {activeTab === 'cleaned' && (
+            <div style={{
+              flex: 1,
+              overflowY: 'auto',
+              padding: '8px',
+            }}>
+              {cleanedPosts.length === 0 ? (
+                <div style={{
+                  padding: '20px',
+                  textAlign: 'center',
+                  color: '#9ca3af',
+                  fontSize: '14px',
+                }}>
+                  暂无清洗数据<br/>
+                  <span style={{ fontSize: '12px', marginTop: '8px', display: 'block' }}>
+                    请先运行 extract_topn_multimodal.py 脚本
+                  </span>
+                </div>
+              ) : (
+                <div>
+                  <div style={{
+                    padding: '8px 12px',
+                    marginBottom: '8px',
+                    background: '#f3f4f6',
+                    borderRadius: '6px',
+                    fontSize: '12px',
+                    color: '#6b7280',
+                  }}>
+                    共 {cleanedPosts.length} 个帖子
+                  </div>
+                  {cleanedPosts.map((post, index) => (
+                    <div
+                      key={post.note_id}
+                      onClick={() => setSelectedPost(post)}
+                      style={{
+                        padding: '12px',
+                        marginBottom: '8px',
+                        background: selectedPost?.note_id === post.note_id ? '#eff6ff' : 'white',
+                        border: \`1px solid \${selectedPost?.note_id === post.note_id ? '#3b82f6' : '#e5e7eb'}\`,
+                        borderRadius: '6px',
+                        cursor: 'pointer',
+                        transition: 'all 0.2s',
+                      }}
+                      onMouseEnter={(e) => {
+                        if (selectedPost?.note_id !== post.note_id) {
+                          e.currentTarget.style.background = '#f9fafb';
+                        }
+                      }}
+                      onMouseLeave={(e) => {
+                        if (selectedPost?.note_id !== post.note_id) {
+                          e.currentTarget.style.background = 'white';
+                        }
+                      }}
+                    >
+                      <div style={{ display: 'flex', alignItems: 'flex-start', gap: '8px' }}>
+                        <div style={{
+                          minWidth: '24px',
+                          height: '24px',
+                          borderRadius: '50%',
+                          background: '#3b82f6',
+                          color: 'white',
+                          display: 'flex',
+                          alignItems: 'center',
+                          justifyContent: 'center',
+                          fontSize: '12px',
+                          fontWeight: '600',
+                        }}>
+                          {index + 1}
+                        </div>
+                        <div style={{ flex: 1, minWidth: 0 }}>
+                          <div style={{
+                            fontSize: '13px',
+                            fontWeight: '500',
+                            color: '#111827',
+                            marginBottom: '4px',
+                            overflow: 'hidden',
+                            textOverflow: 'ellipsis',
+                            display: '-webkit-box',
+                            WebkitLineClamp: 2,
+                            WebkitBoxOrient: 'vertical',
+                          }}>
+                            {post.title}
+                          </div>
+                          <div style={{ display: 'flex', gap: '12px', fontSize: '11px', color: '#6b7280' }}>
+                            <span>得分: {post.final_score?.toFixed(1) || 'N/A'}</span>
+                            <span>图片: {post.content_structured?.total_images || 0}</span>
+                          </div>
+                        </div>
+                      </div>
+                    </div>
+                  ))}
+                </div>
+              )}
+            </div>
+          )}
         </div>
 
         {/* 可拖拽的分隔条 */}
@@ -3225,6 +3377,230 @@ function FlowContent() {
             <Controls style={{ bottom: '20px', left: 'auto', right: '20px' }} />
             <Background variant="dots" gap={20} size={1} color="#e5e7eb" />
           </ReactFlow>
+
+          {/* 清洗帖子详情卡片 */}
+          {selectedPost && (
+            <div
+              style={{
+                position: 'absolute',
+                top: 0,
+                right: 0,
+                bottom: 0,
+                left: 0,
+                background: 'rgba(0, 0, 0, 0.3)',
+                zIndex: 1000,
+                display: 'flex',
+                justifyContent: 'flex-end',
+              }}
+              onClick={() => setSelectedPost(null)}
+            >
+              <div
+                style={{
+                  width: '600px',
+                  maxWidth: '90%',
+                  background: '#fff5f5',
+                  boxShadow: '-4px 0 20px rgba(0, 0, 0, 0.15)',
+                  overflowY: 'auto',
+                  display: 'flex',
+                  flexDirection: 'column',
+                }}
+                onClick={(e) => e.stopPropagation()}
+              >
+                {/* 卡片头部 */}
+                <div style={{
+                  padding: '20px 24px',
+                  borderBottom: '2px solid #fecaca',
+                  background: 'white',
+                  position: 'sticky',
+                  top: 0,
+                  zIndex: 1,
+                }}>
+                  <div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'flex-start' }}>
+                    <h3 style={{
+                      margin: 0,
+                      fontSize: '18px',
+                      fontWeight: '600',
+                      color: '#111827',
+                      flex: 1,
+                      paddingRight: '16px',
+                      lineHeight: '1.4',
+                    }}>
+                      {selectedPost.title}
+                    </h3>
+                    <button
+                      onClick={() => setSelectedPost(null)}
+                      style={{
+                        fontSize: '24px',
+                        lineHeight: '24px',
+                        border: 'none',
+                        background: 'none',
+                        color: '#9ca3af',
+                        cursor: 'pointer',
+                        padding: '0',
+                        width: '24px',
+                        height: '24px',
+                        display: 'flex',
+                        alignItems: 'center',
+                        justifyContent: 'center',
+                        borderRadius: '4px',
+                        transition: 'all 0.2s',
+                      }}
+                      onMouseEnter={(e) => {
+                        e.currentTarget.style.background = '#f3f4f6';
+                        e.currentTarget.style.color = '#111827';
+                      }}
+                      onMouseLeave={(e) => {
+                        e.currentTarget.style.background = 'none';
+                        e.currentTarget.style.color = '#9ca3af';
+                      }}
+                    >
+                      ×
+                    </button>
+                  </div>
+                </div>
+
+                {/* 卡片内容 */}
+                <div style={{ flex: 1, overflowY: 'auto', padding: '24px' }}>
+                  {/* 基本信息 */}
+                  <div style={{
+                    display: 'flex',
+                    gap: '16px',
+                    marginBottom: '24px',
+                    padding: '16px',
+                    background: 'white',
+                    borderRadius: '8px',
+                    border: '1px solid #fecaca',
+                  }}>
+                    <div style={{ flex: 1 }}>
+                      <div style={{ fontSize: '12px', color: '#6b7280', marginBottom: '4px' }}>得分</div>
+                      <div style={{ fontSize: '20px', fontWeight: '600', color: '#ef4444' }}>
+                        {selectedPost.final_score?.toFixed(1) || 'N/A'}
+                      </div>
+                    </div>
+                    <div style={{ flex: 1 }}>
+                      <div style={{ fontSize: '12px', color: '#6b7280', marginBottom: '4px' }}>图片数量</div>
+                      <div style={{ fontSize: '20px', fontWeight: '600', color: '#3b82f6' }}>
+                        {selectedPost.content_structured?.total_images || 0}
+                      </div>
+                    </div>
+                    <div style={{ flex: 1 }}>
+                      <a
+                        href={selectedPost.note_url}
+                        target="_blank"
+                        rel="noopener noreferrer"
+                        style={{
+                          display: 'inline-block',
+                          marginTop: '20px',
+                          padding: '8px 16px',
+                          background: '#3b82f6',
+                          color: 'white',
+                          borderRadius: '6px',
+                          textDecoration: 'none',
+                          fontSize: '12px',
+                          fontWeight: '500',
+                          transition: 'all 0.2s',
+                        }}
+                        onMouseEnter={(e) => e.currentTarget.style.background = '#2563eb'}
+                        onMouseLeave={(e) => e.currentTarget.style.background = '#3b82f6'}
+                      >
+                        查看原帖 →
+                      </a>
+                    </div>
+                  </div>
+
+                  {/* 清洗后的内容 */}
+                  {selectedPost.content_structured && selectedPost.content_structured.formatted_text && (
+                    <div style={{ marginBottom: '24px' }}>
+                      <h4 style={{
+                        fontSize: '14px',
+                        fontWeight: '600',
+                        color: '#111827',
+                        marginBottom: '12px',
+                        display: 'flex',
+                        alignItems: 'center',
+                        gap: '8px',
+                      }}>
+                        <span style={{ fontSize: '16px' }}>📝</span>
+                        清洗后的结构化内容
+                      </h4>
+                      <div style={{
+                        padding: '16px',
+                        background: 'white',
+                        borderRadius: '8px',
+                        border: '1px solid #fecaca',
+                        whiteSpace: 'pre-wrap',
+                        fontSize: '13px',
+                        lineHeight: '1.6',
+                        color: '#374151',
+                      }}>
+                        {selectedPost.content_structured.formatted_text}
+                      </div>
+                    </div>
+                  )}
+
+                  {/* 图片列表 */}
+                  {selectedPost.images && selectedPost.images.length > 0 && (
+                    <div>
+                      <h4 style={{
+                        fontSize: '14px',
+                        fontWeight: '600',
+                        color: '#111827',
+                        marginBottom: '12px',
+                        display: 'flex',
+                        alignItems: 'center',
+                        gap: '8px',
+                      }}>
+                        <span style={{ fontSize: '16px' }}>🖼️</span>
+                        图片详情 ({selectedPost.images.length})
+                      </h4>
+                      {selectedPost.images.map((img, idx) => (
+                        <div
+                          key={idx}
+                          style={{
+                            marginBottom: '16px',
+                            padding: '16px',
+                            background: 'white',
+                            borderRadius: '8px',
+                            border: '1px solid #fecaca',
+                          }}
+                        >
+                          <div style={{
+                            fontSize: '12px',
+                            fontWeight: '600',
+                            color: '#6b7280',
+                            marginBottom: '8px',
+                          }}>
+                            图片 {idx + 1}
+                          </div>
+                          <img
+                            src={img.original_url}
+                            alt={'图片' + (idx + 1)}
+                            style={{
+                              width: '100%',
+                              borderRadius: '6px',
+                              marginBottom: '12px',
+                            }}
+                          />
+                          {img.extract_text_cleaned && (
+                            <div style={{
+                              padding: '12px',
+                              background: '#f9fafb',
+                              borderRadius: '6px',
+                              fontSize: '12px',
+                              lineHeight: '1.6',
+                              color: '#374151',
+                            }}>
+                              {img.extract_text_cleaned}
+                            </div>
+                          )}
+                        </div>
+                      ))}
+                    </div>
+                  )}
+                </div>
+              </div>
+            </div>
+          )}
         </div>
       </div>
     </div>