Kaynağa Gözat

Merge remote-tracking branch 'origin/how_1120_v1' into how_1120_v2

yangxiaohui 1 hafta önce
ebeveyn
işleme
56762d5a8e
1 değiştirilmiş dosya ile 101 ekleme ve 18 silme
  1. 101 18
      script/data_processing/visualize_how_results.py

+ 101 - 18
script/data_processing/visualize_how_results.py

@@ -150,8 +150,8 @@ def generate_post_detail_html(post_data: Dict, post_idx: int) -> str:
     # 生成缩略图HTML
     thumbnail_html = ""
     if images and len(images) > 0:
-        # 使用第一张图片作为缩略图
-        thumbnail_html = f'<img src="{images[0]}" class="post-card-thumbnail" alt="缩略图">'
+        # 使用第一张图片作为缩略图,添加懒加载
+        thumbnail_html = f'<img src="{images[0]}" class="post-card-thumbnail" alt="缩略图" loading="lazy">'
     else:
         thumbnail_html = '<div class="post-card-thumbnail-placeholder">📄</div>'
 
@@ -546,7 +546,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
             continue
 
         # 生成该层级的折叠区域
-        level_section_id = f"post-{post_idx}-insp-{insp_idx}-feat-{feature_idx}-level-{level_name}"
+        level_section_id = f"post-{post_idx}-{current_point_type}-{insp_idx}-feat-{feature_idx}-level-{level_name}"
 
         # 找出该层级的最高分匹配
         all_level_matches = level_data["标签"] + level_data["分类"]
@@ -604,7 +604,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
         matches_html += f'''
         <div class="level-group-section">
-            <div class="level-group-header" onclick="toggleMatchGroup('{level_section_id}')">
+            <div class="level-group-header" onclick="toggleMatchGroup(event, '{level_section_id}')">
                 <div class="level-header-left">
                     <span class="expand-icon" id="{level_section_id}-icon">▶</span>
                     <h4 class="level-group-title">{feature_number}.{level_index} 匹配人设{level_name} ({total_count})</h4>
@@ -618,7 +618,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
         # 该层级下的标签分组
         subgroup_index = 1
         if level_data["标签"]:
-            group_id = f"post-{post_idx}-insp-{insp_idx}-feat-{feature_idx}-level-{level_name}-label"
+            group_id = f"post-{post_idx}-{current_point_type}-{insp_idx}-feat-{feature_idx}-level-{level_name}-label"
             group_matches_html = ""
 
             # 找出标签中的最高分
@@ -666,7 +666,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
             matches_html += f'''
             <div class="match-subgroup-section">
-                <div class="match-subgroup-header" onclick="toggleMatchGroup('{group_id}')">
+                <div class="match-subgroup-header" onclick="toggleMatchGroup(event, '{group_id}')">
                     <div class="subgroup-header-left">
                         <span class="expand-icon" id="{group_id}-icon">▼</span>
                         <h5 class="match-subgroup-title">{feature_number}.{level_index}.{subgroup_index} 标签 ({len(level_data["标签"])})</h5>
@@ -682,7 +682,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
         # 该层级下的分类分组
         if level_data["分类"]:
-            group_id = f"post-{post_idx}-insp-{insp_idx}-feat-{feature_idx}-level-{level_name}-category"
+            group_id = f"post-{post_idx}-{current_point_type}-{insp_idx}-feat-{feature_idx}-level-{level_name}-category"
             group_matches_html = ""
 
             # 找出分类中的最高分
@@ -730,7 +730,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
             matches_html += f'''
             <div class="match-subgroup-section">
-                <div class="match-subgroup-header" onclick="toggleMatchGroup('{group_id}')">
+                <div class="match-subgroup-header" onclick="toggleMatchGroup(event, '{group_id}')">
                     <div class="subgroup-header-left">
                         <span class="expand-icon" id="{group_id}-icon">▼</span>
                         <h5 class="match-subgroup-title">{feature_number}.{level_index}.{subgroup_index} 分类 ({len(level_data["分类"])})</h5>
@@ -789,7 +789,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
     html = f'''
     <div class="match-results-section">
-        <div class="match-section-header collapsible-header" onclick="toggleFeatureSection('{section_id}')">
+        <div class="match-section-header collapsible-header" onclick="toggleFeatureSection(event, '{section_id}')">
             <div class="header-left">
                 <span class="expand-icon" id="{section_id}-icon">▼</span>
                 <h4>{feature_number}. 匹配结果: {html_module.escape(feature_name)} <span class="feature-weight-display">(权重: {feature_weight})</span></h4>
@@ -3150,11 +3150,20 @@ def generate_combined_html(posts_data: List[Dict], category_mapping: Dict = None
                 }}
             }}
 
-            function toggleFeatureSection(sectionId) {{
+            function toggleFeatureSection(event, sectionId) {{
+                event.stopPropagation();
+
                 var content = document.getElementById(sectionId + '-content');
                 var icon = document.getElementById(sectionId + '-icon');
 
-                if (content.style.display === 'none') {{
+                if (!content || !icon) {{
+                    console.error('Element not found:', sectionId);
+                    return;
+                }}
+
+                var isHidden = content.style.display === 'none' || !content.style.display || window.getComputedStyle(content).display === 'none';
+
+                if (isHidden) {{
                     content.style.display = 'flex';
                     icon.textContent = '▼';
                 }} else {{
@@ -3176,11 +3185,20 @@ def generate_combined_html(posts_data: List[Dict], category_mapping: Dict = None
                 }}
             }}
 
-            function toggleMatchGroup(groupId) {{
+            function toggleMatchGroup(event, groupId) {{
+                event.stopPropagation();
+
                 var content = document.getElementById(groupId + '-content');
                 var icon = document.getElementById(groupId + '-icon');
 
-                if (content.style.display === 'none') {{
+                if (!content || !icon) {{
+                    console.error('Element not found:', groupId);
+                    return;
+                }}
+
+                var isHidden = content.style.display === 'none' || !content.style.display || window.getComputedStyle(content).display === 'none';
+
+                if (isHidden) {{
                     // 根据class决定使用什么display值
                     if (content.classList.contains('match-subgroup-content')) {{
                         content.style.display = 'flex';
@@ -3257,11 +3275,11 @@ def generate_combined_html(posts_data: List[Dict], category_mapping: Dict = None
                 try {{
                     const postData = JSON.parse(postDataStr);
 
-                    // 生成图片HTML
+                    // 生成图片HTML(添加懒加载)
                     let imagesHtml = '';
                     if (postData.images && postData.images.length > 0) {{
                         imagesHtml = postData.images.map(img =>
-                            `<img src="${{img}}" class="post-detail-image" alt="图片">`
+                            `<img src="${{img}}" class="post-detail-image" alt="图片" loading="lazy">`
                         ).join('');
                     }} else {{
                         imagesHtml = '<div style="text-align: center; color: #9ca3af; padding: 40px;">暂无图片</div>';
@@ -3640,6 +3658,42 @@ def generate_combined_html(posts_data: List[Dict], category_mapping: Dict = None
     return html
 
 
+def minify_html(html: str) -> str:
+    """压缩HTML,去除多余空格和换行"""
+    import re
+
+    # 保护script和style标签内容
+    scripts = []
+    styles = []
+
+    def save_script(match):
+        scripts.append(match.group(0))
+        return f"___SCRIPT_{len(scripts)-1}___"
+
+    def save_style(match):
+        styles.append(match.group(0))
+        return f"___STYLE_{len(styles)-1}___"
+
+    # 保存script和style
+    html = re.sub(r'<script[^>]*>.*?</script>', save_script, html, flags=re.DOTALL)
+    html = re.sub(r'<style[^>]*>.*?</style>', save_style, html, flags=re.DOTALL)
+
+    # 去除HTML注释
+    html = re.sub(r'<!--.*?-->', '', html, flags=re.DOTALL)
+
+    # 去除多余空格和换行
+    html = re.sub(r'\s+', ' ', html)
+    html = re.sub(r'>\s+<', '><', html)
+
+    # 恢复script和style
+    for i, script in enumerate(scripts):
+        html = html.replace(f"___SCRIPT_{i}___", script)
+    for i, style in enumerate(styles):
+        html = html.replace(f"___STYLE_{i}___", style)
+
+    return html.strip()
+
+
 def main():
     """主函数"""
     script_dir = Path(__file__).parent
@@ -3674,12 +3728,41 @@ def main():
     print(f"\n生成合并的 HTML...")
     html_content = generate_combined_html(posts_data, category_mapping, source_mapping)
 
-    print(f"保存到: {output_file}")
+    # 保存原始版本
+    print(f"保存原始HTML到: {output_file}")
     with open(output_file, "w", encoding="utf-8") as f:
         f.write(html_content)
 
-    print(f"\n完成! 可视化文件已保存")
-    print(f"请在浏览器中打开: {output_file}")
+    original_size = len(html_content) / 1024 / 1024
+    print(f"原始HTML大小: {original_size:.1f} MB")
+
+    # 压缩HTML
+    print(f"\n压缩HTML...")
+    minified_html = minify_html(html_content)
+
+    minified_file = data_dir / "当前帖子_how解构结果_可视化.min.html"
+    print(f"保存压缩HTML到: {minified_file}")
+    with open(minified_file, "w", encoding="utf-8") as f:
+        f.write(minified_html)
+
+    minified_size = len(minified_html) / 1024 / 1024
+    print(f"压缩HTML大小: {minified_size:.1f} MB (减少 {(1 - minified_size/original_size)*100:.1f}%)")
+
+    # Gzip压缩
+    import gzip
+    print(f"\n生成Gzip压缩版本...")
+    gzip_file = data_dir / "当前帖子_how解构结果_可视化.html.gz"
+    with gzip.open(gzip_file, "wb") as f:
+        f.write(minified_html.encode('utf-8'))
+
+    gzip_size = gzip_file.stat().st_size / 1024 / 1024
+    print(f"Gzip压缩大小: {gzip_size:.1f} MB (比原始减少 {(1 - gzip_size/original_size)*100:.1f}%)")
+
+    print(f"\n完成! 生成了3个版本:")
+    print(f"1. 原始版本: {output_file} ({original_size:.1f} MB)")
+    print(f"2. 压缩版本: {minified_file} ({minified_size:.1f} MB)")
+    print(f"3. Gzip版本: {gzip_file} ({gzip_size:.1f} MB)")
+    print(f"\n建议分享: {gzip_file.name} (浏览器可直接打开)")
 
 
 if __name__ == "__main__":