1 hafta önce · 56762d5a8e
--- a/script/data_processing/visualize_how_results.py
+++ b/script/data_processing/visualize_how_results.py
@@ -150,8 +150,8 @@ def generate_post_detail_html(post_data: Dict, post_idx: int) -> str:
 
				     # 生成缩略图HTML
			
 
				     thumbnail_html = ""
			
 
				     if images and len(images) > 0:
			
 
				-        # 使用第一张图片作为缩略图
			
 
				-        thumbnail_html = f'<img src="{images[0]}" class="post-card-thumbnail" alt="缩略图">'
			
 
				+        # 使用第一张图片作为缩略图，添加懒加载
			
 
				+        thumbnail_html = f'<img src="{images[0]}" class="post-card-thumbnail" alt="缩略图" loading="lazy">'
			
 
				     else:
			
 
				         thumbnail_html = '<div class="post-card-thumbnail-placeholder">📄</div>'
			
 
				 
			
@@ -546,7 +546,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
				             continue
			
 
				 
			
 
				         # 生成该层级的折叠区域
			
 
				-        level_section_id = f"post-{post_idx}-insp-{insp_idx}-feat-{feature_idx}-level-{level_name}"
			
 
				+        level_section_id = f"post-{post_idx}-{current_point_type}-{insp_idx}-feat-{feature_idx}-level-{level_name}"
			
 
				 
			
 
				         # 找出该层级的最高分匹配
			
 
				         all_level_matches = level_data["标签"] + level_data["分类"]
			
@@ -604,7 +604,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
				 
			
 
				         matches_html += f'''
			
 
				         <div class="level-group-section">
			
 
				-            <div class="level-group-header" onclick="toggleMatchGroup('{level_section_id}')">
			
 
				+            <div class="level-group-header" onclick="toggleMatchGroup(event, '{level_section_id}')">
			
 
				                 <div class="level-header-left">
			
 
				                     <span class="expand-icon" id="{level_section_id}-icon">▶</span>
			
 
				                     <h4 class="level-group-title">{feature_number}.{level_index} 匹配人设{level_name} ({total_count})</h4>
			
@@ -618,7 +618,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
				         # 该层级下的标签分组
			
 
				         subgroup_index = 1
			
 
				         if level_data["标签"]:
			
 
				-            group_id = f"post-{post_idx}-insp-{insp_idx}-feat-{feature_idx}-level-{level_name}-label"
			
 
				+            group_id = f"post-{post_idx}-{current_point_type}-{insp_idx}-feat-{feature_idx}-level-{level_name}-label"
			
 
				             group_matches_html = ""
			
 
				 
			
 
				             # 找出标签中的最高分
			
@@ -666,7 +666,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
				 
			
 
				             matches_html += f'''
			
 
				             <div class="match-subgroup-section">
			
 
				-                <div class="match-subgroup-header" onclick="toggleMatchGroup('{group_id}')">
			
 
				+                <div class="match-subgroup-header" onclick="toggleMatchGroup(event, '{group_id}')">
			
 
				                     <div class="subgroup-header-left">
			
 
				                         <span class="expand-icon" id="{group_id}-icon">▼</span>
			
 
				                         <h5 class="match-subgroup-title">{feature_number}.{level_index}.{subgroup_index} 标签 ({len(level_data["标签"])})</h5>
			
@@ -682,7 +682,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
				 
			
 
				         # 该层级下的分类分组
			
 
				         if level_data["分类"]:
			
 
				-            group_id = f"post-{post_idx}-insp-{insp_idx}-feat-{feature_idx}-level-{level_name}-category"
			
 
				+            group_id = f"post-{post_idx}-{current_point_type}-{insp_idx}-feat-{feature_idx}-level-{level_name}-category"
			
 
				             group_matches_html = ""
			
 
				 
			
 
				             # 找出分类中的最高分
			
@@ -730,7 +730,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
				 
			
 
				             matches_html += f'''
			
 
				             <div class="match-subgroup-section">
			
 
				-                <div class="match-subgroup-header" onclick="toggleMatchGroup('{group_id}')">
			
 
				+                <div class="match-subgroup-header" onclick="toggleMatchGroup(event, '{group_id}')">
			
 
				                     <div class="subgroup-header-left">
			
 
				                         <span class="expand-icon" id="{group_id}-icon">▼</span>
			
 
				                         <h5 class="match-subgroup-title">{feature_number}.{level_index}.{subgroup_index} 分类 ({len(level_data["分类"])})</h5>
			
@@ -789,7 +789,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
				 
			
 
				     html = f'''
			
 
				     <div class="match-results-section">
			
 
				-        <div class="match-section-header collapsible-header" onclick="toggleFeatureSection('{section_id}')">
			
 
				+        <div class="match-section-header collapsible-header" onclick="toggleFeatureSection(event, '{section_id}')">
			
 
				             <div class="header-left">
			
 
				                 <span class="expand-icon" id="{section_id}-icon">▼</span>
			
 
				                 <h4>{feature_number}. 匹配结果: {html_module.escape(feature_name)} <span class="feature-weight-display">(权重: {feature_weight})</span></h4>
			
@@ -3150,11 +3150,20 @@ def generate_combined_html(posts_data: List[Dict], category_mapping: Dict = None
 
				                 }}
			
 
				             }}
			
 
				 
			
 
				-            function toggleFeatureSection(sectionId) {{
			
 
				+            function toggleFeatureSection(event, sectionId) {{
			
 
				+                event.stopPropagation();
			
 
				+
			
 
				                 var content = document.getElementById(sectionId + '-content');
			
 
				                 var icon = document.getElementById(sectionId + '-icon');
			
 
				 
			
 
				-                if (content.style.display === 'none') {{
			
 
				+                if (!content || !icon) {{
			
 
				+                    console.error('Element not found:', sectionId);
			
 
				+                    return;
			
 
				+                }}
			
 
				+
			
 
				+                var isHidden = content.style.display === 'none' || !content.style.display || window.getComputedStyle(content).display === 'none';
			
 
				+
			
 
				+                if (isHidden) {{
			
 
				                     content.style.display = 'flex';
			
 
				                     icon.textContent = '▼';
			
 
				                 }} else {{
			
@@ -3176,11 +3185,20 @@ def generate_combined_html(posts_data: List[Dict], category_mapping: Dict = None
 
				                 }}
			
 
				             }}
			
 
				 
			
 
				-            function toggleMatchGroup(groupId) {{
			
 
				+            function toggleMatchGroup(event, groupId) {{
			
 
				+                event.stopPropagation();
			
 
				+
			
 
				                 var content = document.getElementById(groupId + '-content');
			
 
				                 var icon = document.getElementById(groupId + '-icon');
			
 
				 
			
 
				-                if (content.style.display === 'none') {{
			
 
				+                if (!content || !icon) {{
			
 
				+                    console.error('Element not found:', groupId);
			
 
				+                    return;
			
 
				+                }}
			
 
				+
			
 
				+                var isHidden = content.style.display === 'none' || !content.style.display || window.getComputedStyle(content).display === 'none';
			
 
				+
			
 
				+                if (isHidden) {{
			
 
				                     // 根据class决定使用什么display值
			
 
				                     if (content.classList.contains('match-subgroup-content')) {{
			
 
				                         content.style.display = 'flex';
			
@@ -3257,11 +3275,11 @@ def generate_combined_html(posts_data: List[Dict], category_mapping: Dict = None
 
				                 try {{
			
 
				                     const postData = JSON.parse(postDataStr);
			
 
				 
			
 
				-                    // 生成图片HTML
			
 
				+                    // 生成图片HTML（添加懒加载）
			
 
				                     let imagesHtml = '';
			
 
				                     if (postData.images && postData.images.length > 0) {{
			
 
				                         imagesHtml = postData.images.map(img =>
			
 
				-                            `<img src="${{img}}" class="post-detail-image" alt="图片">`
			
 
				+                            `<img src="${{img}}" class="post-detail-image" alt="图片" loading="lazy">`
			
 
				                         ).join('');
			
 
				                     }} else {{
			
 
				                         imagesHtml = '<div style="text-align: center; color: #9ca3af; padding: 40px;">暂无图片</div>';
			
@@ -3640,6 +3658,42 @@ def generate_combined_html(posts_data: List[Dict], category_mapping: Dict = None
 
				     return html
			
 
				 
			
 
				 
			
 
				+def minify_html(html: str) -> str:
			
 
				+    """压缩HTML，去除多余空格和换行"""
			
 
				+    import re
			
 
				+
			
 
				+    # 保护script和style标签内容
			
 
				+    scripts = []
			
 
				+    styles = []
			
 
				+
			
 
				+    def save_script(match):
			
 
				+        scripts.append(match.group(0))
			
 
				+        return f"___SCRIPT_{len(scripts)-1}___"
			
 
				+
			
 
				+    def save_style(match):
			
 
				+        styles.append(match.group(0))
			
 
				+        return f"___STYLE_{len(styles)-1}___"
			
 
				+
			
 
				+    # 保存script和style
			
 
				+    html = re.sub(r'<script[^>]*>.*?</script>', save_script, html, flags=re.DOTALL)
			
 
				+    html = re.sub(r'<style[^>]*>.*?</style>', save_style, html, flags=re.DOTALL)
			
 
				+
			
 
				+    # 去除HTML注释
			
 
				+    html = re.sub(r'<!--.*?-->', '', html, flags=re.DOTALL)
			
 
				+
			
 
				+    # 去除多余空格和换行
			
 
				+    html = re.sub(r'\s+', ' ', html)
			
 
				+    html = re.sub(r'>\s+<', '><', html)
			
 
				+
			
 
				+    # 恢复script和style
			
 
				+    for i, script in enumerate(scripts):
			
 
				+        html = html.replace(f"___SCRIPT_{i}___", script)
			
 
				+    for i, style in enumerate(styles):
			
 
				+        html = html.replace(f"___STYLE_{i}___", style)
			
 
				+
			
 
				+    return html.strip()
			
 
				+
			
 
				+
			
 
				 def main():
			
 
				     """主函数"""
			
 
				     script_dir = Path(__file__).parent
			
@@ -3674,12 +3728,41 @@ def main():
 
				     print(f"\n生成合并的 HTML...")
			
 
				     html_content = generate_combined_html(posts_data, category_mapping, source_mapping)
			
 
				 
			
 
				-    print(f"保存到: {output_file}")
			
 
				+    # 保存原始版本
			
 
				+    print(f"保存原始HTML到: {output_file}")
			
 
				     with open(output_file, "w", encoding="utf-8") as f:
			
 
				         f.write(html_content)
			
 
				 
			
 
				-    print(f"\n完成! 可视化文件已保存")
			
 
				-    print(f"请在浏览器中打开: {output_file}")
			
 
				+    original_size = len(html_content) / 1024 / 1024
			
 
				+    print(f"原始HTML大小: {original_size:.1f} MB")
			
 
				+
			
 
				+    # 压缩HTML
			
 
				+    print(f"\n压缩HTML...")
			
 
				+    minified_html = minify_html(html_content)
			
 
				+
			
 
				+    minified_file = data_dir / "当前帖子_how解构结果_可视化.min.html"
			
 
				+    print(f"保存压缩HTML到: {minified_file}")
			
 
				+    with open(minified_file, "w", encoding="utf-8") as f:
			
 
				+        f.write(minified_html)
			
 
				+
			
 
				+    minified_size = len(minified_html) / 1024 / 1024
			
 
				+    print(f"压缩HTML大小: {minified_size:.1f} MB (减少 {(1 - minified_size/original_size)*100:.1f}%)")
			
 
				+
			
 
				+    # Gzip压缩
			
 
				+    import gzip
			
 
				+    print(f"\n生成Gzip压缩版本...")
			
 
				+    gzip_file = data_dir / "当前帖子_how解构结果_可视化.html.gz"
			
 
				+    with gzip.open(gzip_file, "wb") as f:
			
 
				+        f.write(minified_html.encode('utf-8'))
			
 
				+
			
 
				+    gzip_size = gzip_file.stat().st_size / 1024 / 1024
			
 
				+    print(f"Gzip压缩大小: {gzip_size:.1f} MB (比原始减少 {(1 - gzip_size/original_size)*100:.1f}%)")
			
 
				+
			
 
				+    print(f"\n完成! 生成了3个版本:")
			
 
				+    print(f"1. 原始版本: {output_file} ({original_size:.1f} MB)")
			
 
				+    print(f"2. 压缩版本: {minified_file} ({minified_size:.1f} MB)")
			
 
				+    print(f"3. Gzip版本: {gzip_file} ({gzip_size:.1f} MB)")
			
 
				+    print(f"\n建议分享: {gzip_file.name} (浏览器可直接打开)")
			
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":