#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Analyze the association relationships stored in dimension_associations_analysis.json.

The JSON file holds single-dimension association metrics (Jaccard similarity,
overlap coefficient, coverage) between "inspiration" (灵感点), "purpose" (目的点)
and "key point" (关键点) classes, plus three-way orthogonal combinations.
This script prints a human-readable report for each section.
"""

import json
import statistics
import sys
from collections import defaultdict, Counter
from typing import Dict, List, Any


def load_data(file_path: str) -> Dict:
    """Load and return the JSON payload at *file_path* (UTF-8)."""
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def analyze_basic_info(data: Dict) -> None:
    """Print the report header: analysis metadata and per-dimension class counts."""
    print("=" * 80)
    print("📊 基本信息分析")
    print("=" * 80)

    info = data.get("分析说明", {})
    print(f"\n分析类型: {', '.join(info.get('分析类型', []))}")
    print(f"最小共同帖子数: {info.get('最小共同帖子数', 0)}")
    print("\n维度统计:")
    print(f"  灵感点: {info.get('灵感点分类数(全部)', 0)} 个分类 (非一级: {info.get('灵感点非一级分类数', 0)})")
    print(f"  目的点: {info.get('目的点分类数(全部)', 0)} 个分类 (非一级: {info.get('目的点非一级分类数', 0)})")
    print(f"  关键点: {info.get('关键点分类数(全部)', 0)} 个分类 (非一级: {info.get('关键点非一级分类数', 0)})")


def analyze_single_dimension(data: Dict) -> None:
    """Report single-dimension associations.

    For every dimension and every association direction, print the totals and
    the strongest links: Jaccard similarity >= 0.5 and overlap coefficient
    >= 0.8, each capped at the top five.
    """
    print("\n" + "=" * 80)
    print("🔗 单维度关联分析")
    print("=" * 80)

    single_dim = data.get("单维度关联分析", {})
    for dimension_name, dimension_data in single_dim.items():
        print(f"\n【{dimension_name}】")
        print(f"说明: {dimension_data.get('说明', '')}")

        # Every key other than "说明" names an association direction.
        for direction, associations in dimension_data.items():
            if direction == "说明":
                continue
            print(f"\n  {direction}:")

            total_sources = len(associations)
            total_associations = 0
            high_similarity = []  # associations with Jaccard >= 0.5
            high_overlap = []     # associations with overlap coefficient >= 0.8

            for source_name, source_data in associations.items():
                # The target-list key depends on the direction; only one of the
                # three keys should be present (the `or` chain also skips keys
                # that map to an empty list, which yields [] in the end anyway).
                assoc_list = (source_data.get("与目的点的关联", [])
                              or source_data.get("与关键点的关联", [])
                              or source_data.get("与灵感点的关联", []))
                total_associations += len(assoc_list)

                for assoc in assoc_list:
                    jaccard = assoc.get("Jaccard相似度", 0)
                    overlap = assoc.get("重叠系数", 0)
                    if jaccard >= 0.5:
                        high_similarity.append({
                            "源": source_name,
                            "目标": assoc.get("目标分类", ""),
                            "Jaccard": jaccard,
                            "共同帖子数": assoc.get("共同帖子数", 0),
                        })
                    if overlap >= 0.8:
                        high_overlap.append({
                            "源": source_name,
                            "目标": assoc.get("目标分类", ""),
                            "重叠系数": overlap,
                            "共同帖子数": assoc.get("共同帖子数", 0),
                        })

            print(f"    总源分类数: {total_sources}")
            print(f"    总关联数: {total_associations}")
            if total_sources > 0:
                print(f"    平均每个源分类的关联数: {total_associations / total_sources:.2f}")
            else:
                print("    平均每个源分类的关联数: 0")

            if high_similarity:
                print(f"\n    🔥 高相似度关联 (Jaccard >= 0.5): {len(high_similarity)} 个")
                for item in sorted(high_similarity, key=lambda x: x["Jaccard"], reverse=True)[:5]:
                    print(f"      • {item['源']} → {item['目标']}")
                    print(f"        Jaccard: {item['Jaccard']:.4f}, 共同帖子: {item['共同帖子数']}")

            if high_overlap:
                print(f"\n    🎯 高重叠系数关联 (重叠 >= 0.8): {len(high_overlap)} 个")
                for item in sorted(high_overlap, key=lambda x: x["重叠系数"], reverse=True)[:5]:
                    print(f"      • {item['源']} → {item['目标']}")
                    print(f"        重叠系数: {item['重叠系数']:.4f}, 共同帖子: {item['共同帖子数']}")


def analyze_triple_dimension(data: Dict) -> None:
    """Report three-way orthogonal combinations grouped by inspiration class.

    Flattens every inspiration class's "正交关联" list, prints aggregate
    post-count statistics, then highlights high-frequency combinations
    (shared posts >= 2) and high-intersection-ratio combinations (all three
    ratios >= 0.5).
    """
    print("\n" + "=" * 80)
    print("🎲 三维正交关联分析")
    print("=" * 80)

    triple_dim = data.get("三维正交关联分析", {})
    if not triple_dim:
        print("未找到三维正交关联数据")
        return

    total_inspiration_classes = len(triple_dim)
    total_orthogonal_combinations = 0
    all_combinations = []

    print(f"\n灵感点分类数: {total_inspiration_classes}")

    for inspiration_class, inspiration_data in triple_dim.items():
        orthogonal_list = inspiration_data.get("正交关联", [])
        total_orthogonal_combinations += len(orthogonal_list)
        for combo in orthogonal_list:
            all_combinations.append({
                "灵感点": inspiration_class,
                "目的点": combo.get("目的点分类", ""),
                "关键点": combo.get("关键点分类", ""),
                "三维共同帖子数": combo.get("三维共同帖子数", 0),
                "三维交集占灵感点比例": combo.get("三维交集占灵感点比例", 0),
                "三维交集占目的点比例": combo.get("三维交集占目的点比例", 0),
                "三维交集占关键点比例": combo.get("三维交集占关键点比例", 0),
                "共同帖子ID": combo.get("三维共同帖子ID", []),
            })

    print(f"总正交组合数: {total_orthogonal_combinations}")
    if total_inspiration_classes > 0:
        print(f"平均每个灵感点的正交组合数: {total_orthogonal_combinations / total_inspiration_classes:.2f}")
    else:
        print("平均每个灵感点的正交组合数: 0")

    if all_combinations:
        post_counts = [c["三维共同帖子数"] for c in all_combinations]
        print("\n正交组合帖子数统计:")
        print(f"  平均值: {sum(post_counts) / len(post_counts):.2f}")
        print(f"  最大值: {max(post_counts)}")
        print(f"  最小值: {min(post_counts)}")

        # High-frequency combinations: at least two posts shared by all three dimensions.
        high_post_combinations = [c for c in all_combinations if c["三维共同帖子数"] >= 2]
        if high_post_combinations:
            print(f"\n🌟 高频三维正交组合 (三维共同帖子数 >= 2): {len(high_post_combinations)} 个")
            for combo in sorted(high_post_combinations, key=lambda x: x["三维共同帖子数"], reverse=True)[:10]:
                print(f"\n  三维共同帖子数: {combo['三维共同帖子数']}")
                print(f"  灵感点: {combo['灵感点']}")
                print(f"  目的点: {combo['目的点']}")
                print(f"  关键点: {combo['关键点']}")
                print(f"  交集占比 - 灵感:{combo['三维交集占灵感点比例']:.2f} 目的:{combo['三维交集占目的点比例']:.2f} 关键:{combo['三维交集占关键点比例']:.2f}")

        # High-intersection-ratio combinations: the 3-way intersection covers at
        # least half of every one of the three source classes.
        high_ratio_combinations = [
            c for c in all_combinations
            if c["三维交集占灵感点比例"] >= 0.5
            and c["三维交集占目的点比例"] >= 0.5
            and c["三维交集占关键点比例"] >= 0.5
        ]
        if high_ratio_combinations:
            print(f"\n🔥 高交集占比正交组合 (三维度占比均 >= 0.5): {len(high_ratio_combinations)} 个")
            for combo in sorted(high_ratio_combinations, key=lambda x: x["三维共同帖子数"], reverse=True)[:5]:
                print(f"\n  三维共同帖子数: {combo['三维共同帖子数']}")
                print(f"  灵感点: {combo['灵感点']}")
                print(f"  目的点: {combo['目的点']}")
                print(f"  关键点: {combo['关键点']}")
                print(f"  交集占比 - 灵感:{combo['三维交集占灵感点比例']:.2f} 目的:{combo['三维交集占目的点比例']:.2f} 关键:{combo['三维交集占关键点比例']:.2f}")


def analyze_association_strength(data: Dict) -> None:
    """Aggregate Jaccard / overlap / coverage metrics across all single-dimension associations.

    Prints mean, median, min/max and a bucketed histogram for Jaccard
    similarity, plus mean/median/min/max and the perfect-overlap count for the
    overlap coefficient.
    """
    print("\n" + "=" * 80)
    print("📈 关联强度分布分析")
    print("=" * 80)

    single_dim = data.get("单维度关联分析", {})
    all_jaccard = []
    all_overlap = []
    all_coverage_source = []
    all_coverage_target = []

    for dimension_name, dimension_data in single_dim.items():
        for direction, associations in dimension_data.items():
            if direction == "说明":
                continue
            for source_name, source_data in associations.items():
                assoc_list = (source_data.get("与目的点的关联", [])
                              or source_data.get("与关键点的关联", [])
                              or source_data.get("与灵感点的关联", []))
                for assoc in assoc_list:
                    all_jaccard.append(assoc.get("Jaccard相似度", 0))
                    all_overlap.append(assoc.get("重叠系数", 0))
                    # The source-coverage field name depends on the direction prefix.
                    if "灵感点→" in direction:
                        all_coverage_source.append(assoc.get("灵感点覆盖率", 0))
                    elif "目的点→" in direction:
                        all_coverage_source.append(assoc.get("目的点覆盖率", 0))
                    elif "关键点→" in direction:
                        all_coverage_source.append(assoc.get("关键点覆盖率", 0))
                    all_coverage_target.append(assoc.get("目标维度覆盖率", 0))

    if all_jaccard:
        print("\nJaccard相似度分布:")
        print(f"  平均值: {sum(all_jaccard) / len(all_jaccard):.4f}")
        # BUG FIX: the original used sorted(xs)[len(xs)//2], which picks the
        # upper-middle element for even-length data; statistics.median averages
        # the middle pair, giving the true median.
        print(f"  中位数: {statistics.median(all_jaccard):.4f}")
        print(f"  最大值: {max(all_jaccard):.4f}")
        print(f"  最小值: {min(all_jaccard):.4f}")

        # Histogram buckets. BUG FIX: the original half-open [0.8, 1.0) bucket
        # excluded values exactly equal to 1.0, leaving perfect matches
        # uncounted; the final bucket is now closed at 1.0.
        ranges = [(0, 0.2), (0.2, 0.4), (0.4, 0.6), (0.6, 0.8), (0.8, 1.0)]
        for low, high in ranges:
            count = sum(1 for j in all_jaccard
                        if low <= j < high or (high == 1.0 and j == 1.0))
            pct = count / len(all_jaccard) * 100
            print(f"  [{low:.1f}, {high:.1f}): {count} ({pct:.1f}%)")

    if all_overlap:
        print("\n重叠系数分布:")
        print(f"  平均值: {sum(all_overlap) / len(all_overlap):.4f}")
        print(f"  中位数: {statistics.median(all_overlap):.4f}")
        print(f"  最大值: {max(all_overlap):.4f}")
        print(f"  最小值: {min(all_overlap):.4f}")
        # Overlap coefficient == 1.0 means one class's posts are a subset of the other's.
        perfect_overlap = sum(1 for o in all_overlap if o == 1.0)
        print(f"  完全重叠(1.0): {perfect_overlap} ({perfect_overlap / len(all_overlap) * 100:.1f}%)")


def main() -> None:
    """Entry point: load the analysis JSON and run every report section.

    The input path may be supplied as the first CLI argument; the original
    hard-coded path remains the default for backward compatibility.
    """
    default_path = "/Users/liulidong/project/pattern相关文件/optimization/dimension_associations_analysis.json"
    file_path = sys.argv[1] if len(sys.argv) > 1 else default_path

    print("🔍 加载数据...")
    data = load_data(file_path)

    analyze_basic_info(data)
    analyze_single_dimension(data)
    analyze_triple_dimension(data)
    analyze_association_strength(data)

    print("\n" + "=" * 80)
    print("✅ 分析完成!")
    print("=" * 80)


if __name__ == "__main__":
    main()