| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- 分析 dimension_associations_analysis.json 中的关联关系
- """
- import json
- from collections import defaultdict, Counter
- from typing import Dict, List, Any
def load_data(file_path: str) -> Dict:
    """Read the JSON document stored at ``file_path`` and return it parsed.

    Args:
        file_path: Path of the JSON file; it is opened as UTF-8 text.

    Returns:
        The object produced by ``json.load`` for the file's content.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed
def analyze_basic_info(data: Dict) -> None:
    """Print the summary stored under the "分析说明" key of ``data``.

    The report lists the analysis types, the minimum shared-post count and,
    for each of the three dimensions, the total number of categories and the
    number of non-top-level categories. Missing entries are shown as ``0``
    (or as an empty list of analysis types).

    Args:
        data: Parsed analysis document.
    """
    rule = "=" * 80
    print(rule)
    print("📊 基本信息分析")
    print(rule)

    info = data.get("分析说明", {})

    kinds = ', '.join(info.get('分析类型', []))
    print(f"\n分析类型: {kinds}")
    min_shared = info.get('最小共同帖子数', 0)
    print(f"最小共同帖子数: {min_shared}")

    print(f"\n维度统计:")
    # The three dimensions share one key-naming scheme, so the lookup keys
    # are derived from the dimension name.
    for dim in ("灵感点", "目的点", "关键点"):
        total = info.get(f"{dim}分类数(全部)", 0)
        non_top = info.get(f"{dim}非一级分类数", 0)
        print(f" {dim}: {total} 个分类 (非一级: {non_top})")
def analyze_single_dimension(data: Dict) -> None:
    """Print a per-direction summary of the single-dimension associations.

    For every dimension under "单维度关联分析" and every direction inside it
    (the "说明" entry is a description, not a direction) the report shows the
    number of source categories, the total number of associations, the average
    per source, and up to five associations with Jaccard >= 0.5 and up to five
    with overlap coefficient >= 0.8, each sorted by that score, highest first.

    Args:
        data: Parsed analysis document.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("🔗 单维度关联分析")
    print(rule)

    relation_keys = ("与目的点的关联", "与关键点的关联", "与灵感点的关联")

    def _report_top(heading, metric, rows):
        # Print the match count and the five highest-scoring rows.
        if not rows:
            return
        print(f"\n {heading}: {len(rows)} 个")
        ranked = sorted(rows, key=lambda row: row[0], reverse=True)
        for score, src, target, shared in ranked[:5]:
            print(f" • {src} → {target}")
            print(f" {metric}: {score:.4f}, 共同帖子: {shared}")

    for dim_label, dim_payload in data.get("单维度关联分析", {}).items():
        print(f"\n【{dim_label}】")
        print(f"说明: {dim_payload.get('说明', '')}")

        for direction, sources in dim_payload.items():
            if direction == "说明":
                continue
            print(f"\n {direction}:")

            source_count = len(sources)
            link_count = 0
            strong_jaccard = []  # rows with Jaccard >= 0.5
            strong_overlap = []  # rows with overlap coefficient >= 0.8

            for src, src_payload in sources.items():
                # Only the first non-empty of the three relation lists is
                # used; if none is non-empty the last lookup's value is kept.
                links = []
                for key in relation_keys:
                    links = src_payload.get(key, [])
                    if links:
                        break
                link_count += len(links)

                for link in links:
                    jac = link.get("Jaccard相似度", 0)
                    ovl = link.get("重叠系数", 0)
                    if jac >= 0.5:
                        strong_jaccard.append((
                            jac,
                            src,
                            link.get("目标分类", ""),
                            link.get("共同帖子数", 0),
                        ))
                    if ovl >= 0.8:
                        strong_overlap.append((
                            ovl,
                            src,
                            link.get("目标分类", ""),
                            link.get("共同帖子数", 0),
                        ))

            print(f" 总源分类数: {source_count}")
            print(f" 总关联数: {link_count}")
            if source_count > 0:
                print(f" 平均每个源分类的关联数: {link_count/source_count:.2f}")
            else:
                print(" 平均每个源分类的关联数: 0")

            _report_top("🔥 高相似度关联 (Jaccard >= 0.5)", "Jaccard", strong_jaccard)
            _report_top("🎯 高重叠系数关联 (重叠 >= 0.8)", "重叠系数", strong_overlap)
def analyze_triple_dimension(data: Dict) -> None:
    """Print a summary of the three-way (orthogonal) associations.

    Reads the "三维正交关联分析" entry of ``data``, which maps each
    inspiration category to a payload holding a "正交关联" list. The report
    covers the number of inspiration categories, the number of combinations
    and their average per category, mean/max/min of the shared-post counts,
    the ten combinations with the most shared posts among those with at least
    two, and the five combinations with the most shared posts among those
    whose three intersection ratios are all >= 0.5.

    Args:
        data: Parsed analysis document. If the entry is missing or empty a
            "not found" message is printed and nothing else.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("🎲 三维正交关联分析")
    print(rule)

    triple = data.get("三维正交关联分析", {})
    if not triple:
        print("未找到三维正交关联数据")
        return

    class_count = len(triple)
    print(f"\n灵感点分类数: {class_count}")

    # Flatten every (inspiration, purpose, key point) combination into one list.
    combos = [
        {
            "inspiration": inspiration,
            "purpose": entry.get("目的点分类", ""),
            "key_point": entry.get("关键点分类", ""),
            "shared": entry.get("三维共同帖子数", 0),
            "ratio_inspiration": entry.get("三维交集占灵感点比例", 0),
            "ratio_purpose": entry.get("三维交集占目的点比例", 0),
            "ratio_key_point": entry.get("三维交集占关键点比例", 0),
        }
        for inspiration, payload in triple.items()
        for entry in payload.get("正交关联", [])
    ]
    combo_count = len(combos)

    print(f"总正交组合数: {combo_count}")
    if class_count > 0:
        print(f"平均每个灵感点的正交组合数: {combo_count/class_count:.2f}")
    else:
        print("平均每个灵感点的正交组合数: 0")

    if not combos:
        return

    def _by_shared(combo):
        return combo["shared"]

    def _show(combo):
        # Five-line description of one combination.
        print(f"\n 三维共同帖子数: {combo['shared']}")
        print(f" 灵感点: {combo['inspiration']}")
        print(f" 目的点: {combo['purpose']}")
        print(f" 关键点: {combo['key_point']}")
        print(f" 交集占比 - 灵感:{combo['ratio_inspiration']:.2f} 目的:{combo['ratio_purpose']:.2f} 关键:{combo['ratio_key_point']:.2f}")

    shared_counts = [combo["shared"] for combo in combos]
    print(f"\n正交组合帖子数统计:")
    print(f" 平均值: {sum(shared_counts)/len(shared_counts):.2f}")
    print(f" 最大值: {max(shared_counts)}")
    print(f" 最小值: {min(shared_counts)}")

    # Combinations backed by at least two shared posts.
    frequent = [combo for combo in combos if combo["shared"] >= 2]
    if frequent:
        print(f"\n🌟 高频三维正交组合 (三维共同帖子数 >= 2): {len(frequent)} 个")
        for combo in sorted(frequent, key=_by_shared, reverse=True)[:10]:
            _show(combo)

    # Combinations whose intersection covers at least half of every dimension.
    dominant = [
        combo for combo in combos
        if all(
            combo[field] >= 0.5
            for field in ("ratio_inspiration", "ratio_purpose", "ratio_key_point")
        )
    ]
    if dominant:
        print(f"\n🔥 高交集占比正交组合 (三维度占比均 >= 0.5): {len(dominant)} 个")
        for combo in sorted(dominant, key=_by_shared, reverse=True)[:5]:
            _show(combo)
def analyze_association_strength(data: Dict) -> None:
    """Print distribution statistics of the association strength scores.

    Collects the "Jaccard相似度" and "重叠系数" values of every association
    found under "单维度关联分析" (all dimensions and directions; the "说明"
    entry is a description and is skipped). For each score it prints mean,
    median, maximum and minimum. For Jaccard it also prints a five-bin
    histogram over [0, 1]; for the overlap coefficient it prints how many
    values are exactly 1.0.

    The median is the true median (the mean of the two middle values for an
    even number of samples). The last histogram bin is closed, ``[0.8, 1.0]``,
    so a score of exactly 1.0 is counted and the bin percentages add up to
    100% for scores within [0, 1].

    Args:
        data: Parsed analysis document. Nothing beyond the banner is printed
            when it contains no associations.
    """
    # Imported locally so the block does not rely on a file-level import.
    from statistics import median

    print("\n" + "=" * 80)
    print("📈 关联强度分布分析")
    print("=" * 80)

    association_keys = ("与目的点的关联", "与关键点的关联", "与灵感点的关联")
    all_jaccard = []
    all_overlap = []

    for dimension_data in data.get("单维度关联分析", {}).values():
        for direction, associations in dimension_data.items():
            if direction == "说明":
                continue
            for source_data in associations.values():
                # Use the first non-empty association list; a missing, empty
                # or null entry falls through to an empty list.
                assoc_list = next(
                    (
                        source_data[key]
                        for key in association_keys
                        if source_data.get(key)
                    ),
                    [],
                )
                for assoc in assoc_list:
                    all_jaccard.append(assoc.get("Jaccard相似度", 0))
                    all_overlap.append(assoc.get("重叠系数", 0))

    if all_jaccard:
        print(f"\nJaccard相似度分布:")
        print(f" 平均值: {sum(all_jaccard)/len(all_jaccard):.4f}")
        print(f" 中位数: {median(all_jaccard):.4f}")
        print(f" 最大值: {max(all_jaccard):.4f}")
        print(f" 最小值: {min(all_jaccard):.4f}")

        # Histogram: half-open bins, except the last one, which includes 1.0.
        ranges = [(0, 0.2), (0.2, 0.4), (0.4, 0.6), (0.6, 0.8), (0.8, 1.0)]
        last_index = len(ranges) - 1
        for index, (low, high) in enumerate(ranges):
            closed = index == last_index
            if closed:
                count = sum(1 for j in all_jaccard if low <= j <= high)
            else:
                count = sum(1 for j in all_jaccard if low <= j < high)
            pct = count / len(all_jaccard) * 100
            bracket = "]" if closed else ")"
            print(f" [{low:.1f}, {high:.1f}{bracket}: {count} ({pct:.1f}%)")

    if all_overlap:
        print(f"\n重叠系数分布:")
        print(f" 平均值: {sum(all_overlap)/len(all_overlap):.4f}")
        print(f" 中位数: {median(all_overlap):.4f}")
        print(f" 最大值: {max(all_overlap):.4f}")
        print(f" 最小值: {min(all_overlap):.4f}")

        # Number of associations whose overlap coefficient is exactly 1.0.
        perfect_overlap = sum(1 for o in all_overlap if o == 1.0)
        print(f" 完全重叠(1.0): {perfect_overlap} ({perfect_overlap/len(all_overlap)*100:.1f}%)")
# Analysis file that is read when no other path is supplied.
DEFAULT_DATA_PATH = "/Users/liulidong/project/pattern相关文件/optimization/dimension_associations_analysis.json"


def main(file_path: str = DEFAULT_DATA_PATH) -> None:
    """Load the analysis document and print every report in sequence.

    Args:
        file_path: Path of the JSON file to analyse. Defaults to
            ``DEFAULT_DATA_PATH``, the location that used to be hard-coded,
            so calling ``main()`` without arguments behaves as before.
    """
    print("🔍 加载数据...")
    data = load_data(file_path)

    # Run each analysis in turn.
    analyze_basic_info(data)
    analyze_single_dimension(data)
    analyze_triple_dimension(data)
    analyze_association_strength(data)

    print("\n" + "=" * 80)
    print("✅ 分析完成!")
    print("=" * 80)


if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the default path;
    # with no argument the slice is empty and main() uses its default.
    main(*sys.argv[1:2])
|