cascade_search_visualizer.py 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 级联搜索结果可视化工具
  5. 展示候选词 → Top3人设特征 → 搜索词 → 搜索结果的完整流程
  6. """
  7. import json
  8. import os
  9. import sys
  10. from datetime import datetime
  11. from typing import List, Dict, Any, Set
  12. import webbrowser
  13. def load_json(file_path: str) -> Any:
  14. """加载JSON文件"""
  15. with open(file_path, 'r', encoding='utf-8') as f:
  16. return json.load(f)
  17. def load_all_data(output_dir: str = "output_v2") -> Dict[str, Any]:
  18. """
  19. 加载所有需要的数据文件
  20. Returns:
  21. 包含所有数据的字典
  22. """
  23. print("正在加载数据文件...")
  24. data = {
  25. 'filtered_features': load_json(os.path.join(output_dir, 'filtered_features.json')),
  26. 'candidate_words': load_json(os.path.join(output_dir, 'candidate_words.json')),
  27. 'search_queries': load_json(os.path.join(output_dir, 'search_queries_evaluated.json')),
  28. 'search_results': load_json(os.path.join(output_dir, 'search_results.json')),
  29. 'evaluated_results': load_json(os.path.join(output_dir, 'evaluated_results.json'))
  30. }
  31. # 尝试加载深度分析数据(可选)
  32. deep_path = os.path.join(output_dir, 'deep_analysis_results.json')
  33. similarity_path = os.path.join(output_dir, 'similarity_analysis_results.json')
  34. if os.path.exists(deep_path):
  35. deep_data = load_json(deep_path)
  36. # 创建note_id到解构数据的映射
  37. data['stage7_mapping'] = {}
  38. for result in deep_data.get('results', []):
  39. note_id = result.get('note_id')
  40. if note_id:
  41. data['stage7_mapping'][note_id] = result
  42. else:
  43. data['stage7_mapping'] = {}
  44. if os.path.exists(similarity_path):
  45. sim_data = load_json(similarity_path)
  46. # 创建note_id到相似度数据的映射
  47. data['stage8_mapping'] = {}
  48. for result in sim_data.get('results', []):
  49. note_id = result.get('note_id')
  50. if note_id:
  51. data['stage8_mapping'][note_id] = result
  52. else:
  53. data['stage8_mapping'] = {}
  54. print(f" ✓ 已加载 {len(data['filtered_features'])} 个原始特征")
  55. print(f" ✓ 已加载 {len(data['candidate_words'])} 个候选词数据")
  56. print(f" ✓ 已加载解构数据: {len(data['stage7_mapping'])} 个帖子")
  57. print(f" ✓ 已加载相似度数据: {len(data['stage8_mapping'])} 个帖子")
  58. return data
  59. def extract_global_candidates(data: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]:
  60. """
  61. 提取全局候选词并按相似度分类
  62. Returns:
  63. {
  64. 'matched': [...], # 相似度 >= 0.8
  65. 'partial': [...], # 0.5 <= 相似度 < 0.8
  66. 'unmatched': [...] # 相似度 < 0.5
  67. }
  68. """
  69. print("\n提取全局候选词...")
  70. candidates_map = {} # 用于去重
  71. # 遍历所有特征的候选词
  72. for feature_data in data['candidate_words']:
  73. candidates_by_base = feature_data.get('高相似度候选_按base_word', {})
  74. for base_word, candidates in candidates_by_base.items():
  75. for cand in candidates:
  76. cand_name = cand.get('候选词', '')
  77. if not cand_name:
  78. continue
  79. # 计算相似度
  80. similarity = cand.get('相似度', 0)
  81. # 如果是帖子候选词,使用点最高人设相似度
  82. if cand.get('候选词类型') == 'post':
  83. similarity = cand.get('点最高人设相似度', similarity)
  84. # 去重:保留最高相似度
  85. if cand_name not in candidates_map or similarity > candidates_map[cand_name]['相似度']:
  86. candidates_map[cand_name] = {
  87. '名称': cand_name,
  88. '类型': cand.get('候选词类型', 'unknown'),
  89. '相似度': similarity,
  90. '特征类型': cand.get('特征类型', ''),
  91. '来源路径': cand.get('来源路径', ''),
  92. '匹配说明': cand.get('匹配说明', '')
  93. }
  94. # 按相似度分类
  95. result = {
  96. 'matched': [], # >= 0.8
  97. 'partial': [], # 0.5 ~ 0.8
  98. 'unmatched': [] # < 0.5
  99. }
  100. for cand in candidates_map.values():
  101. similarity = cand['相似度']
  102. if similarity >= 0.8:
  103. result['matched'].append(cand)
  104. elif similarity >= 0.5:
  105. result['partial'].append(cand)
  106. else:
  107. result['unmatched'].append(cand)
  108. # 排序:按相似度降序
  109. for category in result.values():
  110. category.sort(key=lambda x: x['相似度'], reverse=True)
  111. print(f" ✓ 已匹配: {len(result['matched'])} 个")
  112. print(f" ✓ 部分匹配: {len(result['partial'])} 个")
  113. print(f" ✓ 不匹配: {len(result['unmatched'])} 个")
  114. return result
  115. def render_left_candidates_html(global_candidates: Dict[str, List[Dict[str, Any]]]) -> str:
  116. """
  117. 渲染左侧固定候选词区域HTML
  118. Args:
  119. global_candidates: 分类后的全局候选词
  120. Returns:
  121. HTML字符串
  122. """
  123. html_parts = []
  124. html_parts.append('''
  125. <div class="left-candidates-panel">
  126. <div class="candidates-header">
  127. <div class="candidates-title">📚 可用候选词</div>
  128. <div class="candidates-hint">此区域固定展示 不随滚动</div>
  129. </div>
  130. <div class="candidates-content">
  131. ''')
  132. # 已匹配区域
  133. html_parts.append('''
  134. <div class="candidates-section matched-section">
  135. <div class="section-title">✅ 已匹配 <span class="section-count">({count})</span></div>
  136. <div class="section-hint">与人设相似度 ≥ 0.8</div>
  137. <div class="candidates-list">
  138. '''.format(count=len(global_candidates['matched'])))
  139. for cand in global_candidates['matched']:
  140. icon = '📝' if cand['类型'] == 'post' else '👤'
  141. type_label = '帖子' if cand['类型'] == 'post' else '人设'
  142. html_parts.append(f'''
  143. <div class="candidate-item matched">
  144. <div class="candidate-icon">{icon}</div>
  145. <div class="candidate-info">
  146. <div class="candidate-name">{cand['名称']}</div>
  147. <div class="candidate-meta">
  148. <span class="candidate-type">{type_label}</span>
  149. <span class="candidate-similarity">{cand['相似度']:.2f}</span>
  150. </div>
  151. </div>
  152. </div>
  153. ''')
  154. html_parts.append('''
  155. </div>
  156. </div>
  157. ''')
  158. # 部分匹配区域
  159. html_parts.append('''
  160. <div class="candidates-section partial-section">
  161. <div class="section-title">🟡 部分匹配 <span class="section-count">({count})</span></div>
  162. <div class="section-hint">与人设特征相似度 0.5-0.8</div>
  163. <div class="candidates-list">
  164. '''.format(count=len(global_candidates['partial'])))
  165. for cand in global_candidates['partial']:
  166. icon = '📝' if cand['类型'] == 'post' else '👤'
  167. type_label = '帖子' if cand['类型'] == 'post' else '人设'
  168. html_parts.append(f'''
  169. <div class="candidate-item partial">
  170. <div class="candidate-icon">{icon}</div>
  171. <div class="candidate-info">
  172. <div class="candidate-name">{cand['名称']}</div>
  173. <div class="candidate-meta">
  174. <span class="candidate-type">{type_label}</span>
  175. <span class="candidate-similarity">{cand['相似度']:.2f}</span>
  176. </div>
  177. </div>
  178. </div>
  179. ''')
  180. html_parts.append('''
  181. </div>
  182. </div>
  183. ''')
  184. # 不匹配区域
  185. html_parts.append('''
  186. <div class="candidates-section unmatched-section">
  187. <div class="section-title">❌ 不匹配 <span class="section-count">({count})</span></div>
  188. <div class="section-hint">与人设特征相似度 < 0.5</div>
  189. <div class="candidates-list">
  190. '''.format(count=len(global_candidates['unmatched'])))
  191. for cand in global_candidates['unmatched']:
  192. icon = '📝' if cand['类型'] == 'post' else '👤'
  193. type_label = '帖子' if cand['类型'] == 'post' else '人设'
  194. html_parts.append(f'''
  195. <div class="candidate-item unmatched">
  196. <div class="candidate-icon">{icon}</div>
  197. <div class="candidate-info">
  198. <div class="candidate-name">{cand['名称']}</div>
  199. <div class="candidate-meta">
  200. <span class="candidate-type">{type_label}</span>
  201. <span class="candidate-similarity">{cand['相似度']:.2f}</span>
  202. </div>
  203. </div>
  204. </div>
  205. ''')
  206. html_parts.append('''
  207. </div>
  208. </div>
  209. </div>
  210. </div>
  211. ''')
  212. return ''.join(html_parts)
  213. def render_cascade_flow_html(data: Dict[str, Any]) -> str:
  214. """
  215. 渲染中间级联流程HTML(三层结构)
  216. Returns:
  217. HTML字符串
  218. """
  219. html_parts = []
  220. html_parts.append('''
  221. <div class="cascade-flow-panel">
  222. <div class="cascade-header">
  223. <div class="cascade-title">🔄 级联搜索流程</div>
  224. </div>
  225. <div class="cascade-content" id="cascadeContent">
  226. ''')
  227. # 默认显示第一个特征的级联流程
  228. if data['evaluated_results']:
  229. first_feature = data['evaluated_results'][0]
  230. html_parts.append(render_single_cascade(first_feature, 0, data))
  231. html_parts.append('''
  232. </div>
  233. </div>
  234. ''')
  235. return ''.join(html_parts)
  236. def render_single_cascade(feature_data: Dict[str, Any], feature_idx: int, data: Dict[str, Any]) -> str:
  237. """
  238. 渲染单个特征的级联流程
  239. Args:
  240. feature_data: 特征数据
  241. feature_idx: 特征索引
  242. data: 全部数据
  243. Returns:
  244. HTML字符串
  245. """
  246. html_parts = []
  247. original_feature = feature_data.get('原始特征名称', '')
  248. top3_matches = feature_data.get('top3匹配信息', [])
  249. groups = feature_data.get('组合评估结果_分组', [])
  250. # 层级1: 原始特征
  251. html_parts.append(f'''
  252. <div class="cascade-layer layer-1">
  253. <div class="layer-title">📌 帖子选题点</div>
  254. <div class="feature-selector">
  255. <div class="selected-feature">
  256. <div class="feature-name">{original_feature}</div>
  257. <div class="feature-actions">
  258. <button class="switch-feature-btn" onclick="showFeatureSelector()">切换特征</button>
  259. </div>
  260. </div>
  261. </div>
  262. </div>
  263. ''')
  264. # 层级2: Top3人设特征
  265. html_parts.append('''
  266. <div class="cascade-arrow">↓</div>
  267. <div class="cascade-layer layer-2">
  268. <div class="layer-title">🎯 Top1各 相似度(x)</div>
  269. <div class="top3-container">
  270. ''')
  271. for idx, match in enumerate(top3_matches[:3], 1):
  272. base_word = match.get('人设特征名称', '')
  273. similarity = match.get('相似度', 0)
  274. is_top1 = (idx == 1)
  275. card_class = 'top3-card top1-card' if is_top1 else 'top3-card'
  276. html_parts.append(f'''
  277. <div class="{card_class}" data-feature-idx="{feature_idx}" data-match-idx="{idx-1}" onclick="selectBaseWord({feature_idx}, {idx-1})">
  278. <div class="top3-rank">Top{idx}</div>
  279. <div class="top3-name">{base_word}</div>
  280. <div class="top3-similarity">相似度: {similarity:.2f}</div>
  281. </div>
  282. ''')
  283. html_parts.append('''
  284. </div>
  285. </div>
  286. ''')
  287. # 层级3: 搜索词(默认展开Top1)
  288. if groups:
  289. html_parts.append('''
  290. <div class="cascade-arrow">↓</div>
  291. <div class="cascade-layer layer-3">
  292. <div class="layer-title">🔍 搜索词生成</div>
  293. <div class="search-words-container" id="searchWordsContainer">
  294. ''')
  295. # 默认显示第一个group(Top1)
  296. html_parts.append(render_search_words_group(groups[0], feature_idx, 0))
  297. html_parts.append('''
  298. </div>
  299. </div>
  300. ''')
  301. return ''.join(html_parts)
  302. def render_search_words_group(group: Dict[str, Any], feature_idx: int, group_idx: int) -> str:
  303. """
  304. 渲染搜索词组
  305. Args:
  306. group: 搜索词组数据
  307. feature_idx: 特征索引
  308. group_idx: 组索引
  309. Returns:
  310. HTML字符串
  311. """
  312. html_parts = []
  313. base_word = group.get('base_word', '')
  314. searches = group.get('top10_searches', [])
  315. available_words = group.get('available_words', [])
  316. html_parts.append(f'''
  317. <div class="search-words-group" data-base-word="{base_word}">
  318. <div class="base-word-label">中心词: <span class="base-word-value">{base_word}</span></div>
  319. ''')
  320. # 显示每个搜索词
  321. for sw_idx, search in enumerate(searches):
  322. html_parts.append(render_search_word_card(search, feature_idx, group_idx, sw_idx, available_words))
  323. html_parts.append('''
  324. </div>
  325. ''')
  326. return ''.join(html_parts)
  327. def render_search_word_card(search: Dict[str, Any], feature_idx: int, group_idx: int, sw_idx: int, available_words: List) -> str:
  328. """
  329. 渲染单个搜索词卡片
  330. Args:
  331. search: 搜索词数据
  332. feature_idx, group_idx, sw_idx: 索引
  333. available_words: 可用候选词列表
  334. Returns:
  335. HTML字符串
  336. """
  337. search_word = search.get('search_word', '')
  338. score = search.get('score', 0)
  339. reasoning = search.get('reasoning', '')
  340. has_result = search.get('search_result') is not None
  341. # 检查是否已执行搜索
  342. status_icon = '✅' if has_result else '⏸️'
  343. status_text = '已搜索' if has_result else '未搜索'
  344. status_class = 'searched' if has_result else 'not-searched'
  345. # 显示候选词(最多前10个)
  346. cand_names = [w.get('候选词', '') if isinstance(w, dict) else w for w in available_words[:10]]
  347. cand_display = ', '.join(cand_names) if cand_names else '无'
  348. html = f'''
  349. <div class="search-word-card {status_class}" data-feature-idx="{feature_idx}" data-group-idx="{group_idx}" data-sw-idx="{sw_idx}" onclick="selectSearchWord({feature_idx}, {group_idx}, {sw_idx})">
  350. <div class="sw-header">
  351. <div class="sw-status">{status_icon} {status_text}</div>
  352. <div class="sw-rank">#{sw_idx + 1}</div>
  353. </div>
  354. <div class="sw-candidates-pool">
  355. <div class="sw-label">可用候选词池:</div>
  356. <div class="sw-candidates">{cand_display}</div>
  357. </div>
  358. <div class="sw-arrow-container">
  359. <div class="sw-arrow">
  360. <span class="arrow-line">→</span>
  361. <span class="arrow-score">score: {score:.2f}</span>
  362. </div>
  363. </div>
  364. <div class="sw-result">
  365. <div class="sw-query">{search_word}</div>
  366. </div>
  367. <div class="sw-reasoning">
  368. <div class="reasoning-label">💡 LLM推理理由:</div>
  369. <div class="reasoning-content">{reasoning}</div>
  370. </div>
  371. </div>
  372. '''
  373. return html
  374. def generate_html(data: Dict[str, Any], global_candidates: Dict[str, List[Dict[str, Any]]]) -> str:
  375. """
  376. 生成完整HTML页面
  377. Args:
  378. data: 所有数据
  379. global_candidates: 全局候选词
  380. Returns:
  381. 完整HTML字符串
  382. """
  383. print("\n正在生成HTML...")
  384. # 准备数据JSON
  385. data_json = json.dumps(data['evaluated_results'], ensure_ascii=False)
  386. stage7_json = json.dumps(data['stage7_mapping'], ensure_ascii=False)
  387. stage8_json = json.dumps(data['stage8_mapping'], ensure_ascii=False)
  388. # 生成各部分HTML
  389. left_html = render_left_candidates_html(global_candidates)
  390. cascade_html = render_cascade_flow_html(data)
  391. # 生成完整HTML
  392. html_template = f'''<!DOCTYPE html>
  393. <html lang="zh-CN">
  394. <head>
  395. <meta charset="UTF-8">
  396. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  397. <title>级联搜索结果可视化</title>
  398. <style>
  399. {get_css_styles()}
  400. </style>
  401. </head>
  402. <body>
  403. <div class="page-header">
  404. <div class="header-title">🔍 级联搜索结果可视化系统</div>
  405. <div class="header-subtitle">候选词 → Top3人设特征 → 搜索词 → 搜索结果</div>
  406. </div>
  407. <div class="main-layout">
  408. <!-- 左侧:候选词库 -->
  409. {left_html}
  410. <!-- 中间:级联流程 -->
  411. {cascade_html}
  412. <!-- 右侧:搜索结果 -->
  413. <div class="right-results-panel">
  414. <div class="results-header">
  415. <div class="results-title">📝 搜索结果卡片</div>
  416. <div class="results-subtitle" id="resultsSubtitle">请选择一个搜索词查看结果</div>
  417. </div>
  418. <div class="results-content" id="resultsContent">
  419. <div class="empty-results">
  420. <div class="empty-icon">🔍</div>
  421. <div class="empty-text">选择搜索词后,这里将显示对应的搜索结果</div>
  422. </div>
  423. </div>
  424. </div>
  425. </div>
  426. <!-- 特征选择器Modal -->
  427. <div class="modal-overlay" id="featureSelectorModal">
  428. <div class="modal-window">
  429. <div class="modal-header">
  430. <div class="modal-title">选择原始特征</div>
  431. <button class="modal-close-btn" onclick="closeFeatureSelector()">×</button>
  432. </div>
  433. <div class="modal-body">
  434. <div class="feature-list" id="featureList"></div>
  435. </div>
  436. </div>
  437. </div>
  438. <script>
  439. // 数据
  440. const allData = {data_json};
  441. const stage7Data = {stage7_json};
  442. const stage8Data = {stage8_json};
  443. let currentFeatureIdx = 0;
  444. let currentGroupIdx = 0;
  445. let currentSwIdx = 0;
  446. {get_javascript_code()}
  447. </script>
  448. </body>
  449. </html>
  450. '''
  451. print(" ✓ HTML生成完成")
  452. return html_template
  453. def get_css_styles() -> str:
  454. """获取CSS样式"""
  455. return '''
  456. * {
  457. margin: 0;
  458. padding: 0;
  459. box-sizing: border-box;
  460. }
  461. body {
  462. font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
  463. background: #f5f7fa;
  464. color: #333;
  465. overflow-x: hidden;
  466. }
  467. /* 页面头部 */
  468. .page-header {
  469. background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  470. color: white;
  471. padding: 20px;
  472. text-align: center;
  473. box-shadow: 0 2px 10px rgba(0,0,0,0.1);
  474. }
  475. .header-title {
  476. font-size: 24px;
  477. font-weight: bold;
  478. margin-bottom: 5px;
  479. }
  480. .header-subtitle {
  481. font-size: 14px;
  482. opacity: 0.9;
  483. }
  484. /* 主布局 - 三栏 */
  485. .main-layout {
  486. display: flex;
  487. gap: 20px;
  488. padding: 20px;
  489. height: calc(100vh - 100px);
  490. }
  491. /* 左侧候选词面板 - 固定 */
  492. .left-candidates-panel {
  493. width: 280px;
  494. background: white;
  495. border-radius: 8px;
  496. box-shadow: 0 2px 8px rgba(0,0,0,0.1);
  497. position: sticky;
  498. top: 20px;
  499. height: fit-content;
  500. max-height: calc(100vh - 140px);
  501. display: flex;
  502. flex-direction: column;
  503. }
  504. .candidates-header {
  505. padding: 15px;
  506. border-bottom: 2px solid #e5e7eb;
  507. }
  508. .candidates-title {
  509. font-size: 16px;
  510. font-weight: 600;
  511. color: #374151;
  512. margin-bottom: 5px;
  513. }
  514. .candidates-hint {
  515. font-size: 11px;
  516. color: #ef4444;
  517. font-weight: 500;
  518. }
  519. .candidates-content {
  520. flex: 1;
  521. overflow-y: auto;
  522. padding: 10px;
  523. }
  524. .candidates-section {
  525. margin-bottom: 15px;
  526. }
  527. .section-title {
  528. font-size: 13px;
  529. font-weight: 600;
  530. margin-bottom: 5px;
  531. display: flex;
  532. align-items: center;
  533. gap: 5px;
  534. }
  535. .section-count {
  536. color: #6b7280;
  537. font-size: 12px;
  538. }
  539. .section-hint {
  540. font-size: 11px;
  541. color: #6b7280;
  542. margin-bottom: 8px;
  543. }
  544. .candidates-list {
  545. display: flex;
  546. flex-direction: column;
  547. gap: 6px;
  548. }
  549. .candidate-item {
  550. display: flex;
  551. align-items: center;
  552. gap: 8px;
  553. padding: 8px;
  554. border-radius: 6px;
  555. border: 1px solid #e5e7eb;
  556. transition: all 0.2s;
  557. cursor: pointer;
  558. }
  559. .candidate-item:hover {
  560. box-shadow: 0 2px 4px rgba(0,0,0,0.1);
  561. transform: translateY(-1px);
  562. }
  563. .candidate-item.matched {
  564. background: #f0fdf4;
  565. border-color: #86efac;
  566. }
  567. .candidate-item.partial {
  568. background: #fffbeb;
  569. border-color: #fcd34d;
  570. }
  571. .candidate-item.unmatched {
  572. background: #fef2f2;
  573. border-color: #fca5a5;
  574. }
  575. .candidate-icon {
  576. font-size: 18px;
  577. flex-shrink: 0;
  578. }
  579. .candidate-info {
  580. flex: 1;
  581. min-width: 0;
  582. }
  583. .candidate-name {
  584. font-size: 12px;
  585. font-weight: 500;
  586. color: #374151;
  587. white-space: nowrap;
  588. overflow: hidden;
  589. text-overflow: ellipsis;
  590. }
  591. .candidate-meta {
  592. display: flex;
  593. justify-content: space-between;
  594. align-items: center;
  595. margin-top: 2px;
  596. }
  597. .candidate-type {
  598. font-size: 10px;
  599. color: #6b7280;
  600. }
  601. .candidate-similarity {
  602. font-size: 10px;
  603. font-weight: 600;
  604. color: #10b981;
  605. }
  606. /* 中间级联流程面板 */
  607. .cascade-flow-panel {
  608. flex: 1;
  609. background: white;
  610. border-radius: 8px;
  611. box-shadow: 0 2px 8px rgba(0,0,0,0.1);
  612. overflow-y: auto;
  613. padding: 20px;
  614. }
  615. .cascade-header {
  616. margin-bottom: 20px;
  617. padding-bottom: 10px;
  618. border-bottom: 2px solid #e5e7eb;
  619. }
  620. .cascade-title {
  621. font-size: 18px;
  622. font-weight: 600;
  623. color: #374151;
  624. }
  625. .cascade-content {
  626. display: flex;
  627. flex-direction: column;
  628. gap: 15px;
  629. }
  630. .cascade-layer {
  631. background: #f9fafb;
  632. border-radius: 8px;
  633. padding: 15px;
  634. }
  635. .layer-title {
  636. font-size: 14px;
  637. font-weight: 600;
  638. color: #6b7280;
  639. margin-bottom: 10px;
  640. }
  641. /* 层级1: 特征选择器 */
  642. .selected-feature {
  643. display: flex;
  644. justify-content: space-between;
  645. align-items: center;
  646. padding: 12px;
  647. background: white;
  648. border-radius: 6px;
  649. border: 2px solid #667eea;
  650. }
  651. .feature-name {
  652. font-size: 15px;
  653. font-weight: 600;
  654. color: #374151;
  655. }
  656. .switch-feature-btn {
  657. padding: 6px 12px;
  658. background: #667eea;
  659. color: white;
  660. border: none;
  661. border-radius: 4px;
  662. cursor: pointer;
  663. font-size: 12px;
  664. transition: all 0.2s;
  665. }
  666. .switch-feature-btn:hover {
  667. background: #5568d3;
  668. }
  669. /* 层级2: Top3卡片 */
  670. .top3-container {
  671. display: flex;
  672. gap: 10px;
  673. }
  674. .top3-card {
  675. flex: 1;
  676. padding: 12px;
  677. background: white;
  678. border-radius: 6px;
  679. border: 2px solid #e5e7eb;
  680. cursor: pointer;
  681. transition: all 0.2s;
  682. }
  683. .top3-card:hover {
  684. border-color: #667eea;
  685. box-shadow: 0 2px 6px rgba(102, 126, 234, 0.2);
  686. }
  687. .top3-card.top1-card {
  688. border-color: #10b981;
  689. background: #f0fdf4;
  690. }
  691. .top3-card.top1-card:hover {
  692. border-color: #059669;
  693. }
  694. .top3-card.selected {
  695. border-color: #667eea;
  696. box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.2);
  697. }
  698. .top3-rank {
  699. font-size: 11px;
  700. font-weight: 600;
  701. color: #6b7280;
  702. margin-bottom: 4px;
  703. }
  704. .top3-name {
  705. font-size: 14px;
  706. font-weight: 600;
  707. color: #374151;
  708. margin-bottom: 4px;
  709. }
  710. .top3-similarity {
  711. font-size: 12px;
  712. color: #10b981;
  713. }
  714. /* 级联箭头 */
  715. .cascade-arrow {
  716. text-align: center;
  717. font-size: 24px;
  718. color: #667eea;
  719. margin: 5px 0;
  720. }
  721. /* 层级3: 搜索词 */
  722. .base-word-label {
  723. font-size: 13px;
  724. color: #6b7280;
  725. margin-bottom: 12px;
  726. }
  727. .base-word-value {
  728. font-weight: 600;
  729. color: #10b981;
  730. }
  731. .search-word-card {
  732. background: white;
  733. border-radius: 8px;
  734. border: 2px solid #e5e7eb;
  735. padding: 15px;
  736. margin-bottom: 12px;
  737. cursor: pointer;
  738. transition: all 0.2s;
  739. }
  740. .search-word-card:hover {
  741. border-color: #667eea;
  742. box-shadow: 0 2px 6px rgba(0,0,0,0.1);
  743. }
  744. .search-word-card.searched {
  745. border-color: #10b981;
  746. }
  747. .search-word-card.selected {
  748. border-color: #667eea;
  749. box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.2);
  750. }
  751. .sw-header {
  752. display: flex;
  753. justify-content: space-between;
  754. align-items: center;
  755. margin-bottom: 10px;
  756. }
  757. .sw-status {
  758. font-size: 12px;
  759. font-weight: 600;
  760. color: #10b981;
  761. }
  762. .sw-rank {
  763. font-size: 11px;
  764. color: #6b7280;
  765. }
  766. .sw-candidates-pool {
  767. margin-bottom: 10px;
  768. }
  769. .sw-label {
  770. font-size: 11px;
  771. color: #6b7280;
  772. margin-bottom: 4px;
  773. }
  774. .sw-candidates {
  775. font-size: 12px;
  776. color: #374151;
  777. background: #f9fafb;
  778. padding: 6px;
  779. border-radius: 4px;
  780. }
  781. .sw-arrow-container {
  782. text-align: center;
  783. margin: 10px 0;
  784. }
  785. .sw-arrow {
  786. display: inline-flex;
  787. align-items: center;
  788. gap: 8px;
  789. }
  790. .arrow-line {
  791. font-size: 20px;
  792. color: #667eea;
  793. }
  794. .arrow-score {
  795. font-size: 12px;
  796. font-weight: 600;
  797. color: #667eea;
  798. background: #ede9fe;
  799. padding: 2px 8px;
  800. border-radius: 4px;
  801. }
  802. .sw-result {
  803. text-align: center;
  804. margin-bottom: 10px;
  805. }
  806. .sw-query {
  807. font-size: 16px;
  808. font-weight: 600;
  809. color: #374151;
  810. background: #f0fdf4;
  811. padding: 8px;
  812. border-radius: 6px;
  813. border: 1px solid #86efac;
  814. }
  815. .sw-reasoning {
  816. background: #fffbeb;
  817. padding: 10px;
  818. border-radius: 6px;
  819. border: 1px solid #fcd34d;
  820. }
  821. .reasoning-label {
  822. font-size: 12px;
  823. font-weight: 600;
  824. color: #374151;
  825. margin-bottom: 4px;
  826. }
  827. .reasoning-content {
  828. font-size: 12px;
  829. color: #6b7280;
  830. line-height: 1.5;
  831. }
  832. /* 右侧结果面板 */
  833. .right-results-panel {
  834. width: 500px;
  835. background: white;
  836. border-radius: 8px;
  837. box-shadow: 0 2px 8px rgba(0,0,0,0.1);
  838. overflow-y: auto;
  839. display: flex;
  840. flex-direction: column;
  841. }
  842. .results-header {
  843. padding: 15px;
  844. border-bottom: 2px solid #e5e7eb;
  845. }
  846. .results-title {
  847. font-size: 16px;
  848. font-weight: 600;
  849. color: #374151;
  850. margin-bottom: 5px;
  851. }
  852. .results-subtitle {
  853. font-size: 12px;
  854. color: #6b7280;
  855. }
  856. .results-content {
  857. flex: 1;
  858. padding: 15px;
  859. }
  860. .empty-results {
  861. text-align: center;
  862. padding: 60px 20px;
  863. }
  864. .empty-icon {
  865. font-size: 48px;
  866. margin-bottom: 15px;
  867. }
  868. .empty-text {
  869. font-size: 14px;
  870. color: #6b7280;
  871. }
  872. /* Modal */
  873. .modal-overlay {
  874. display: none;
  875. position: fixed;
  876. top: 0;
  877. left: 0;
  878. right: 0;
  879. bottom: 0;
  880. background: rgba(0,0,0,0.5);
  881. z-index: 1000;
  882. align-items: center;
  883. justify-content: center;
  884. }
  885. .modal-overlay.active {
  886. display: flex;
  887. }
  888. .modal-window {
  889. background: white;
  890. border-radius: 12px;
  891. box-shadow: 0 10px 40px rgba(0,0,0,0.2);
  892. max-width: 600px;
  893. width: 90%;
  894. max-height: 80vh;
  895. display: flex;
  896. flex-direction: column;
  897. }
  898. .modal-header {
  899. padding: 20px;
  900. border-bottom: 1px solid #e5e7eb;
  901. display: flex;
  902. justify-content: space-between;
  903. align-items: center;
  904. }
  905. .modal-title {
  906. font-size: 18px;
  907. font-weight: 600;
  908. color: #374151;
  909. }
  910. .modal-close-btn {
  911. background: none;
  912. border: none;
  913. font-size: 28px;
  914. color: #6b7280;
  915. cursor: pointer;
  916. padding: 0;
  917. width: 32px;
  918. height: 32px;
  919. display: flex;
  920. align-items: center;
  921. justify-content: center;
  922. border-radius: 4px;
  923. }
  924. .modal-close-btn:hover {
  925. background: #f3f4f6;
  926. }
  927. .modal-body {
  928. flex: 1;
  929. overflow-y: auto;
  930. padding: 20px;
  931. }
  932. .feature-list {
  933. display: flex;
  934. flex-direction: column;
  935. gap: 10px;
  936. }
  937. .feature-list-item {
  938. padding: 12px;
  939. background: #f9fafb;
  940. border-radius: 6px;
  941. border: 2px solid #e5e7eb;
  942. cursor: pointer;
  943. transition: all 0.2s;
  944. }
  945. .feature-list-item:hover {
  946. border-color: #667eea;
  947. background: white;
  948. }
  949. .feature-list-item.active {
  950. border-color: #10b981;
  951. background: #f0fdf4;
  952. }
  953. '''
  954. def get_javascript_code() -> str:
  955. """获取JavaScript代码"""
  956. return '''
  957. // 初始化
  958. document.addEventListener('DOMContentLoaded', function() {
  959. console.log('页面加载完成');
  960. renderFeatureList();
  961. });
  962. // 显示特征选择器
  963. function showFeatureSelector() {
  964. const modal = document.getElementById('featureSelectorModal');
  965. modal.classList.add('active');
  966. }
  967. // 关闭特征选择器
  968. function closeFeatureSelector() {
  969. const modal = document.getElementById('featureSelectorModal');
  970. modal.classList.remove('active');
  971. }
  972. // 渲染特征列表
  973. function renderFeatureList() {
  974. const listEl = document.getElementById('featureList');
  975. let html = '';
  976. allData.forEach((feature, idx) => {
  977. const name = feature['原始特征名称'];
  978. const isActive = idx === currentFeatureIdx;
  979. const activeClass = isActive ? 'active' : '';
  980. html += `
  981. <div class="feature-list-item ${activeClass}" onclick="selectFeature(${idx})">
  982. ${name}
  983. </div>
  984. `;
  985. });
  986. listEl.innerHTML = html;
  987. }
  988. // 选择特征
  989. function selectFeature(featureIdx) {
  990. currentFeatureIdx = featureIdx;
  991. currentGroupIdx = 0;
  992. currentSwIdx = 0;
  993. closeFeatureSelector();
  994. updateCascadeView();
  995. renderFeatureList();
  996. }
  997. // 更新级联视图
  998. function updateCascadeView() {
  999. const feature = allData[currentFeatureIdx];
  1000. const cascadeContent = document.getElementById('cascadeContent');
  1001. // 重新渲染级联流程(这里简化处理,实际应该用JavaScript动态更新)
  1002. location.reload(); // 简化版:重新加载页面
  1003. }
  1004. // 选择base_word
  1005. function selectBaseWord(featureIdx, matchIdx) {
  1006. currentFeatureIdx = featureIdx;
  1007. currentGroupIdx = matchIdx;
  1008. currentSwIdx = 0;
  1009. // 移除所有选中状态
  1010. document.querySelectorAll('.top3-card').forEach(card => {
  1011. card.classList.remove('selected');
  1012. });
  1013. // 添加选中状态
  1014. event.target.closest('.top3-card').classList.add('selected');
  1015. // 更新搜索词显示
  1016. const feature = allData[currentFeatureIdx];
  1017. const groups = feature['组合评估结果_分组'] || [];
  1018. if (groups[currentGroupIdx]) {
  1019. // TODO: 更新搜索词列表显示
  1020. console.log('切换到group:', currentGroupIdx);
  1021. }
  1022. }
  1023. // 选择搜索词
  1024. function selectSearchWord(featureIdx, groupIdx, swIdx) {
  1025. currentFeatureIdx = featureIdx;
  1026. currentGroupIdx = groupIdx;
  1027. currentSwIdx = swIdx;
  1028. // 移除所有搜索词的选中状态
  1029. document.querySelectorAll('.search-word-card').forEach(card => {
  1030. card.classList.remove('selected');
  1031. });
  1032. // 添加选中状态
  1033. event.target.closest('.search-word-card').classList.add('selected');
  1034. // 显示搜索结果
  1035. renderSearchResults(featureIdx, groupIdx, swIdx);
  1036. }
  1037. // 渲染搜索结果
  1038. function renderSearchResults(featureIdx, groupIdx, swIdx) {
  1039. const feature = allData[featureIdx];
  1040. const groups = feature['组合评估结果_分组'] || [];
  1041. const group = groups[groupIdx];
  1042. if (!group) return;
  1043. const searches = group['top10_searches'] || [];
  1044. const search = searches[swIdx];
  1045. if (!search) return;
  1046. const searchWord = search['search_word'] || '';
  1047. const searchResult = search['search_result'];
  1048. const resultsContent = document.getElementById('resultsContent');
  1049. const resultsSubtitle = document.getElementById('resultsSubtitle');
  1050. resultsSubtitle.textContent = `搜索词: ${searchWord}`;
  1051. if (!searchResult) {
  1052. resultsContent.innerHTML = `
  1053. <div class="empty-results">
  1054. <div class="empty-icon">❌</div>
  1055. <div class="empty-text">该搜索词未执行搜索</div>
  1056. </div>
  1057. `;
  1058. return;
  1059. }
  1060. const notes = searchResult.data?.data || [];
  1061. if (notes.length === 0) {
  1062. resultsContent.innerHTML = `
  1063. <div class="empty-results">
  1064. <div class="empty-icon">📭</div>
  1065. <div class="empty-text">未找到匹配的帖子</div>
  1066. </div>
  1067. `;
  1068. return;
  1069. }
  1070. // 渲染帖子卡片(简化版)
  1071. let html = '<div class="notes-grid">';
  1072. notes.forEach((note, idx) => {
  1073. const card = note.note_card || {};
  1074. const title = card.display_title || '无标题';
  1075. const image = (card.image_list || [])[0] || '';
  1076. html += `
  1077. <div class="note-card-simple">
  1078. ${image ? `<img src="${image}" alt="${title}" loading="lazy">` : ''}
  1079. <div class="note-title-simple">${title}</div>
  1080. </div>
  1081. `;
  1082. });
  1083. html += '</div>';
  1084. resultsContent.innerHTML = html;
  1085. }
  1086. '''
  1087. def main():
  1088. """主函数"""
  1089. print("=" * 60)
  1090. print("级联搜索结果可视化工具")
  1091. print("=" * 60)
  1092. # 加载数据
  1093. data = load_all_data()
  1094. # 提取全局候选词
  1095. global_candidates = extract_global_candidates(data)
  1096. # 生成HTML
  1097. html_content = generate_html(data, global_candidates)
  1098. # 保存HTML文件
  1099. output_path = "visualization/cascade_search_results.html"
  1100. os.makedirs(os.path.dirname(output_path), exist_ok=True)
  1101. with open(output_path, 'w', encoding='utf-8') as f:
  1102. f.write(html_content)
  1103. print(f"\n✓ HTML文件已保存: {output_path}")
  1104. # 打开HTML文件
  1105. abs_path = os.path.abspath(output_path)
  1106. print(f"正在打开浏览器...")
  1107. webbrowser.open(f'file://{abs_path}')
  1108. print("\n" + "=" * 60)
  1109. print("✅ 可视化生成完成!")
  1110. print("=" * 60)
  1111. if __name__ == '__main__':
  1112. main()