// convert_v8_to_graph_v2.js
  1. /**
  2. * 将 v6.1.2.8 的 run_context.json 转换成按 Round > 步骤 > 数据 组织的图结构
  3. */
  4. function convertV8ToGraphV2(runContext, searchResults) {
  5. const nodes = {};
  6. const edges = [];
  7. const iterations = {};
  8. const o = runContext.o || '原始问题';
  9. const rounds = runContext.rounds || [];
  10. // 添加原始问题根节点
  11. const rootId = 'root_o';
  12. nodes[rootId] = {
  13. type: 'root',
  14. query: o,
  15. level: 0,
  16. relevance_score: 1.0,
  17. strategy: '原始问题',
  18. iteration: 0,
  19. is_selected: true
  20. };
  21. iterations[0] = [rootId];
  22. // 处理每一轮
  23. rounds.forEach((round, roundIndex) => {
  24. if (round.type === 'initialization') {
  25. // Round 0: 初始化阶段
  26. const roundNum = 0;
  27. const roundId = `round_${roundNum}`;
  28. // 创建 Round 节点
  29. nodes[roundId] = {
  30. type: 'round',
  31. query: `Round ${roundNum} (初始化)`,
  32. level: roundNum,
  33. relevance_score: 0,
  34. strategy: '初始化',
  35. iteration: roundNum,
  36. is_selected: true
  37. };
  38. edges.push({
  39. from: rootId,
  40. to: roundId,
  41. edge_type: 'root_to_round',
  42. strategy: '初始化'
  43. });
  44. if (!iterations[roundNum]) iterations[roundNum] = [];
  45. iterations[roundNum].push(roundId);
  46. // 创建分词步骤节点
  47. const segStepId = `step_seg_r${roundNum}`;
  48. nodes[segStepId] = {
  49. type: 'step',
  50. query: `步骤:分词 (${round.seg_list?.length || 0}个分词)`,
  51. level: roundNum,
  52. relevance_score: 0,
  53. strategy: '分词',
  54. iteration: roundNum,
  55. is_selected: true
  56. };
  57. edges.push({
  58. from: roundId,
  59. to: segStepId,
  60. edge_type: 'round_to_step',
  61. strategy: '分词'
  62. });
  63. iterations[roundNum].push(segStepId);
  64. // 添加分词结果作为步骤的子节点
  65. round.seg_list?.forEach((seg, segIndex) => {
  66. const segId = `seg_${seg.text}_${roundNum}_${segIndex}`;
  67. nodes[segId] = {
  68. type: 'seg',
  69. query: seg.text,
  70. level: roundNum + 1,
  71. relevance_score: seg.score || 0,
  72. evaluationReason: seg.reason || '',
  73. strategy: '分词结果',
  74. iteration: roundNum,
  75. is_selected: true
  76. };
  77. edges.push({
  78. from: segStepId,
  79. to: segId,
  80. edge_type: 'step_to_data',
  81. strategy: '分词结果'
  82. });
  83. if (!iterations[roundNum + 1]) iterations[roundNum + 1] = [];
  84. iterations[roundNum + 1].push(segId);
  85. });
  86. } else {
  87. // 普通轮次
  88. const roundNum = round.round_num;
  89. const roundId = `round_${roundNum}`;
  90. // 创建 Round 节点
  91. nodes[roundId] = {
  92. type: 'round',
  93. query: `Round ${roundNum}`,
  94. level: roundNum * 10, // 使用10的倍数作为层级
  95. relevance_score: 0,
  96. strategy: `第${roundNum}轮`,
  97. iteration: roundNum,
  98. is_selected: true
  99. };
  100. edges.push({
  101. from: rootId,
  102. to: roundId,
  103. edge_type: 'root_to_round',
  104. strategy: `第${roundNum}轮`
  105. });
  106. if (!iterations[roundNum * 10]) iterations[roundNum * 10] = [];
  107. iterations[roundNum * 10].push(roundId);
  108. // 步骤1: 请求&评估推荐词
  109. if (round.sug_details && Object.keys(round.sug_details).length > 0) {
  110. const sugStepId = `step_sug_r${roundNum}`;
  111. const totalSugs = Object.values(round.sug_details).reduce((sum, list) => sum + list.length, 0);
  112. nodes[sugStepId] = {
  113. type: 'step',
  114. query: `步骤1: 请求&评估推荐词 (${totalSugs}个)`,
  115. level: roundNum * 10 + 1,
  116. relevance_score: 0,
  117. strategy: '请求&评估推荐词',
  118. iteration: roundNum,
  119. is_selected: true
  120. };
  121. edges.push({
  122. from: roundId,
  123. to: sugStepId,
  124. edge_type: 'round_to_step',
  125. strategy: '推荐词'
  126. });
  127. iterations[roundNum * 10].push(sugStepId);
  128. // 为每个 Q 创建节点
  129. Object.keys(round.sug_details).forEach((qText, qIndex) => {
  130. // 从q_list_1中查找对应的q获取分数和理由
  131. // Round 0: 从q_list_1查找; Round 1+: 从input_q_list查找
  132. let qData = {};
  133. if (roundNum === 0) {
  134. qData = round.q_list_1?.find(q => q.text === qText) || {};
  135. } else {
  136. // 从当前轮的input_q_list中查找
  137. qData = round.input_q_list?.find(q => q.text === qText) || {};
  138. }
  139. const qId = `q_${qText}_r${roundNum}_${qIndex}`;
  140. nodes[qId] = {
  141. type: 'q',
  142. query: qText,
  143. level: roundNum * 10 + 2,
  144. relevance_score: qData.score || 0,
  145. evaluationReason: qData.reason || '',
  146. strategy: 'Query',
  147. iteration: roundNum,
  148. is_selected: true
  149. };
  150. edges.push({
  151. from: sugStepId,
  152. to: qId,
  153. edge_type: 'step_to_q',
  154. strategy: 'Query'
  155. });
  156. if (!iterations[roundNum * 10 + 2]) iterations[roundNum * 10 + 2] = [];
  157. iterations[roundNum * 10 + 2].push(qId);
  158. // 为每个 Q 的 sug 创建节点
  159. const sugs = round.sug_details[qText] || [];
  160. sugs.forEach((sug, sugIndex) => {
  161. const sugId = `sug_${sug.text}_r${roundNum}_q${qIndex}_${sugIndex}`;
  162. nodes[sugId] = {
  163. type: 'sug',
  164. query: sug.text,
  165. level: roundNum * 10 + 3,
  166. relevance_score: sug.score || 0,
  167. evaluationReason: sug.reason || '',
  168. strategy: '推荐词',
  169. iteration: roundNum,
  170. is_selected: true
  171. };
  172. edges.push({
  173. from: qId,
  174. to: sugId,
  175. edge_type: 'q_to_sug',
  176. strategy: '推荐词'
  177. });
  178. if (!iterations[roundNum * 10 + 3]) iterations[roundNum * 10 + 3] = [];
  179. iterations[roundNum * 10 + 3].push(sugId);
  180. });
  181. });
  182. }
  183. // 步骤2: 筛选并执行搜索
  184. const searchStepId = `step_search_r${roundNum}`;
  185. const searchCountText = round.search_count > 0
  186. ? `筛选${round.high_score_sug_count}个高分词,搜索${round.search_count}次,${round.total_posts}个帖子`
  187. : `无高分推荐词,未执行搜索`;
  188. nodes[searchStepId] = {
  189. type: 'step',
  190. query: `步骤2: 筛选并执行搜索 (${searchCountText})`,
  191. level: roundNum * 10 + 1,
  192. relevance_score: 0,
  193. strategy: '筛选并执行搜索',
  194. iteration: roundNum,
  195. is_selected: true
  196. };
  197. edges.push({
  198. from: roundId,
  199. to: searchStepId,
  200. edge_type: 'round_to_step',
  201. strategy: '搜索'
  202. });
  203. iterations[roundNum * 10].push(searchStepId);
  204. // 只有在有搜索结果时才添加搜索词和帖子
  205. // 优先使用 round.search_results(新格式),否则使用外部传入的 searchResults(兼容旧版本)
  206. const roundSearchResults = round.search_results || searchResults;
  207. if (round.search_count > 0 && roundSearchResults) {
  208. if (Array.isArray(roundSearchResults)) {
  209. roundSearchResults.forEach((search, searchIndex) => {
  210. const searchWordId = `search_${search.text}_r${roundNum}_${searchIndex}`;
  211. nodes[searchWordId] = {
  212. type: 'search_word',
  213. query: search.text,
  214. level: roundNum * 10 + 2,
  215. relevance_score: search.score_with_o || 0,
  216. strategy: '搜索词',
  217. iteration: roundNum,
  218. is_selected: true
  219. };
  220. edges.push({
  221. from: searchStepId,
  222. to: searchWordId,
  223. edge_type: 'step_to_search_word',
  224. strategy: '搜索词'
  225. });
  226. if (!iterations[roundNum * 10 + 2]) iterations[roundNum * 10 + 2] = [];
  227. iterations[roundNum * 10 + 2].push(searchWordId);
  228. // 添加帖子
  229. if (search.post_list && search.post_list.length > 0) {
  230. search.post_list.forEach((post, postIndex) => {
  231. const postId = `post_${post.note_id}_${searchIndex}_${postIndex}`;
  232. // 准备图片列表,将URL字符串转换为对象格式供轮播图使用
  233. const imageList = (post.images || []).map(url => ({
  234. image_url: url
  235. }));
  236. nodes[postId] = {
  237. type: 'post',
  238. query: post.title,
  239. level: roundNum * 10 + 3,
  240. relevance_score: 0,
  241. strategy: '帖子',
  242. iteration: roundNum,
  243. is_selected: true,
  244. note_id: post.note_id,
  245. note_url: post.note_url,
  246. body_text: post.body_text || '',
  247. images: post.images || [],
  248. image_list: imageList,
  249. interact_info: post.interact_info || {}
  250. };
  251. edges.push({
  252. from: searchWordId,
  253. to: postId,
  254. edge_type: 'search_word_to_post',
  255. strategy: '搜索结果'
  256. });
  257. if (!iterations[roundNum * 10 + 3]) iterations[roundNum * 10 + 3] = [];
  258. iterations[roundNum * 10 + 3].push(postId);
  259. });
  260. }
  261. });
  262. }
  263. }
  264. // 步骤3: 加词生成新查询
  265. if (round.add_word_details && Object.keys(round.add_word_details).length > 0) {
  266. const addWordStepId = `step_add_r${roundNum}`;
  267. const totalAddWords = Object.values(round.add_word_details).reduce((sum, list) => sum + list.length, 0);
  268. nodes[addWordStepId] = {
  269. type: 'step',
  270. query: `步骤3: 加词生成新查询 (${totalAddWords}个)`,
  271. level: roundNum * 10 + 1,
  272. relevance_score: 0,
  273. strategy: '加词生成新查询',
  274. iteration: roundNum,
  275. is_selected: true
  276. };
  277. edges.push({
  278. from: roundId,
  279. to: addWordStepId,
  280. edge_type: 'round_to_step',
  281. strategy: '加词'
  282. });
  283. iterations[roundNum * 10].push(addWordStepId);
  284. // 为每个 Seed 创建节点
  285. Object.keys(round.add_word_details).forEach((seedText, seedIndex) => {
  286. const seedId = `seed_${seedText}_r${roundNum}_${seedIndex}`;
  287. // 查找seed的来源信息 - 从Round 0的seed_list查找基础种子的from_type
  288. const round0 = rounds.find(r => r.round_num === 0 || r.type === 'initialization');
  289. const seedInfo = round0?.seed_list?.find(s => s.text === seedText) || {};
  290. const fromType = seedInfo.from_type || 'unknown';
  291. // 根据来源设置strategy
  292. let strategy;
  293. if (fromType === 'seg') {
  294. strategy = '初始分词';
  295. } else if (fromType === 'add') {
  296. strategy = '加词';
  297. } else if (fromType === 'sug') {
  298. strategy = '调用sug';
  299. } else {
  300. strategy = 'Seed'; // 默认灰色
  301. }
  302. nodes[seedId] = {
  303. type: 'seed',
  304. query: seedText,
  305. level: roundNum * 10 + 2,
  306. relevance_score: 0,
  307. strategy: strategy,
  308. iteration: roundNum,
  309. is_selected: true
  310. };
  311. edges.push({
  312. from: addWordStepId,
  313. to: seedId,
  314. edge_type: 'step_to_seed',
  315. strategy: 'Seed'
  316. });
  317. if (!iterations[roundNum * 10 + 2]) iterations[roundNum * 10 + 2] = [];
  318. iterations[roundNum * 10 + 2].push(seedId);
  319. // 为每个 Seed 的组合词创建节点
  320. const combinedWords = round.add_word_details[seedText] || [];
  321. combinedWords.forEach((word, wordIndex) => {
  322. const wordId = `add_${word.text}_r${roundNum}_seed${seedIndex}_${wordIndex}`;
  323. nodes[wordId] = {
  324. type: 'add_word',
  325. query: word.text,
  326. level: roundNum * 10 + 3,
  327. relevance_score: word.score || 0,
  328. evaluationReason: word.reason || '',
  329. strategy: '加词生成',
  330. iteration: roundNum,
  331. is_selected: true,
  332. selected_word: word.selected_word
  333. };
  334. edges.push({
  335. from: seedId,
  336. to: wordId,
  337. edge_type: 'seed_to_add_word',
  338. strategy: '组合词'
  339. });
  340. if (!iterations[roundNum * 10 + 3]) iterations[roundNum * 10 + 3] = [];
  341. iterations[roundNum * 10 + 3].push(wordId);
  342. });
  343. });
  344. }
  345. // 步骤4: 筛选推荐词进入下轮
  346. const filteredSugs = round.output_q_list?.filter(q => q.from === 'sug') || [];
  347. if (filteredSugs.length > 0) {
  348. const filterStepId = `step_filter_r${roundNum}`;
  349. nodes[filterStepId] = {
  350. type: 'step',
  351. query: `步骤4: 筛选推荐词进入下轮 (${filteredSugs.length}个)`,
  352. level: roundNum * 10 + 1,
  353. relevance_score: 0,
  354. strategy: '筛选推荐词进入下轮',
  355. iteration: roundNum,
  356. is_selected: true
  357. };
  358. edges.push({
  359. from: roundId,
  360. to: filterStepId,
  361. edge_type: 'round_to_step',
  362. strategy: '筛选'
  363. });
  364. iterations[roundNum * 10].push(filterStepId);
  365. // 添加筛选出的sug
  366. filteredSugs.forEach((sug, sugIndex) => {
  367. const sugId = `filtered_sug_${sug.text}_r${roundNum}_${sugIndex}`;
  368. nodes[sugId] = {
  369. type: 'filtered_sug',
  370. query: sug.text,
  371. level: roundNum * 10 + 2,
  372. relevance_score: sug.score || 0,
  373. strategy: '进入下轮',
  374. iteration: roundNum,
  375. is_selected: true
  376. };
  377. edges.push({
  378. from: filterStepId,
  379. to: sugId,
  380. edge_type: 'step_to_filtered_sug',
  381. strategy: '进入下轮'
  382. });
  383. if (!iterations[roundNum * 10 + 2]) iterations[roundNum * 10 + 2] = [];
  384. iterations[roundNum * 10 + 2].push(sugId);
  385. });
  386. }
  387. // 步骤5: 构建下一轮
  388. const nextRoundStepId = `step_next_round_r${roundNum}`;
  389. const nextQCount = round.output_q_list?.length || 0;
  390. const nextSeedCount = round.seed_list_next_size || 0;
  391. nodes[nextRoundStepId] = {
  392. type: 'step',
  393. query: `步骤5: 构建下一轮 (${nextQCount}个查询, ${nextSeedCount}个种子)`,
  394. level: roundNum * 10 + 1,
  395. relevance_score: 0,
  396. strategy: '构建下一轮',
  397. iteration: roundNum,
  398. is_selected: true
  399. };
  400. edges.push({
  401. from: roundId,
  402. to: nextRoundStepId,
  403. edge_type: 'round_to_step',
  404. strategy: '构建下一轮'
  405. });
  406. iterations[roundNum * 10].push(nextRoundStepId);
  407. // 5.1: 构建下轮查询
  408. if (round.output_q_list && round.output_q_list.length > 0) {
  409. const nextQStepId = `step_next_q_r${roundNum}`;
  410. nodes[nextQStepId] = {
  411. type: 'step',
  412. query: `构建下轮查询 (${nextQCount}个)`,
  413. level: roundNum * 10 + 2,
  414. relevance_score: 0,
  415. strategy: '下轮查询',
  416. iteration: roundNum,
  417. is_selected: true
  418. };
  419. edges.push({
  420. from: nextRoundStepId,
  421. to: nextQStepId,
  422. edge_type: 'step_to_step',
  423. strategy: '查询'
  424. });
  425. if (!iterations[roundNum * 10 + 2]) iterations[roundNum * 10 + 2] = [];
  426. iterations[roundNum * 10 + 2].push(nextQStepId);
  427. // 添加下轮查询列表
  428. round.output_q_list.forEach((q, qIndex) => {
  429. const nextQId = `next_q_${q.text}_r${roundNum}_${qIndex}`;
  430. // 根据来源设置strategy
  431. let strategy;
  432. if (q.from === 'seg') {
  433. strategy = '初始分词';
  434. } else if (q.from === 'add') {
  435. strategy = '加词';
  436. } else if (q.from === 'sug') {
  437. strategy = '调用sug';
  438. } else {
  439. strategy = 'Query'; // 默认
  440. }
  441. nodes[nextQId] = {
  442. type: 'next_q',
  443. query: q.text,
  444. level: roundNum * 10 + 3,
  445. relevance_score: q.score || 0,
  446. evaluationReason: q.reason || '',
  447. strategy: strategy,
  448. iteration: roundNum,
  449. is_selected: true,
  450. from_source: q.from
  451. };
  452. edges.push({
  453. from: nextQStepId,
  454. to: nextQId,
  455. edge_type: 'step_to_next_q',
  456. strategy: strategy
  457. });
  458. if (!iterations[roundNum * 10 + 3]) iterations[roundNum * 10 + 3] = [];
  459. iterations[roundNum * 10 + 3].push(nextQId);
  460. });
  461. }
  462. // 5.2: 构建下轮种子(如果有数据的话)
  463. if (nextSeedCount > 0 && round.seed_list_next) {
  464. const nextSeedStepId = `step_next_seed_r${roundNum}`;
  465. nodes[nextSeedStepId] = {
  466. type: 'step',
  467. query: `构建下轮种子 (${nextSeedCount}个)`,
  468. level: roundNum * 10 + 2,
  469. relevance_score: 0,
  470. strategy: '下轮种子',
  471. iteration: roundNum,
  472. is_selected: true
  473. };
  474. edges.push({
  475. from: nextRoundStepId,
  476. to: nextSeedStepId,
  477. edge_type: 'step_to_step',
  478. strategy: '种子'
  479. });
  480. if (!iterations[roundNum * 10 + 2]) iterations[roundNum * 10 + 2] = [];
  481. iterations[roundNum * 10 + 2].push(nextSeedStepId);
  482. // 添加下轮种子列表
  483. round.seed_list_next.forEach((seed, seedIndex) => {
  484. const nextSeedId = `next_seed_${seed.text}_r${roundNum}_${seedIndex}`;
  485. // 根据来源设置strategy
  486. let strategy;
  487. if (seed.from === 'seg') {
  488. strategy = '初始分词';
  489. } else if (seed.from === 'add') {
  490. strategy = '加词';
  491. } else if (seed.from === 'sug') {
  492. strategy = '调用sug';
  493. } else {
  494. strategy = 'Seed'; // 默认
  495. }
  496. nodes[nextSeedId] = {
  497. type: 'next_seed',
  498. query: seed.text,
  499. level: roundNum * 10 + 3,
  500. relevance_score: seed.score || 0,
  501. strategy: strategy,
  502. iteration: roundNum,
  503. is_selected: true,
  504. from_source: seed.from
  505. };
  506. edges.push({
  507. from: nextSeedStepId,
  508. to: nextSeedId,
  509. edge_type: 'step_to_next_seed',
  510. strategy: strategy
  511. });
  512. if (!iterations[roundNum * 10 + 3]) iterations[roundNum * 10 + 3] = [];
  513. iterations[roundNum * 10 + 3].push(nextSeedId);
  514. });
  515. }
  516. }
  517. });
  518. return {
  519. nodes,
  520. edges,
  521. iterations
  522. };
  523. }
  524. /**
  525. * 简化版转换:专注于query和post的演化
  526. * - 合并所有query节点(不区分seg/sug/add_word)
  527. * - 合并相同的帖子节点
  528. * - 步骤信息放在边上
  529. * - 隐藏Round/Step节点
  530. */
  531. function convertV8ToGraphSimplified(runContext, searchResults) {
  532. const mergedNodes = {};
  533. const edges = [];
  534. const iterations = {};
  535. const o = runContext.o || '原始问题';
  536. const rounds = runContext.rounds || [];
  537. // 添加原始问题根节点
  538. const rootId = 'root_o';
  539. mergedNodes[rootId] = {
  540. type: 'root',
  541. query: o,
  542. level: 0,
  543. relevance_score: 1.0,
  544. strategy: '原始问题',
  545. iteration: 0,
  546. is_selected: true,
  547. occurrences: [{round: 0, role: 'root', score: 1.0}]
  548. };
  549. iterations[0] = [rootId];
  550. // 用于记录节点之间的演化关系
  551. const queryEvolution = {}; // {text: {occurrences: [], parentTexts: [], childTexts: []}}
  552. const postMap = {}; // {note_id: {...}}
  553. // 第一遍:收集所有query和post
  554. rounds.forEach((round, roundIndex) => {
  555. const roundNum = round.round_num || roundIndex;
  556. if (round.type === 'initialization') {
  557. // Round 0: 收集分词结果
  558. (round.q_list_1 || []).forEach(q => {
  559. if (!queryEvolution[q.text]) {
  560. queryEvolution[q.text] = {
  561. occurrences: [],
  562. parentTexts: new Set([o]), // 来自原始问题
  563. childTexts: new Set()
  564. };
  565. }
  566. queryEvolution[q.text].occurrences.push({
  567. round: roundNum,
  568. role: 'segmentation',
  569. strategy: '分词',
  570. score: q.score,
  571. reason: q.reason
  572. });
  573. });
  574. } else {
  575. // Round 1+
  576. // 收集sug_details (推荐词)
  577. Object.entries(round.sug_details || {}).forEach(([parentText, sugs]) => {
  578. sugs.forEach(sug => {
  579. if (!queryEvolution[sug.text]) {
  580. queryEvolution[sug.text] = {
  581. occurrences: [],
  582. parentTexts: new Set(),
  583. childTexts: new Set()
  584. };
  585. }
  586. queryEvolution[sug.text].occurrences.push({
  587. round: roundNum,
  588. role: 'sug',
  589. strategy: '调用sug',
  590. score: sug.score,
  591. reason: sug.reason
  592. });
  593. queryEvolution[sug.text].parentTexts.add(parentText);
  594. if (queryEvolution[parentText]) {
  595. queryEvolution[parentText].childTexts.add(sug.text);
  596. }
  597. });
  598. });
  599. // 收集add_word_details (加词结果)
  600. Object.entries(round.add_word_details || {}).forEach(([seedText, words]) => {
  601. words.forEach(word => {
  602. if (!queryEvolution[word.text]) {
  603. queryEvolution[word.text] = {
  604. occurrences: [],
  605. parentTexts: new Set(),
  606. childTexts: new Set()
  607. };
  608. }
  609. queryEvolution[word.text].occurrences.push({
  610. round: roundNum,
  611. role: 'add_word',
  612. strategy: '加词',
  613. score: word.score,
  614. reason: word.reason,
  615. selectedWord: word.selected_word
  616. });
  617. queryEvolution[word.text].parentTexts.add(seedText);
  618. if (queryEvolution[seedText]) {
  619. queryEvolution[seedText].childTexts.add(word.text);
  620. }
  621. });
  622. });
  623. // 收集搜索结果和帖子
  624. const roundSearchResults = round.search_results || searchResults;
  625. if (roundSearchResults && Array.isArray(roundSearchResults)) {
  626. roundSearchResults.forEach(search => {
  627. const searchText = search.text;
  628. // 标记这个query被用于搜索
  629. if (queryEvolution[searchText]) {
  630. queryEvolution[searchText].occurrences.push({
  631. round: roundNum,
  632. role: 'search',
  633. strategy: '执行搜索',
  634. score: search.score_with_o,
  635. postCount: search.post_list ? search.post_list.length : 0
  636. });
  637. }
  638. // 收集帖子
  639. if (search.post_list && search.post_list.length > 0) {
  640. search.post_list.forEach(post => {
  641. if (!postMap[post.note_id]) {
  642. postMap[post.note_id] = {
  643. ...post,
  644. foundByQueries: new Set(),
  645. foundInRounds: new Set()
  646. };
  647. }
  648. postMap[post.note_id].foundByQueries.add(searchText);
  649. postMap[post.note_id].foundInRounds.add(roundNum);
  650. // 建立query到post的关系
  651. if (!queryEvolution[searchText].posts) {
  652. queryEvolution[searchText].posts = new Set();
  653. }
  654. queryEvolution[searchText].posts.add(post.note_id);
  655. });
  656. }
  657. });
  658. }
  659. }
  660. });
  661. // 第二遍:创建合并后的节点
  662. Object.entries(queryEvolution).forEach(([text, data]) => {
  663. const nodeId = `query_${text}`;
  664. // 获取最新的分数
  665. const latestOccurrence = data.occurrences[data.occurrences.length - 1] || {};
  666. const hasSearchResults = data.posts && data.posts.size > 0;
  667. mergedNodes[nodeId] = {
  668. type: 'query',
  669. query: text,
  670. level: Math.max(...data.occurrences.map(o => o.round), 0) * 10 + 2,
  671. relevance_score: latestOccurrence.score || 0,
  672. evaluationReason: latestOccurrence.reason || '',
  673. strategy: data.occurrences.map(o => o.strategy).join(' + '),
  674. primaryStrategy: latestOccurrence.strategy || '未知', // 添加主要策略字段
  675. iteration: Math.max(...data.occurrences.map(o => o.round), 0),
  676. is_selected: true,
  677. occurrences: data.occurrences,
  678. hasSearchResults: hasSearchResults,
  679. postCount: data.posts ? data.posts.size : 0,
  680. selectedWord: data.occurrences.find(o => o.selectedWord)?.selectedWord || ''
  681. };
  682. // 添加到对应的轮次
  683. const maxRound = Math.max(...data.occurrences.map(o => o.round), 0);
  684. const iterKey = maxRound * 10 + 2;
  685. if (!iterations[iterKey]) iterations[iterKey] = [];
  686. iterations[iterKey].push(nodeId);
  687. });
  688. // 创建帖子节点
  689. Object.entries(postMap).forEach(([noteId, post]) => {
  690. const postId = `post_${noteId}`;
  691. const imageList = (post.images || []).map(url => ({
  692. image_url: url
  693. }));
  694. mergedNodes[postId] = {
  695. type: 'post',
  696. query: post.title,
  697. level: 100, // 放在最后
  698. relevance_score: 0,
  699. strategy: '帖子',
  700. iteration: Math.max(...Array.from(post.foundInRounds)),
  701. is_selected: true,
  702. note_id: post.note_id,
  703. note_url: post.note_url,
  704. body_text: post.body_text || '',
  705. images: post.images || [],
  706. image_list: imageList,
  707. interact_info: post.interact_info || {},
  708. foundByQueries: Array.from(post.foundByQueries),
  709. foundInRounds: Array.from(post.foundInRounds)
  710. };
  711. if (!iterations[100]) iterations[100] = [];
  712. iterations[100].push(postId);
  713. });
  714. // 第三遍:创建边
  715. // 1. 原始问题 -> 分词结果
  716. Object.entries(queryEvolution).forEach(([text, data]) => {
  717. const nodeId = `query_${text}`;
  718. const segOccurrence = data.occurrences.find(o => o.role === 'segmentation');
  719. if (segOccurrence && data.parentTexts.has(o)) {
  720. edges.push({
  721. from: rootId,
  722. to: nodeId,
  723. edge_type: 'segmentation',
  724. strategy: '分词',
  725. label: '分词',
  726. round: 0
  727. });
  728. }
  729. });
  730. // 2. Query演化关系
  731. Object.entries(queryEvolution).forEach(([text, data]) => {
  732. const nodeId = `query_${text}`;
  733. data.parentTexts.forEach(parentText => {
  734. if (parentText === o) return; // 跳过原始问题(已处理)
  735. const parentNodeId = `query_${parentText}`;
  736. if (!mergedNodes[parentNodeId]) return;
  737. // 找到这个演化的策略和轮次
  738. const occurrence = data.occurrences.find(o =>
  739. o.role === 'sug' || o.role === 'add_word'
  740. );
  741. edges.push({
  742. from: parentNodeId,
  743. to: nodeId,
  744. edge_type: occurrence?.role || 'evolution',
  745. strategy: occurrence?.strategy || '演化',
  746. label: `${occurrence?.strategy || '演化'} (R${occurrence?.round || 0})`,
  747. round: occurrence?.round || 0
  748. });
  749. });
  750. });
  751. // 3. Query -> Post (搜索关系)
  752. Object.entries(queryEvolution).forEach(([text, data]) => {
  753. const nodeId = `query_${text}`;
  754. if (data.posts && data.posts.size > 0) {
  755. const searchOccurrence = data.occurrences.find(o => o.role === 'search');
  756. data.posts.forEach(noteId => {
  757. const postId = `post_${noteId}`;
  758. edges.push({
  759. from: nodeId,
  760. to: postId,
  761. edge_type: 'search',
  762. strategy: '搜索',
  763. label: `搜索 (${data.posts.size}个帖子)`,
  764. round: searchOccurrence?.round || 0
  765. });
  766. });
  767. }
  768. });
  769. return {
  770. nodes: mergedNodes,
  771. edges,
  772. iterations
  773. };
  774. }
// Public API: the full Round > Step > Data converter and the simplified
// query/post evolution converter.
module.exports = { convertV8ToGraphV2, convertV8ToGraphSimplified };