models.py 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. from dataclasses import dataclass
  2. from typing import List
  3. @dataclass
  4. class Document:
  5. doc_id: str
  6. dataset_id: int
  7. @dataclass
  8. class GraphChunk:
  9. milvus_id: int
  10. chunk_id: int
  11. doc_id: str
  12. topic: str
  13. domain: str
  14. text_type: int
  15. task_type: str
  16. @dataclass
  17. class ChunkRelations:
  18. entities: List[str]
  19. concepts: List[str]
  20. keywords: List[str]
  21. domain: str
  22. topic: str
  23. QUERY = """
  24. // 1) Document & GraphChunk
  25. MERGE (d:Document {doc_id: $doc_id})
  26. ON CREATE SET d.dataset_id = $dataset_id
  27. SET d.dataset_id = $dataset_id
  28. MERGE (gc:GraphChunk {milvus_id: $milvus_id})
  29. ON CREATE SET gc.chunk_id = $chunk_id, gc.doc_id = $doc_id
  30. SET gc.topic = $topic,
  31. gc.domain = $domain,
  32. gc.text_type = $text_type,
  33. gc.task_type = $task_type,
  34. gc.doc_id = $doc_id,
  35. gc.display_name = $chunk_id + "--" + $doc_id
  36. MERGE (gc)-[:BELONGS_TO]->(d)
  37. MERGE (d)-[:HAS_CHUNK]->(gc)
  38. // 2) 参数准备
  39. WITH gc,
  40. COALESCE($entities, []) AS entities,
  41. COALESCE($concepts, []) AS concepts,
  42. COALESCE($keywords, []) AS keywords,
  43. $domain_name AS domain_name,
  44. $topic_name AS topic_name
  45. // 3) Entities
  46. UNWIND entities AS e_name
  47. WITH gc, e_name, concepts, keywords, domain_name, topic_name
  48. WITH gc, TRIM(e_name) AS e_name, concepts, keywords, domain_name, topic_name
  49. WHERE e_name <> ""
  50. MERGE (e:Entity {name: e_name})
  51. MERGE (gc)-[:HAS_ENTITY]->(e)
  52. // 4) Concepts
  53. WITH gc, concepts, keywords, domain_name, topic_name
  54. UNWIND concepts AS c_name
  55. WITH gc, c_name, keywords, domain_name, topic_name
  56. WITH gc, TRIM(c_name) AS c_name, keywords, domain_name, topic_name
  57. WHERE c_name <> ""
  58. MERGE (co:Concept {name: c_name})
  59. MERGE (gc)-[:HAS_CONCEPT]->(co)
  60. // 5) Keywords
  61. WITH gc, keywords, domain_name, topic_name
  62. UNWIND keywords AS k_name
  63. WITH gc, k_name, domain_name, topic_name
  64. WITH gc, TRIM(k_name) AS k_name, domain_name, topic_name
  65. WHERE k_name <> ""
  66. MERGE (k:Keyword {name: k_name})
  67. MERGE (gc)-[:HAS_KEYWORD]->(k)
  68. // 6) Domain(条件执行,用 FOREACH 替代 CALL)
  69. WITH gc, domain_name, topic_name
  70. FOREACH (_ IN CASE WHEN domain_name IS NOT NULL AND TRIM(domain_name) <> "" THEN [1] ELSE [] END |
  71. MERGE (d_node:Domain {name: TRIM(domain_name)})
  72. MERGE (gc)-[:HAS_DOMAIN]->(d_node)
  73. )
  74. // 7) Topic(条件执行,用 FOREACH 替代 CALL)
  75. WITH gc, topic_name
  76. FOREACH (_ IN CASE WHEN topic_name IS NOT NULL AND TRIM(topic_name) <> "" THEN [1] ELSE [] END |
  77. MERGE (t:Topic {name: TRIM(topic_name)})
  78. MERGE (gc)-[:HAS_TOPIC]->(t)
  79. )
  80. """