field.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. from pymilvus import FieldSchema, DataType
  2. # milvus 向量数据库
  3. fields = [
  4. FieldSchema(
  5. name="id",
  6. dtype=DataType.INT64,
  7. is_primary=True,
  8. auto_id=True,
  9. description="自增id",
  10. ),
  11. FieldSchema(
  12. name="doc_id", dtype=DataType.VARCHAR, max_length=64, description="文档id"
  13. ),
  14. FieldSchema(name="chunk_id", dtype=DataType.INT64, description="文档分块id"),
  15. # 三种向量字段
  16. FieldSchema(name="vector_text", dtype=DataType.FLOAT_VECTOR, dim=2560),
  17. FieldSchema(name="vector_summary", dtype=DataType.FLOAT_VECTOR, dim=2560),
  18. FieldSchema(name="vector_questions", dtype=DataType.FLOAT_VECTOR, dim=2560),
  19. # metadata
  20. FieldSchema(
  21. name="topic", dtype=DataType.VARCHAR, max_length=255, description="主题"
  22. ),
  23. FieldSchema(
  24. name="domain", dtype=DataType.VARCHAR, max_length=100, description="领域"
  25. ),
  26. FieldSchema(
  27. name="task_type", dtype=DataType.VARCHAR, max_length=100, description="任务类型"
  28. ),
  29. FieldSchema(
  30. name="summary", dtype=DataType.VARCHAR, max_length=512, description="总结"
  31. ),
  32. FieldSchema(
  33. name="keywords",
  34. dtype=DataType.ARRAY,
  35. element_type=DataType.VARCHAR,
  36. max_length=100,
  37. max_capacity=5,
  38. description="关键词",
  39. ),
  40. FieldSchema(
  41. name="concepts",
  42. dtype=DataType.ARRAY,
  43. element_type=DataType.VARCHAR,
  44. max_length=100,
  45. max_capacity=5,
  46. description="主要知识点",
  47. ),
  48. FieldSchema(
  49. name="questions",
  50. dtype=DataType.ARRAY,
  51. element_type=DataType.VARCHAR,
  52. max_length=200,
  53. max_capacity=5,
  54. description="隐含问题",
  55. ),
  56. FieldSchema(
  57. name="entities",
  58. dtype=DataType.ARRAY,
  59. element_type=DataType.VARCHAR,
  60. max_length=200,
  61. max_capacity=5,
  62. description="命名实体",
  63. ),
  64. FieldSchema(name="topic_purity", dtype=DataType.FLOAT),
  65. FieldSchema(name="tokens", dtype=DataType.INT64),
  66. ]
  67. __all__ = ["fields"]