1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162 |
from pymilvus import FieldSchema, DataType

# Field definitions for a Milvus vector-database schema.

# Dimensionality shared by the three embedding fields below.
_VECTOR_DIM = 2560


def _varchar(name, max_length, description):
    """Return a VARCHAR field with the given length limit and description."""
    return FieldSchema(
        name=name,
        dtype=DataType.VARCHAR,
        max_length=max_length,
        description=description,
    )


def _varchar_array(name, max_length, description):
    """Return an ARRAY field of VARCHAR elements with ``max_capacity=5``."""
    return FieldSchema(
        name=name,
        dtype=DataType.ARRAY,
        element_type=DataType.VARCHAR,
        max_length=max_length,
        max_capacity=5,
        description=description,
    )


# Identity fields: auto-generated primary key, document id, chunk id.
_identity_fields = [
    FieldSchema(
        name="id",
        dtype=DataType.INT64,
        is_primary=True,
        auto_id=True,
        # NOTE(review): the description probably intends "自增主键id"
        # ("auto-increment primary key id"); it is kept byte-for-byte here
        # because it is part of the schema definition -- confirm before changing.
        description="自增逐渐id",
    ),
    _varchar("doc_id", 64, "文档id"),  # document id
    FieldSchema(
        name="chunk_id",
        dtype=DataType.INT64,
        description="文档分块id",  # document chunk id
    ),
]

# The three vector fields; all are FLOAT_VECTOR with the same dimension.
_vector_fields = [
    FieldSchema(name=vector_name, dtype=DataType.FLOAT_VECTOR, dim=_VECTOR_DIM)
    for vector_name in ("vector_text", "vector_summary", "vector_questions")
]

# Metadata fields.
_metadata_fields = [
    _varchar("topic", 255, "主题"),  # topic
    _varchar("domain", 100, "领域"),  # domain
    _varchar("task_type", 100, "任务类型"),  # task type
    _varchar("summary", 512, "总结"),  # summary
    _varchar_array("keywords", 100, "关键词"),  # keywords
    _varchar_array("concepts", 100, "主要知识点"),  # main knowledge points
    _varchar_array("questions", 200, "隐含问题"),  # implied questions
    FieldSchema(name="topic_purity", dtype=DataType.FLOAT),
    FieldSchema(name="tokens", dtype=DataType.INT64),
]

# Public list, in the same field order as before: identity, vectors, metadata.
fields = _identity_fields + _vector_fields + _metadata_fields

__all__ = ["fields"]
|