from pymilvus import FieldSchema, DataType

# Milvus vector database: field definitions for the schema.

# Dimension shared by every FLOAT_VECTOR field below.
_VECTOR_DIM = 2560
# Maximum number of elements in every ARRAY field below.
_ARRAY_CAPACITY = 5


def _varchar(name: str, max_length: int, description: str) -> FieldSchema:
    """Return a scalar VARCHAR field with the given max_length and description."""
    return FieldSchema(
        name=name,
        dtype=DataType.VARCHAR,
        max_length=max_length,
        description=description,
    )


def _varchar_array(name: str, max_length: int, description: str) -> FieldSchema:
    """Return an ARRAY field of VARCHAR elements capped at _ARRAY_CAPACITY items.

    ``max_length`` is passed through as the field's max_length setting.
    """
    return FieldSchema(
        name=name,
        dtype=DataType.ARRAY,
        element_type=DataType.VARCHAR,
        max_length=max_length,
        max_capacity=_ARRAY_CAPACITY,
        description=description,
    )


def _float_vector(name: str) -> FieldSchema:
    """Return a FLOAT_VECTOR field of dimension _VECTOR_DIM."""
    return FieldSchema(name=name, dtype=DataType.FLOAT_VECTOR, dim=_VECTOR_DIM)


# Identifier columns: primary key (auto_id enabled), document id, chunk id.
_id_fields = [
    FieldSchema(
        name="id",
        dtype=DataType.INT64,
        is_primary=True,
        auto_id=True,
        description="自增id",
    ),
    _varchar("doc_id", 64, "文档id"),
    FieldSchema(name="chunk_id", dtype=DataType.INT64, description="文档分块id"),
]

# The three vector fields.
_vector_fields = [
    _float_vector(vector_name)
    for vector_name in ("vector_text", "vector_summary", "vector_questions")
]

# Metadata columns.
_metadata_fields = [
    _varchar("topic", 255, "主题"),
    _varchar("domain", 100, "领域"),
    _varchar("task_type", 100, "任务类型"),
    _varchar("summary", 512, "总结"),
    _varchar_array("keywords", 100, "关键词"),
    _varchar_array("concepts", 100, "主要知识点"),
    _varchar_array("questions", 200, "隐含问题"),
    _varchar_array("entities", 200, "命名实体"),
    FieldSchema(name="topic_purity", dtype=DataType.FLOAT),
    FieldSchema(name="tokens", dtype=DataType.INT64),
]

# Field order: identifiers, then vectors, then metadata.
fields = [*_id_fields, *_vector_fields, *_metadata_fields]


# Only ``fields`` is exported by a star-import; the helpers above stay private.
__all__ = ["fields"]