|
@@ -0,0 +1,108 @@
|
|
|
|
|
+<?xml version="1.0" encoding="UTF-8"?>
|
|
|
|
|
+<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
|
|
|
|
|
+<mapper namespace="com.tzld.videoVector.dao.mapper.pgVector.ext.ContentVectorMapperExt">
|
|
|
|
|
+
|
|
|
|
|
+    <!-- ==================== Custom ResultMaps ==================== -->
|
|
|
|
|
+
|
|
|
|
|
+    <!-- Full-row mapping from content_vectors columns onto ContentVector properties.
+         The embedding column is read as VARCHAR: the statements in this mapper that
+         use this map select it as "embedding::text". -->
+    <resultMap type="com.tzld.videoVector.model.po.pgVector.ContentVector" id="VectorWithEmbeddingResultMap">
+        <!-- Row identifier -->
+        <id property="id" column="id" jdbcType="BIGINT" />
+        <!-- Ownership and provenance -->
+        <result property="contentId" column="content_id" jdbcType="BIGINT" />
+        <result property="taskId" column="task_id" jdbcType="VARCHAR" />
+        <result property="configCode" column="config_code" jdbcType="VARCHAR" />
+        <result property="sourceField" column="source_field" jdbcType="VARCHAR" />
+        <result property="sourcePath" column="source_path" jdbcType="VARCHAR" />
+        <result property="textHash" column="text_hash" jdbcType="VARCHAR" />
+        <result property="embeddingModel" column="embedding_model" jdbcType="VARCHAR" />
+        <!-- Segment position and payload -->
+        <result property="segmentIndex" column="segment_index" jdbcType="INTEGER" />
+        <result property="segmentTotal" column="segment_total" jdbcType="INTEGER" />
+        <result property="sourceText" column="source_text" jdbcType="VARCHAR" />
+        <result property="embedding" column="embedding" jdbcType="VARCHAR" />
+        <!-- Audit timestamps -->
+        <result property="createdAt" column="created_at" jdbcType="TIMESTAMP" />
+        <result property="updatedAt" column="updated_at" jdbcType="TIMESTAMP" />
+    </resultMap>
|
|
|
|
|
+
|
|
|
|
|
+    <!-- Slim mapping for similarity-search rows: identifying columns plus the
+         computed "score" column. The primary key is declared with <id> (rather
+         than <result>) so MyBatis can use it as the row identifier, matching the
+         convention of the full-row result map in this file. -->
+    <resultMap id="SearchResultMap" type="com.tzld.videoVector.model.po.pgVector.ContentVector">
+        <id column="id" jdbcType="BIGINT" property="id" />
+        <result column="content_id" jdbcType="BIGINT" property="contentId" />
+        <result column="config_code" jdbcType="VARCHAR" property="configCode" />
+        <!-- "score" is a SELECT alias produced by the search statement, not a table column. -->
+        <result column="score" jdbcType="DOUBLE" property="score" />
+    </resultMap>
|
|
|
|
|
+
|
|
|
|
|
+    <!-- ==================== Custom vector operation SQL ==================== -->
|
|
|
|
|
+
|
|
|
|
|
+    <!-- Upsert: insert a vector row, or refresh the existing row on conflict.
+         Conflict target: (content_id, config_code, text_hash, segment_index).
+         NOTE(review): ON CONFLICT needs a matching unique index or constraint on
+         these columns; the DDL is not visible from this file, so confirm it exists.
+         On conflict the embedding, its embedding_model label, source_text and
+         updated_at are overwritten; every other column keeps its stored value.
+         embedding_model is refreshed together with embedding so the stored label
+         always names the model that produced the stored vector.
+         #{embedding} is bound as a plain parameter and cast to "vector" by the database. -->
+    <insert id="upsertWithEmbedding">
+        INSERT INTO content_vectors (
+            content_id,
+            task_id,
+            config_code,
+            source_field,
+            source_path,
+            text_hash,
+            embedding_model,
+            segment_index,
+            segment_total,
+            source_text,
+            embedding,
+            created_at,
+            updated_at
+        )
+        VALUES (
+            #{contentId},
+            #{taskId},
+            #{configCode},
+            #{sourceField},
+            #{sourcePath},
+            #{textHash},
+            #{embeddingModel},
+            #{segmentIndex},
+            #{segmentTotal},
+            #{sourceText},
+            #{embedding}::vector,
+            NOW(),
+            NOW()
+        )
+        ON CONFLICT (content_id, config_code, text_hash, segment_index)
+        DO UPDATE SET
+            embedding = EXCLUDED.embedding,
+            embedding_model = EXCLUDED.embedding_model,
+            source_text = EXCLUDED.source_text,
+            updated_at = NOW()
+    </insert>
|
|
|
|
|
+
|
|
|
|
|
+    <!-- Query by contentId, optionally narrowed to one configCode.
+         The config_code filter is applied only when configCode is non-null and
+         non-empty; otherwise rows for every config_code of the content are returned.
+         Rows are ordered by source_field, then segment_index; id is the final
+         tie-breaker so rows sharing both values come back in a stable order. -->
+    <select id="selectByContentIdAndConfigCode" resultMap="VectorWithEmbeddingResultMap">
+        SELECT
+            id,
+            content_id,
+            task_id,
+            config_code,
+            source_field,
+            source_path,
+            text_hash,
+            embedding_model,
+            segment_index,
+            segment_total,
+            source_text,
+            embedding::text AS embedding,
+            created_at,
+            updated_at
+        FROM content_vectors
+        WHERE content_id = #{contentId}
+        <if test="configCode != null and configCode != ''">
+            AND config_code = #{configCode}
+        </if>
+        ORDER BY source_field ASC, segment_index ASC, id ASC
+    </select>
|
|
|
|
|
+
|
|
|
|
|
+    <!-- Query all vector rows of one contentId (no config_code filter).
+         Rows are ordered by source_field, then segment_index; id is the final
+         tie-breaker so rows that share both values (for example the same field
+         stored under different config_code values) come back in a stable order. -->
+    <select id="selectByContentId" resultMap="VectorWithEmbeddingResultMap">
+        SELECT
+            id,
+            content_id,
+            task_id,
+            config_code,
+            source_field,
+            source_path,
+            text_hash,
+            embedding_model,
+            segment_index,
+            segment_total,
+            source_text,
+            embedding::text AS embedding,
+            created_at,
+            updated_at
+        FROM content_vectors
+        WHERE content_id = #{contentId}
+        ORDER BY source_field ASC, segment_index ASC, id ASC
+    </select>
|
|
|
|
|
+
|
|
|
|
|
+    <!-- Query the vector rows of one contentId restricted to a single sourceField.
+         Rows are ordered by segment_index; id is the final tie-breaker so rows
+         with the same segment_index come back in a stable order. -->
+    <select id="selectByContentIdAndField" resultMap="VectorWithEmbeddingResultMap">
+        SELECT
+            id,
+            content_id,
+            task_id,
+            config_code,
+            source_field,
+            source_path,
+            text_hash,
+            embedding_model,
+            segment_index,
+            segment_total,
+            source_text,
+            embedding::text AS embedding,
+            created_at,
+            updated_at
+        FROM content_vectors
+        WHERE content_id = #{contentId}
+          AND source_field = #{sourceField}
+        ORDER BY segment_index ASC, id ASC
+    </select>
|
|
|
|
|
+
|
|
|
|
|
+    <!-- Look up a cached vector by textHash + configCode.
+         Returns at most one row: among the matches, the one with the highest id. -->
+    <select id="selectByTextHashAndConfigCode" resultMap="VectorWithEmbeddingResultMap">
+        SELECT
+            id,
+            content_id,
+            task_id,
+            config_code,
+            source_field,
+            source_path,
+            text_hash,
+            embedding_model,
+            segment_index,
+            segment_total,
+            source_text,
+            embedding::text AS embedding,
+            created_at,
+            updated_at
+        FROM content_vectors
+        WHERE text_hash = #{textHash}
+          AND config_code = #{configCode}
+        ORDER BY id DESC
+        LIMIT 1
+    </select>
|
|
|
|
|
+
|
|
|
|
|
+    <!-- Look up a vector by textHash alone (configCode is not restricted).
+         Returns at most one row: among the matches, the one with the highest id. -->
+    <select id="selectByTextHash" resultMap="VectorWithEmbeddingResultMap">
+        SELECT
+            id,
+            content_id,
+            task_id,
+            config_code,
+            source_field,
+            source_path,
+            text_hash,
+            embedding_model,
+            segment_index,
+            segment_total,
+            source_text,
+            embedding::text AS embedding,
+            created_at,
+            updated_at
+        FROM content_vectors
+        WHERE text_hash = #{textHash}
+        ORDER BY id DESC
+        LIMIT 1
+    </select>
|
|
|
|
|
+
|
|
|
|
|
+    <!-- Cosine-similarity Top-N search within one configCode.
+         #{queryVector} is bound as a plain parameter and cast to "vector" by the
+         database. score is computed as 1 minus the value of the pgvector distance
+         operator, and rows are returned in ascending distance order (best match
+         first), limited to #{topN} rows.
+         The distance operator is written as &lt;=&gt; because a raw less-than sign
+         is not allowed in XML character data and would make this mapper fail to
+         parse; the XML parser hands the unescaped operator to MyBatis. -->
+    <select id="searchTopNByCosine" resultMap="SearchResultMap">
+        SELECT
+            id,
+            content_id,
+            config_code,
+            1 - (embedding &lt;=&gt; #{queryVector}::vector) AS score
+        FROM content_vectors
+        WHERE config_code = #{configCode}
+        ORDER BY embedding &lt;=&gt; #{queryVector}::vector
+        LIMIT #{topN}
+    </select>
|
|
|
|
|
+
|
|
|
|
|
+</mapper>
|