wangyunpeng 17 часов назад
Родитель
Коммит
f38a1ce26f
1 изменённый файл: 17 добавлений и 5 удалений
  1. 17 5
      core/src/main/java/com/tzld/videoVector/job/VideoTitleVectorJob.java

+ 17 - 5
core/src/main/java/com/tzld/videoVector/job/VideoTitleVectorJob.java

@@ -13,6 +13,7 @@ import com.tzld.videoVector.util.Md5Util;
 import com.xxl.job.core.biz.model.ReturnT;
 import com.xxl.job.core.handler.annotation.XxlJob;
 import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Component;
 import org.springframework.util.CollectionUtils;
 import org.springframework.util.StringUtils;
@@ -49,7 +50,12 @@ public class VideoTitleVectorJob {
     private VectorStoreService vectorStoreService;
 
     /** 每批获取视频详情的数量 */
-    private static final int DETAIL_BATCH_SIZE = 100;
+    @Value("${video.title.vector.batch.size:100}")
+    private int detailBatchSize;
+
+    /** 每次 embedding 调用之间的间隔毫秒数(限流,防止超过对方频次限制) */
+    @Value("${video.title.vector.embedding.interval.ms:100}")
+    private long embeddingIntervalMs;
 
     /**
      * 视频标题向量化任务
@@ -116,8 +122,8 @@ public class VideoTitleVectorJob {
             AtomicInteger totalFail = new AtomicInteger(0);
             AtomicInteger totalSkip = new AtomicInteger(0);
 
-            for (int i = 0; i < needProcessIds.size(); i += DETAIL_BATCH_SIZE) {
-                int end = Math.min(i + DETAIL_BATCH_SIZE, needProcessIds.size());
+            for (int i = 0; i < needProcessIds.size(); i += detailBatchSize) {
+                int end = Math.min(i + detailBatchSize, needProcessIds.size());
                 List<Long> batchIds = needProcessIds.subList(i, end);
 
                 try {
@@ -127,7 +133,7 @@ public class VideoTitleVectorJob {
                     totalFail.addAndGet(batchIds.size());
                 }
 
-                if ((i / DETAIL_BATCH_SIZE + 1) % 10 == 0) {
+                if ((i / detailBatchSize + 1) % 10 == 0) {
                     log.info("进度: 已处理 {}/{}, 成功: {}, 失败: {}, 跳过: {}",
                             end, needProcessIds.size(), totalSuccess.get(), totalFail.get(), totalSkip.get());
                 }
@@ -202,7 +208,7 @@ public class VideoTitleVectorJob {
     }
 
     /**
-     * 优先通过 text_hash 复用已有 embedding,未命中则调用 embedding API
+     * 优先通过 text_hash 复用已有 embedding,未命中则调用 embedding API(带限流)
      */
     private List<Float> getOrEmbed(String text, DeconstructVectorConfig config) {
         String configCode = config.getConfigCode();
@@ -214,6 +220,12 @@ public class VideoTitleVectorJob {
                 return cached;
             }
         }
+        // 限流:实际调用 embedding API 前等待,避免超过对方频次限制
+        try {
+            Thread.sleep(embeddingIntervalMs);
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+        }
         return embeddingService.embed(text, config);
     }