|
|
@@ -13,6 +13,7 @@ import com.tzld.videoVector.util.Md5Util;
|
|
|
import com.xxl.job.core.biz.model.ReturnT;
|
|
|
import com.xxl.job.core.handler.annotation.XxlJob;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.springframework.beans.factory.annotation.Value;
|
|
|
import org.springframework.stereotype.Component;
|
|
|
import org.springframework.util.CollectionUtils;
|
|
|
import org.springframework.util.StringUtils;
|
|
|
@@ -49,7 +50,12 @@ public class VideoTitleVectorJob {
|
|
|
private VectorStoreService vectorStoreService;
|
|
|
|
|
|
/** 每批获取视频详情的数量 */
|
|
|
- private static final int DETAIL_BATCH_SIZE = 100;
|
|
|
+ @Value("${video.title.vector.batch.size:100}")
|
|
|
+ private int detailBatchSize;
|
|
|
+
|
|
|
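+ /** Fixed delay in milliseconds applied before each real embedding API call (rate limiting, to stay under the provider's request-frequency limit); must be non-negative because Thread.sleep rejects negative values. */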
|
|
|
+ @Value("${video.title.vector.embedding.interval.ms:100}")
|
|
|
+ private long embeddingIntervalMs;
|
|
|
|
|
|
/**
|
|
|
* 视频标题向量化任务
|
|
|
@@ -116,8 +122,8 @@ public class VideoTitleVectorJob {
|
|
|
AtomicInteger totalFail = new AtomicInteger(0);
|
|
|
AtomicInteger totalSkip = new AtomicInteger(0);
|
|
|
|
|
|
- for (int i = 0; i < needProcessIds.size(); i += DETAIL_BATCH_SIZE) {
|
|
|
- int end = Math.min(i + DETAIL_BATCH_SIZE, needProcessIds.size());
|
|
|
+ for (int i = 0; i < needProcessIds.size(); i += detailBatchSize) {
|
|
|
+ int end = Math.min(i + detailBatchSize, needProcessIds.size());
|
|
|
List<Long> batchIds = needProcessIds.subList(i, end);
|
|
|
|
|
|
try {
|
|
|
@@ -127,7 +133,7 @@ public class VideoTitleVectorJob {
|
|
|
totalFail.addAndGet(batchIds.size());
|
|
|
}
|
|
|
|
|
|
- if ((i / DETAIL_BATCH_SIZE + 1) % 10 == 0) {
|
|
|
+ if ((i / detailBatchSize + 1) % 10 == 0) {
|
|
|
log.info("进度: 已处理 {}/{}, 成功: {}, 失败: {}, 跳过: {}",
|
|
|
end, needProcessIds.size(), totalSuccess.get(), totalFail.get(), totalSkip.get());
|
|
|
}
|
|
|
@@ -202,7 +208,7 @@ public class VideoTitleVectorJob {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * 优先通过 text_hash 复用已有 embedding,未命中则调用 embedding API
|
|
|
+ * 优先通过 text_hash 复用已有 embedding,未命中则调用 embedding API(带限流)
|
|
|
*/
|
|
|
private List<Float> getOrEmbed(String text, DeconstructVectorConfig config) {
|
|
|
String configCode = config.getConfigCode();
|
|
|
@@ -214,6 +220,12 @@ public class VideoTitleVectorJob {
|
|
|
return cached;
|
|
|
}
|
|
|
}
|
|
|
+ // 限流:实际调用 embedding API 前等待,避免超过对方频次限制
|
|
|
+ try {
|
|
|
+ Thread.sleep(embeddingIntervalMs);
|
|
|
+ } catch (InterruptedException e) {
|
|
|
+ Thread.currentThread().interrupt();
|
|
|
+ }
|
|
|
return embeddingService.embed(text, config);
|
|
|
}
|
|
|
|