|
|
@@ -1,9 +1,12 @@
|
|
|
package com.tzld.videoVector.job;
|
|
|
|
|
|
import com.alibaba.fastjson.JSON;
|
|
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
import com.aliyun.odps.data.Record;
|
|
|
import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
|
|
|
import com.google.common.collect.Lists;
|
|
|
+import com.tzld.videoVector.api.LibraryApiService;
|
|
|
+import com.tzld.videoVector.common.constant.VectorConstants;
|
|
|
import com.tzld.videoVector.dao.mapper.pgVector.ChannelDemandMatchConfigMapper;
|
|
|
import com.tzld.videoVector.dao.mapper.pgVector.ChannelDemandMatchResultMapper;
|
|
|
import com.tzld.videoVector.dao.mapper.pgVector.ext.ChannelDemandMatchResultMapperExt;
|
|
|
@@ -61,6 +64,9 @@ public class ChannelDemandMatchJob {
|
|
|
@Resource
|
|
|
private RedisUtils redisUtils;
|
|
|
|
|
|
+ @Resource
|
|
|
+ private LibraryApiService libraryApiService;
|
|
|
+
|
|
|
/**
|
|
|
* 召回结果Redis缓存前缀
|
|
|
*/
|
|
|
@@ -161,6 +167,36 @@ public class ChannelDemandMatchJob {
|
|
|
@Value("${channel.demand.result.retention-days:14}")
|
|
|
private int resultRetentionDays;
|
|
|
|
|
|
+ /**
|
|
|
+ * Library API execution ID
|
|
|
+ */
|
|
|
+ @Value("${library.api.execution-id:581}")
|
|
|
+ private int libraryExecutionId;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Library API platform
|
|
|
+ */
|
|
|
+ @Value("${library.api.platform:piaoquan}")
|
|
|
+ private String libraryPlatform;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Maximum number of results returned by the Library API topic-element search
|
|
|
+ */
|
|
|
+ @Value("${library.api.element-search-limit:20}")
|
|
|
+ private int elementSearchLimit;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Page size used when listing posts through the Library API
|
|
|
+ */
|
|
|
+ @Value("${library.api.post-page-size:50}")
|
|
|
+ private int libraryPostPageSize;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Day-window dimension of the video detail metrics
|
|
|
+ */
|
|
|
+ @Value("${video.detail.metrics.days:7}")
|
|
|
+ private int metricsDays;
|
|
|
+
|
|
|
/**
|
|
|
* 点类型 → 向量配置编码映射
|
|
|
*/
|
|
|
@@ -521,15 +557,10 @@ public class ChannelDemandMatchJob {
|
|
|
allBatchRows.addAll(rows);
|
|
|
}
|
|
|
|
|
|
- // 策略三:需求特征点类型+需求特征点 均有值 → 用需求特征点召回
|
|
|
+ // 策略三-泛化:特征点泛化 → 使用 Library API 召回
|
|
|
if ("特征点泛化".equals(demand.getDemandType()) && hasValidValue(demand.getMatchGeneralizedPointType())
|
|
|
&& hasValidValue(demand.getMatchGeneralizedElement())) {
|
|
|
- List<String> configCodes = POINT_TYPE_CONFIG_CODE_MAP.getOrDefault(demand.getMatchGeneralizedPointType(), Arrays.asList("VIDEO_TOPIC"));
|
|
|
- List<ChannelDemandMatchResult> rows = new ArrayList<>();
|
|
|
- for (String configCode : configCodes) {
|
|
|
- rows.addAll(doRecall(demand, demand.getMatchGeneralizedElement(), configCode, topN / configCodes.size()));
|
|
|
-
|
|
|
- }
|
|
|
+ List<ChannelDemandMatchResult> rows = doLibraryRecall(demand, topN);
|
|
|
allBatchRows.addAll(rows);
|
|
|
}
|
|
|
|
|
|
@@ -584,6 +615,155 @@ public class ChannelDemandMatchJob {
|
|
|
return batchRows;
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * Library API 召回:通过话题元素搜索 + 帖子列表获取召回视频
|
|
|
+ * <p>
|
|
|
+ * 流程:
|
|
|
+ * 1. 用泛化元素名称搜索话题元素,获取元素 ID
|
|
|
+ * 2. 仅保留名称全等匹配的元素
|
|
|
+ * 3. 遍历元素 ID,调用帖子列表接口获取视频帖子
|
|
|
+ * 4. 按 post_id 去重
|
|
|
+ * 5. 从 Redis 批量获取视频详情指标(按 video.detail.metrics.days 天数维度)
|
|
|
+ * 6. 按 rov 降序排列,取 topN
|
|
|
+ */
|
|
|
+ private List<ChannelDemandMatchResult> doLibraryRecall(ChannelDemandMatchResult demand, int topN) {
|
|
|
+ List<ChannelDemandMatchResult> batchRows = new ArrayList<>();
|
|
|
+ String elementName = demand.getMatchGeneralizedElement();
|
|
|
+
|
|
|
+ // 1. 搜索话题元素
|
|
|
+ LibraryApiService.TopicElementSearchResponse elementResp = libraryApiService.searchTopicElements(
|
|
|
+ libraryExecutionId, elementName, elementSearchLimit, "all");
|
|
|
+ if (elementResp == null || CollectionUtils.isEmpty(elementResp.getItems())) {
|
|
|
+ log.info("Library API 话题元素搜索无结果, executionId={}, elementName={}", libraryExecutionId, elementName);
|
|
|
+ return batchRows;
|
|
|
+ }
|
|
|
+ log.info("Library API 话题元素搜索到 {} 个元素, elementName={}", elementResp.getItems().size(), elementName);
|
|
|
+
|
|
|
+ // 2. 仅保留名称全等匹配的元素
|
|
|
+ List<LibraryApiService.TopicElementItem> matchedItems = elementResp.getItems().stream()
|
|
|
+ .filter(e -> elementName.equals(e.getName()))
|
|
|
+ .collect(Collectors.toList());
|
|
|
+ if (matchedItems.isEmpty()) {
|
|
|
+ log.info("Library API 话题元素无全等匹配, elementName={}", elementName);
|
|
|
+ return batchRows;
|
|
|
+ }
|
|
|
+ log.info("Library API 话题元素全等匹配 {} 个, elementName={}", matchedItems.size(), elementName);
|
|
|
+
|
|
|
+ // 3. 遍历匹配的元素获取帖子,按 post_id 去重
|
|
|
+ Map<Long, LibraryApiService.PostItem> postMap = new LinkedHashMap<>();
|
|
|
+ for (LibraryApiService.TopicElementItem element : matchedItems) {
|
|
|
+ if (element.getId() == null) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ LibraryApiService.PostListResponse postResp = libraryApiService.getPosts(
|
|
|
+ 1, libraryPostPageSize, libraryPlatform, libraryExecutionId, element.getId());
|
|
|
+ if (postResp == null || CollectionUtils.isEmpty(postResp.getPosts())) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ for (LibraryApiService.PostItem post : postResp.getPosts()) {
|
|
|
+ if (post.getPostId() == null) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ Long postIdLong;
|
|
|
+ try {
|
|
|
+ postIdLong = Long.parseLong(post.getPostId());
|
|
|
+ } catch (NumberFormatException e) {
|
|
|
+ log.warn("post_id 解析失败, postId={}, elementId={}", post.getPostId(), element.getId());
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ postMap.putIfAbsent(postIdLong, post);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (postMap.isEmpty()) {
|
|
|
+ log.info("Library API 帖子列表无结果, elementName={}", elementName);
|
|
|
+ return batchRows;
|
|
|
+ }
|
|
|
+ log.info("Library API 去重后获取到 {} 个帖子, elementName={}", postMap.size(), elementName);
|
|
|
+
|
|
|
+ // 4. 从 Redis 批量获取视频详情指标
|
|
|
+ List<Long> postIdList = new ArrayList<>(postMap.keySet());
|
|
|
+ List<String> redisKeys = postIdList.stream()
|
|
|
+ .map(id -> VectorConstants.VIDEO_DETAIL_DAYS_KEY_PREFIX + metricsDays + "d:" + id)
|
|
|
+ .collect(Collectors.toList());
|
|
|
+ List<String> redisValues = redisUtils.mGet(redisKeys);
|
|
|
+
|
|
|
+ // 5. 解析指标并构建结果行
|
|
|
+ List<PostWithMetrics> postWithMetricsList = new ArrayList<>();
|
|
|
+ for (int i = 0; i < postIdList.size(); i++) {
|
|
|
+ Long postId = postIdList.get(i);
|
|
|
+ LibraryApiService.PostItem post = postMap.get(postId);
|
|
|
+ Map<String, Object> videoDetail = null;
|
|
|
+ Double rov = null;
|
|
|
+
|
|
|
+ if (redisValues != null && i < redisValues.size() && redisValues.get(i) != null) {
|
|
|
+ try {
|
|
|
+ videoDetail = JSONObject.parseObject(redisValues.get(i), Map.class);
|
|
|
+ if (videoDetail != null) {
|
|
|
+ Object rovObj = videoDetail.get("rov");
|
|
|
+ if (rovObj != null) {
|
|
|
+ rov = rovObj instanceof Number ? ((Number) rovObj).doubleValue()
|
|
|
+ : Double.parseDouble(rovObj.toString());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.warn("解析视频详情失败, postId={}: {}", postId, e.getMessage());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ postWithMetricsList.add(new PostWithMetrics(post, videoDetail, rov));
|
|
|
+ }
|
|
|
+
|
|
|
+ // 6. 按 rov 降序排列(无 rov 数据的排在最后),取 topN
|
|
|
+ postWithMetricsList.sort((a, b) -> {
|
|
|
+ Double aRov = a.rov != null ? a.rov : -1.0;
|
|
|
+ Double bRov = b.rov != null ? b.rov : -1.0;
|
|
|
+ return bRov.compareTo(aRov);
|
|
|
+ });
|
|
|
+
|
|
|
+ int count = 0;
|
|
|
+ for (PostWithMetrics pm : postWithMetricsList) {
|
|
|
+ if (count >= topN) {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ LibraryApiService.PostItem post = pm.post;
|
|
|
+ Map<String, Object> detail = pm.videoDetail;
|
|
|
+ Long postId = Long.parseLong(post.getPostId());
|
|
|
+
|
|
|
+ ChannelDemandMatchResult row = copyDemandFields(demand);
|
|
|
+ row.setMatchVideoId(postId);
|
|
|
+ row.setMatchConfigCode("LIBRARY_TOPIC_ELEMENT");
|
|
|
+ row.setMatchRov(pm.rov);
|
|
|
+ row.setMatchScore(pm.rov);
|
|
|
+ row.setMatchSim(null);
|
|
|
+ row.setMatchExposurePv(extractNumber(detail, "分发曝光pv", Long.class));
|
|
|
+ row.setMatchHeadSingleReturnRate(extractNumber(detail, "头部单层回流率", Double.class));
|
|
|
+ row.setMatchHeadDistributionSingleReturnRate(extractNumber(detail, "头部进分发单层回流率", Double.class));
|
|
|
+ row.setMatchText(post.getTitle());
|
|
|
+ row.setMatchStatus((short) 1);
|
|
|
+ row.setExperimentId(generateExperimentId(demand, postId, "LIBRARY_TOPIC_ELEMENT"));
|
|
|
+ batchRows.add(row);
|
|
|
+ count++;
|
|
|
+ }
|
|
|
+
|
|
|
+ log.info("Library API 召回完成, elementName={}, 候选{}条, 返回{}条",
|
|
|
+ elementName, postWithMetricsList.size(), batchRows.size());
|
|
|
+ return batchRows;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 帖子与指标数据组装
|
|
|
+ */
|
|
|
+ private static class PostWithMetrics {
|
|
|
+ final LibraryApiService.PostItem post;
|
|
|
+ final Map<String, Object> videoDetail;
|
|
|
+ final Double rov;
|
|
|
+
|
|
|
+ PostWithMetrics(LibraryApiService.PostItem post, Map<String, Object> videoDetail, Double rov) {
|
|
|
+ this.post = post;
|
|
|
+ this.videoDetail = videoDetail;
|
|
|
+ this.rov = rov;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
* 带Redis缓存的召回:相同queryText+configCode+topN直接复用缓存结果
|
|
|
*/
|