|
|
@@ -21,16 +21,17 @@ import org.springframework.data.redis.core.RedisTemplate;
|
|
|
import org.springframework.stereotype.Component;
|
|
|
|
|
|
import java.util.*;
|
|
|
-import java.util.function.Function;
|
|
|
import java.util.stream.Collectors;
|
|
|
-import java.util.stream.Stream;
|
|
|
|
|
|
/**
|
|
|
* 视频解构 实质元素 ros 召回 (用户近期 share 行为 -> dk_elements)
|
|
|
- * 范式: 完全对齐 YearShareCate2RecallStrategy, 把"取 merge_second_level_cate"换成"取 dk_elements 摊平"
|
|
|
- * 每个 share vid 一般有多个 element, parseUserActionVideoAndElements 返回扁平的 (vid, element) pair 列表
|
|
|
+ * 每个 share vid 一般有多个 element, parseUserActionVideoAndElements 返回扁平的 (vid, element) pair 列表;
|
|
|
+ * parse 时已按 c >= MIN_CONTRIB_SCORE (0.8) 过滤掉低贡献分元素, 噪声不进召回.
|
|
|
*
|
|
|
- * 上游 ODPS: alg_recsys_recall_elements_ros (原始元素 -> top-N vid + ros 得分)
|
|
|
+ * 挑 kw 逻辑: 按近期 share 行为时间序遍历摊平 element, distinct 取前 topN (30) 个 -> 一次 multiGet
|
|
|
+ * elements_ros_recall:{kw} 倒排. 不再做"最近+最频"并集 (元素粒度比 cate2 细, 取近期 30 更直接).
|
|
|
+ *
|
|
|
+ * 上游 ODPS: alg_recsys_recall_elements_ros (原始元素 -> top-50 vid + ros 得分)
|
|
|
* Redis key: elements_ros_recall:{原始元素}
|
|
|
* value: vid1,vid2,...\tscore1,score2,...
|
|
|
*/
|
|
|
@@ -50,6 +51,11 @@ public class YearShareDkElementsRecallStrategy implements RecallStrategy {
|
|
|
public static final String PUSH_FROM = "recall_user_year_share_dk_elements";
|
|
|
public static final String redisKeyPrefix = "elements_ros_recall";
|
|
|
|
|
|
+ /** 元素贡献分过滤阈值 (parse 时丢弃 c < 0.8 的 element, 噪声元素不进召回) */
|
|
|
+ public static final double MIN_CONTRIB_SCORE = 0.8;
|
|
|
+ /** 摊平后按时间序 distinct 取前 N 个 element 进 Redis 倒排查询 */
|
|
|
+ public static final int topN = 30;
|
|
|
+
|
|
|
@Override
|
|
|
public List<Video> recall(RecallParam param) {
|
|
|
|
|
|
@@ -64,27 +70,13 @@ public class YearShareDkElementsRecallStrategy implements RecallStrategy {
|
|
|
if (CollectionUtils.isEmpty(userNetworkVideoElement)) {
|
|
|
return videosResult;
|
|
|
}
|
|
|
- int limit = Math.min(userNetworkVideoElement.size(), 3);
|
|
|
- List<String> lastTopNElement = userNetworkVideoElement.stream()
|
|
|
- .map(Pair::getValue)
|
|
|
- .distinct()
|
|
|
- .limit(limit)
|
|
|
- .collect(Collectors.toList());
|
|
|
-
|
|
|
- List<String> freqTopNElement = userNetworkVideoElement.stream()
|
|
|
+ // 按用户近期 share 行为时间序遍历, distinct 取前 topN 个高贡献分 element
|
|
|
+ // (贡献分过滤已在 parseUserActionVideoAndElements 内 c >= MIN_CONTRIB_SCORE 完成)
|
|
|
+ List<String> allElements = userNetworkVideoElement.stream()
|
|
|
.map(Pair::getValue)
|
|
|
- .collect(Collectors.groupingBy(Function.identity(), Collectors.counting())).entrySet()
|
|
|
- .stream()
|
|
|
- .sorted(Map.Entry.<String, Long>comparingByValue().reversed())
|
|
|
- .limit(limit)
|
|
|
- .map(Map.Entry::getKey)
|
|
|
- .collect(Collectors.toList());
|
|
|
-
|
|
|
-
|
|
|
- List<String> allElements = Stream.of(lastTopNElement, freqTopNElement)
|
|
|
- .flatMap(Collection::stream)
|
|
|
- .distinct()
|
|
|
.filter(StringUtils::isNotBlank)
|
|
|
+ .distinct()
|
|
|
+ .limit(topN)
|
|
|
.collect(Collectors.toList());
|
|
|
|
|
|
List<String> keys = this.getRedisKey(allElements);
|
|
|
@@ -136,7 +128,7 @@ public class YearShareDkElementsRecallStrategy implements RecallStrategy {
|
|
|
if (StringUtils.isBlank(dkElementsStr)) {
|
|
|
continue;
|
|
|
}
|
|
|
- List<String> kws = DkElementsUtils.parseElementKws(dkElementsStr);
|
|
|
+ List<String> kws = DkElementsUtils.parseElementKws(dkElementsStr, MIN_CONTRIB_SCORE);
|
|
|
for (String kw : kws) {
|
|
|
result.add(Pair.of(videoIdL, kw));
|
|
|
}
|