|
|
@@ -0,0 +1,195 @@
|
|
|
+package com.tzld.piaoquan.recommend.server.service.recall.strategy;
|
|
|
+
|
|
|
+import com.tzld.piaoquan.recommend.server.model.Video;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.filter.FilterParam;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.filter.FilterResult;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.filter.FilterService;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.recall.FilterParamFactory;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.recall.RecallParam;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.recall.RecallStrategy;
|
|
|
+import com.tzld.piaoquan.recommend.server.util.DkElementsUtils;
|
|
|
+import com.tzld.piaoquan.recommend.server.util.FeatureUtils;
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.apache.commons.collections4.CollectionUtils;
|
|
|
+import org.apache.commons.collections4.MapUtils;
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
+import org.apache.commons.lang3.math.NumberUtils;
|
|
|
+import org.apache.commons.lang3.tuple.Pair;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+import org.springframework.beans.factory.annotation.Qualifier;
|
|
|
+import org.springframework.data.redis.core.RedisTemplate;
|
|
|
+import org.springframework.stereotype.Component;
|
|
|
+
|
|
|
+import java.util.*;
|
|
|
+import java.util.function.Function;
|
|
|
+import java.util.stream.Collectors;
|
|
|
+import java.util.stream.Stream;
|
|
|
+
|
|
|
+/**
|
|
|
+ * 视频解构 实质元素 ros 召回 (用户近期 share 行为 -> dk_elements)
|
|
|
+ * 范式: 完全对齐 YearShareCate2RecallStrategy, 把"取 merge_second_level_cate"换成"取 dk_elements 摊平"
|
|
|
+ * 每个 share vid 一般有多个 element, parseUserActionVideoAndElements 返回扁平的 (vid, element) pair 列表
|
|
|
+ *
|
|
|
+ * 上游 ODPS: alg_recsys_recall_elements_ros (原始元素 -> top-N vid + ros 得分)
|
|
|
+ * Redis key: elements_ros_recall:{原始元素}
|
|
|
+ * value: vid1,vid2,...\tscore1,score2,...
|
|
|
+ */
|
|
|
+@Slf4j
|
|
|
+@Component
|
|
|
+public class YearShareDkElementsRecallStrategy implements RecallStrategy {
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ @Qualifier("redisTemplate")
|
|
|
+ private RedisTemplate<String, String> redisTemplate;
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ private FilterService filterService;
|
|
|
+
|
|
|
+ private final String CLASS_NAME = this.getClass().getSimpleName();
|
|
|
+
|
|
|
+ public static final String PUSH_FROM = "recall_user_year_share_dk_elements";
|
|
|
+ public static final String redisKeyPrefix = "elements_ros_recall";
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public List<Video> recall(RecallParam param) {
|
|
|
+
|
|
|
+ List<Video> videosResult = new ArrayList<>();
|
|
|
+ try {
|
|
|
+
|
|
|
+ if (MapUtils.isEmpty(param.getUserNetworkSeqVideoInfoMap())) {
|
|
|
+ return videosResult;
|
|
|
+ }
|
|
|
+
|
|
|
+ List<Pair<Long, String>> userNetworkVideoElement = this.parseUserActionVideoAndElements(param.getUserNetworkSeqFeature(), param.getUserNetworkSeqVideoInfoMap());
|
|
|
+ if (CollectionUtils.isEmpty(userNetworkVideoElement)) {
|
|
|
+ return videosResult;
|
|
|
+ }
|
|
|
+ int limit = Math.min(userNetworkVideoElement.size(), 3);
|
|
|
+ List<String> lastTopNElement = userNetworkVideoElement.stream()
|
|
|
+ .map(Pair::getValue)
|
|
|
+ .distinct()
|
|
|
+ .limit(limit)
|
|
|
+ .collect(Collectors.toList());
|
|
|
+
|
|
|
+ List<String> freqTopNElement = userNetworkVideoElement.stream()
|
|
|
+ .map(Pair::getValue)
|
|
|
+ .collect(Collectors.groupingBy(Function.identity(), Collectors.counting())).entrySet()
|
|
|
+ .stream()
|
|
|
+ .sorted(Map.Entry.<String, Long>comparingByValue().reversed())
|
|
|
+ .limit(limit)
|
|
|
+ .map(Map.Entry::getKey)
|
|
|
+ .collect(Collectors.toList());
|
|
|
+
|
|
|
+
|
|
|
+ List<String> allElements = Stream.of(lastTopNElement, freqTopNElement)
|
|
|
+ .flatMap(Collection::stream)
|
|
|
+ .distinct()
|
|
|
+ .filter(StringUtils::isNotBlank)
|
|
|
+ .collect(Collectors.toList());
|
|
|
+
|
|
|
+ List<String> keys = this.getRedisKey(allElements);
|
|
|
+ List<String> values = redisTemplate.opsForValue().multiGet(keys);
|
|
|
+ List<Long> ids = recall(param.getVideoId(), values);
|
|
|
+
|
|
|
+ Map<Long, Double> scoresMap = FilterParamFactory.positionScores(ids);
|
|
|
+ FilterParam filterParam = FilterParamFactory.create(param, ids, pushFrom(), scoresMap);
|
|
|
+ FilterResult filterResult = filterService.filter(filterParam);
|
|
|
+ if (filterResult != null && CollectionUtils.isNotEmpty(filterResult.getVideoIds())) {
|
|
|
+ for (Long vid : filterResult.getVideoIds()) {
|
|
|
+ Video video = new Video();
|
|
|
+ video.setVideoId(vid);
|
|
|
+ video.setRovScore(scoresMap.getOrDefault(vid, 0.0));
|
|
|
+ video.setPushFrom(pushFrom());
|
|
|
+ videosResult.add(video);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("recall is wrong in {}, error={}", CLASS_NAME, e);
|
|
|
+ }
|
|
|
+
|
|
|
+ return videosResult;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 摊平: 每个 share vid 一般有多个 dk_element, 输出 (vid, element) pair 序列, 按 vid 时间序保留
|
|
|
+ */
|
|
|
+ private List<Pair<Long, String>> parseUserActionVideoAndElements(Map<String, String> userNetworkSeqFeature, Map<Long, Map<String, String>> userNetworkSeqVideoInfoMap) {
|
|
|
+ List<Pair<Long, String>> result = new ArrayList<>();
|
|
|
+ List<String> actVidSeq = FeatureUtils.extractVidsFromUserNetworkSeqFeature(userNetworkSeqFeature, "a_v_s");
|
|
|
+ List<String> actTypeSeq = FeatureUtils.extractVidsFromUserNetworkSeqFeature(userNetworkSeqFeature, "a_t_s");
|
|
|
+ if (actVidSeq.size() != actTypeSeq.size()) {
|
|
|
+ return new ArrayList<>();
|
|
|
+ }
|
|
|
+
|
|
|
+ for (int i = 0; i < actVidSeq.size(); i++) {
|
|
|
+ long videoIdL = NumberUtils.toLong(actVidSeq.get(i), -1);
|
|
|
+ if (videoIdL <= 0) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ String type = actTypeSeq.get(i);
|
|
|
+ if (!"share".equals(type)) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ Map<String, String> videoBaseInfo = userNetworkSeqVideoInfoMap.getOrDefault(videoIdL, new HashMap<>());
|
|
|
+ String dkElementsStr = videoBaseInfo.get("dk_elements");
|
|
|
+ if (StringUtils.isBlank(dkElementsStr)) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ List<String> kws = DkElementsUtils.parseElementKws(dkElementsStr);
|
|
|
+ for (String kw : kws) {
|
|
|
+ result.add(Pair.of(videoIdL, kw));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ private List<String> getRedisKey(List<String> elementList) {
|
|
|
+ List<String> keys = new ArrayList<>();
|
|
|
+ for (String element : elementList) {
|
|
|
+ keys.add(String.format("%s:%s", redisKeyPrefix, element));
|
|
|
+ }
|
|
|
+ return keys;
|
|
|
+ }
|
|
|
+
|
|
|
+ private List<Long> recall(Long headVid, List<String> values) {
|
|
|
+ List<Long> vidList = new ArrayList<>();
|
|
|
+ if (null != values && !values.isEmpty()) {
|
|
|
+ Set<Long> hits = new HashSet<>();
|
|
|
+ hits.add(headVid);
|
|
|
+ List<org.apache.commons.math3.util.Pair<Long, Double>> list = new ArrayList<>();
|
|
|
+ for (String value : values) {
|
|
|
+ if (null != value && !value.isEmpty()) {
|
|
|
+ String[] cells = value.split("\t");
|
|
|
+ if (2 == cells.length) {
|
|
|
+ List<Long> ids = Arrays.stream(cells[0].split(",")).map(Long::valueOf).collect(Collectors.toList());
|
|
|
+ List<Double> scores = Arrays.stream(cells[1].split(",")).map(Double::valueOf).collect(Collectors.toList());
|
|
|
+ if (!ids.isEmpty() && ids.size() == scores.size()) {
|
|
|
+ for (int i = 0; i < ids.size(); ++i) {
|
|
|
+ long id = ids.get(i);
|
|
|
+ double score = scores.get(i);
|
|
|
+ if (hits.contains(id)) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ hits.add(id);
|
|
|
+ list.add(org.apache.commons.math3.util.Pair.create(id, score));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (!list.isEmpty()) {
|
|
|
+ list.sort(Comparator.comparingDouble(o -> -o.getSecond()));
|
|
|
+ for (org.apache.commons.math3.util.Pair<Long, Double> pair : list) {
|
|
|
+ vidList.add(pair.getFirst());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return vidList;
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public String pushFrom() {
|
|
|
+ return PUSH_FROM;
|
|
|
+ }
|
|
|
+}
|