|
@@ -0,0 +1,531 @@
|
|
|
+package com.tzld.piaoquan.recommend.server.service.rank.strategy;
|
|
|
+
|
|
|
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
|
|
|
+import com.tzld.piaoquan.recommend.server.common.ThreadPoolFactory;
|
|
|
+import com.tzld.piaoquan.recommend.server.common.base.RankItem;
|
|
|
+import com.tzld.piaoquan.recommend.server.model.Video;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.FeatureService;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.rank.RankParam;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.rank.extractor.ExtractorUtils;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.recall.strategy.*;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.score.ScorerUtils;
|
|
|
+import com.tzld.piaoquan.recommend.server.util.CommonCollectionUtils;
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.apache.commons.collections4.MapUtils;
|
|
|
+import org.apache.commons.math3.util.Pair;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+import org.springframework.beans.factory.annotation.Value;
|
|
|
+import org.springframework.stereotype.Service;
|
|
|
+
|
|
|
+import java.util.*;
|
|
|
+import java.util.concurrent.Future;
|
|
|
+import java.util.concurrent.TimeUnit;
|
|
|
+import java.util.stream.Collectors;
|
|
|
+
|
|
|
+@Service
|
|
|
+@Slf4j
|
|
|
+public class RankStrategy4RegionMergeModelV553 extends RankStrategy4RegionMergeModelBasic {
|
|
|
+ @ApolloJsonValue("${rank.score.merge.weightv553:}")
|
|
|
+ private Map<String, Double> mergeWeight;
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ private FeatureService featureService;
|
|
|
+
|
|
|
+
|
|
|
+ @Value("${similarity.concurrent: true}")
|
|
|
+ private boolean similarityConcurrent;
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public List<Video> mergeAndRankRovRecall(RankParam param) {
|
|
|
+ Map<String, Double> mergeWeight = this.mergeWeight != null ? this.mergeWeight : new HashMap<>(0);
|
|
|
+ //-------------------融-------------------
|
|
|
+ //-------------------合-------------------
|
|
|
+ //-------------------逻-------------------
|
|
|
+ //-------------------辑-------------------
|
|
|
+
|
|
|
+ List<Video> oldRovs = new ArrayList<>();
|
|
|
+ oldRovs.addAll(extractAndSort(param, RegionHRecallStrategy.PUSH_FORM));
|
|
|
+ oldRovs.addAll(extractAndSort(param, RegionHDupRecallStrategy.PUSH_FORM));
|
|
|
+ oldRovs.addAll(extractAndSort(param, Region24HRecallStrategy.PUSH_FORM));
|
|
|
+ oldRovs.addAll(extractAndSort(param, RegionRelative24HRecallStrategy.PUSH_FORM));
|
|
|
+ oldRovs.addAll(extractAndSort(param, RegionRelative24HDupRecallStrategy.PUSH_FORM));
|
|
|
+ removeDuplicate(oldRovs);
|
|
|
+ int sizeReturn = param.getSize();
|
|
|
+ List<Video> v0 = oldRovs.size() <= sizeReturn
|
|
|
+ ? oldRovs
|
|
|
+ : oldRovs.subList(0, sizeReturn);
|
|
|
+ Set<Long> setVideo = new HashSet<>();
|
|
|
+ this.duplicate(setVideo, v0);
|
|
|
+ setVideo.addAll(v0.stream().map(Video::getVideoId).collect(Collectors.toSet()));
|
|
|
+ List<Video> rovRecallRank = new ArrayList<>(v0);
|
|
|
+ //-------------------return相似召回------------------
|
|
|
+ List<Video> v6 = extractAndSort(param, ReturnVideoRecallStrategy.PUSH_FORM);
|
|
|
+ v6 = v6.stream().filter(r -> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
|
|
|
+ v6 = v6.subList(0, Math.min(mergeWeight.getOrDefault("v6", 5.0).intValue(), v6.size()));
|
|
|
+ rovRecallRank.addAll(v6);
|
|
|
+ setVideo.addAll(v6.stream().map(Video::getVideoId).collect(Collectors.toSet()));
|
|
|
+ //-------------------新地域召回------------------
|
|
|
+ List<Video> v1 = extractAndSort(param, RegionRealtimeRecallStrategyV1.PUSH_FORM);
|
|
|
+ v1 = v1.stream().filter(r -> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
|
|
|
+ v1 = v1.subList(0, Math.min(mergeWeight.getOrDefault("v1", 5.0).intValue(), v1.size()));
|
|
|
+ rovRecallRank.addAll(v1);
|
|
|
+ setVideo.addAll(v1.stream().map(Video::getVideoId).collect(Collectors.toSet()));
|
|
|
+ //-------------------老内容召回------------------
|
|
|
+ List<Video> v2 = extractAndSort(param, RegionRealtimeRecallStrategyV7VovLongTermV1.PUSH_FORM);
|
|
|
+ v2 = v2.stream().filter(r -> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
|
|
|
+ v2 = v2.subList(0, Math.min(mergeWeight.getOrDefault("v2", 2.0).intValue(), v2.size()));
|
|
|
+ rovRecallRank.addAll(v2);
|
|
|
+ setVideo.addAll(v2.stream().map(Video::getVideoId).collect(Collectors.toSet()));
|
|
|
+
|
|
|
+ List<Video> v3 = extractAndSort(param, RegionRealtimeRecallStrategyV7VovLongTermV2.PUSH_FORM);
|
|
|
+ v3 = v3.stream().filter(r -> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
|
|
|
+ v3 = v3.subList(0, Math.min(mergeWeight.getOrDefault("v3", 2.0).intValue(), v3.size()));
|
|
|
+ rovRecallRank.addAll(v3);
|
|
|
+ setVideo.addAll(v3.stream().map(Video::getVideoId).collect(Collectors.toSet()));
|
|
|
+
|
|
|
+ List<Video> v4 = extractAndSort(param, RegionRealtimeRecallStrategyV7VovLongTermV3.PUSH_FORM);
|
|
|
+ v4 = v4.stream().filter(r -> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
|
|
|
+ v4 = v4.subList(0, Math.min(mergeWeight.getOrDefault("v4", 2.0).intValue(), v4.size()));
|
|
|
+ rovRecallRank.addAll(v4);
|
|
|
+ setVideo.addAll(v4.stream().map(Video::getVideoId).collect(Collectors.toSet()));
|
|
|
+
|
|
|
+ List<Video> v5 = extractAndSort(param, RegionRealtimeRecallStrategyV7VovLongTermV4.PUSH_FORM);
|
|
|
+ v5 = v5.stream().filter(r -> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
|
|
|
+ v5 = v5.subList(0, Math.min(mergeWeight.getOrDefault("v5", 2.0).intValue(), v5.size()));
|
|
|
+ rovRecallRank.addAll(v5);
|
|
|
+ setVideo.addAll(v5.stream().map(Video::getVideoId).collect(Collectors.toSet()));
|
|
|
+
|
|
|
+ //-------------------排-------------------
|
|
|
+ //-------------------序-------------------
|
|
|
+ //-------------------逻-------------------
|
|
|
+ //-------------------辑-------------------
|
|
|
+
|
|
|
+ // 1 批量获取特征 省份参数要对齐 headvid 要传递过来!
|
|
|
+ List<String> vids = CommonCollectionUtils.toListDistinct(rovRecallRank, v -> String.valueOf(v.getVideoId()));
|
|
|
+
|
|
|
+ // k1:视频、k2:表、k3:特征、v:特征值
|
|
|
+ String provinceCn = param.getProvince().replaceAll("省$", "");
|
|
|
+ String headVid = String.valueOf(param.getHeadVid());
|
|
|
+ FeatureService.Feature feature = featureService.getFeature(param.getMid(), vids,
|
|
|
+ String.valueOf(param.getAppType()), provinceCn, headVid);
|
|
|
+ Map<String, Map<String, String>> featureOriginUser = feature.getUserFeature();
|
|
|
+ Map<String, Map<String, Map<String, String>>> featureOriginVideo = feature.getVideoFeature();
|
|
|
+
|
|
|
+
|
|
|
+ // 2 特征处理
|
|
|
+ Map<String, Double> userFeatureMapDouble = new HashMap<>();
|
|
|
+ String mid = param.getMid();
|
|
|
+ Map<String, String> c1 = featureOriginUser.getOrDefault("alg_mid_feature_play", new HashMap<>());
|
|
|
+ Map<String, String> c2 = featureOriginUser.getOrDefault("alg_mid_feature_share_and_return", new HashMap<>());
|
|
|
+ Map<String, String> c3 = featureOriginUser.getOrDefault("alg_mid_feature_play_tags", new HashMap<>());
|
|
|
+ Map<String, String> c4 = featureOriginUser.getOrDefault("alg_mid_feature_return_tags", new HashMap<>());
|
|
|
+ Map<String, String> c5 = featureOriginUser.getOrDefault("alg_mid_feature_share_tags", new HashMap<>());
|
|
|
+ Map<String, String> c6 = featureOriginUser.getOrDefault("alg_mid_feature_feed_exp_share_tags_v2", new HashMap<>());
|
|
|
+ Map<String, String> c7 = featureOriginUser.getOrDefault("alg_mid_feature_feed_exp_return_tags_v2", new HashMap<>());
|
|
|
+ Map<String, String> c8 = featureOriginUser.getOrDefault("alg_mid_feature_sharecf", new HashMap<>());
|
|
|
+ Map<String, String> c9 = featureOriginUser.getOrDefault("alg_mid_feature_returncf", new HashMap<>());
|
|
|
+
|
|
|
+ if (!c1.isEmpty()) {
|
|
|
+ userFeatureMapDouble.put("playcnt_6h", Double.parseDouble(c1.getOrDefault("playcnt_6h", "0")));
|
|
|
+ userFeatureMapDouble.put("playcnt_1d", Double.parseDouble(c1.getOrDefault("playcnt_1d", "0")));
|
|
|
+ userFeatureMapDouble.put("playcnt_3d", Double.parseDouble(c1.getOrDefault("playcnt_3d", "0")));
|
|
|
+ userFeatureMapDouble.put("playcnt_7d", Double.parseDouble(c1.getOrDefault("playcnt_7d", "0")));
|
|
|
+ }
|
|
|
+ if (!c2.isEmpty()) {
|
|
|
+ userFeatureMapDouble.put("share_pv_12h", Double.parseDouble(c2.getOrDefault("share_pv_12h", "0")));
|
|
|
+ userFeatureMapDouble.put("share_pv_1d", Double.parseDouble(c2.getOrDefault("share_pv_1d", "0")));
|
|
|
+ userFeatureMapDouble.put("share_pv_3d", Double.parseDouble(c2.getOrDefault("share_pv_3d", "0")));
|
|
|
+ userFeatureMapDouble.put("share_pv_7d", Double.parseDouble(c2.getOrDefault("share_pv_7d", "0")));
|
|
|
+ userFeatureMapDouble.put("return_uv_12h", Double.parseDouble(c2.getOrDefault("return_uv_12h", "0")));
|
|
|
+ userFeatureMapDouble.put("return_uv_1d", Double.parseDouble(c2.getOrDefault("return_uv_1d", "0")));
|
|
|
+ userFeatureMapDouble.put("return_uv_3d", Double.parseDouble(c2.getOrDefault("return_uv_3d", "0")));
|
|
|
+ userFeatureMapDouble.put("return_uv_7d", Double.parseDouble(c2.getOrDefault("return_uv_7d", "0")));
|
|
|
+ }
|
|
|
+
|
|
|
+ Map<String, String> c34567Map = new HashMap<>(15);
|
|
|
+ List<Tuple2> tmpList0 = Arrays.asList(
|
|
|
+ new Tuple2(c3, "c3_feature"),
|
|
|
+ new Tuple2(c4, "c4_feature"),
|
|
|
+ new Tuple2(c5, "c5_feature"),
|
|
|
+ new Tuple2(c6, "c6_feature"),
|
|
|
+ new Tuple2(c7, "c7_feature")
|
|
|
+ );
|
|
|
+ for (Tuple2 tuple2 : tmpList0) {
|
|
|
+ for (String key_time : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
|
|
|
+ String tags = tuple2.first.getOrDefault(key_time, "");
|
|
|
+ if (!tags.isEmpty()) {
|
|
|
+ c34567Map.put(tuple2.name + "_" + key_time, tags);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ Map<String, Map<String, String[]>> c89Map = new HashMap<>(4);
|
|
|
+ List<Tuple2> tmpList1 = Arrays.asList(
|
|
|
+ new Tuple2(c8, "c8_feature"),
|
|
|
+ new Tuple2(c9, "c9_feature")
|
|
|
+ );
|
|
|
+ for (Tuple2 tuple2 : tmpList1) {
|
|
|
+ for (String key_action : Arrays.asList("share", "return")) {
|
|
|
+ String cfListStr = tuple2.first.getOrDefault(key_action, "");
|
|
|
+ if (!cfListStr.isEmpty()) {
|
|
|
+ Map<String, String[]> cfMap = new HashMap<>();
|
|
|
+ String[] entries = cfListStr.split(",");
|
|
|
+ for (String entry : entries) {
|
|
|
+ String[] rList = entry.split(":");
|
|
|
+ if (rList.length >= 4) { // 确保分割后有四个元素
|
|
|
+ String key = rList[0];
|
|
|
+ String value1 = rList[1];
|
|
|
+ String value2 = rList[2];
|
|
|
+ String value3 = rList[3];
|
|
|
+ String[] strs = {value1, value2, value3};
|
|
|
+ cfMap.put(key, strs);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ c89Map.put(tuple2.name + "_" + key_action, cfMap);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ List<RankItem> rankItems = CommonCollectionUtils.toList(rovRecallRank, RankItem::new);
|
|
|
+ for (RankItem item : rankItems) {
|
|
|
+ Map<String, Double> featureMap = new HashMap<>();
|
|
|
+ String vid = item.getVideoId() + "";
|
|
|
+ Map<String, String> b1 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_all_exp_v2", new HashMap<>());
|
|
|
+ Map<String, String> b2 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_all_share", new HashMap<>());
|
|
|
+ Map<String, String> b3 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_all_return", new HashMap<>());
|
|
|
+ Map<String, String> b6 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_exp2share_v2", new HashMap<>());
|
|
|
+ Map<String, String> b7 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_share2return", new HashMap<>());
|
|
|
+
|
|
|
+ Map<String, String> b8 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_noflow_exp_v2", new HashMap<>());
|
|
|
+ Map<String, String> b9 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_noflow_root_share_v2", new HashMap<>());
|
|
|
+ Map<String, String> b10 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_noflow_root_return_v2", new HashMap<>());
|
|
|
+ Map<String, String> b11 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_flow_exp_v2", new HashMap<>());
|
|
|
+ Map<String, String> b12 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_flow_root_share_v2", new HashMap<>());
|
|
|
+ Map<String, String> b13 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_flow_root_return_v2", new HashMap<>());
|
|
|
+ Map<String, String> b17 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_province_exp_v2", new HashMap<>());
|
|
|
+ Map<String, String> b18 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_province_root_share_v2", new HashMap<>());
|
|
|
+ Map<String, String> b19 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_province_root_return_v2", new HashMap<>());
|
|
|
+
|
|
|
+ List<Tuple4> originData = Arrays.asList(
|
|
|
+ new Tuple4(b1, b2, b3, "b123"),
|
|
|
+ new Tuple4(b1, b6, b7, "b167"),
|
|
|
+ new Tuple4(b8, b9, b10, "b8910"),
|
|
|
+ new Tuple4(b11, b12, b13, "b111213"),
|
|
|
+ new Tuple4(b17, b18, b19, "b171819")
|
|
|
+ );
|
|
|
+
|
|
|
+ for (Tuple4 tuple4 : originData) {
|
|
|
+ for (String prefix2 : Arrays.asList("1h", "2h", "3h", "4h", "12h", "1d", "3d", "7d")) {
|
|
|
+ double exp = tuple4.first.isEmpty() ? 0 : Double.parseDouble(tuple4.first.getOrDefault("exp_pv_" + prefix2, "0.0"));
|
|
|
+ double share = tuple4.second.isEmpty() ? 0 : Double.parseDouble(tuple4.second.getOrDefault("share_pv_" + prefix2, "0.0"));
|
|
|
+ double returns = tuple4.third.isEmpty() ? 0 : Double.parseDouble(tuple4.third.getOrDefault("return_uv_" + prefix2, "0.0"));
|
|
|
+
|
|
|
+ double f1 = ExtractorUtils.calDiv(share, exp);
|
|
|
+ double f2 = ExtractorUtils.calLog(share);
|
|
|
+ double f3 = ExtractorUtils.calDiv(returns, exp);
|
|
|
+ double f4 = ExtractorUtils.calLog(returns);
|
|
|
+ double f5 = f3 * f4;
|
|
|
+
|
|
|
+ String key1 = tuple4.name + "_" + prefix2 + "_" + "STR";
|
|
|
+ String key2 = tuple4.name + "_" + prefix2 + "_" + "log(share)";
|
|
|
+ String key3 = tuple4.name + "_" + prefix2 + "_" + "ROV";
|
|
|
+ String key4 = tuple4.name + "_" + prefix2 + "_" + "log(return)";
|
|
|
+ String key5 = tuple4.name + "_" + prefix2 + "_" + "ROV*log(return)";
|
|
|
+
|
|
|
+ featureMap.put(key1, f1);
|
|
|
+ featureMap.put(key2, f2);
|
|
|
+ featureMap.put(key3, f3);
|
|
|
+ featureMap.put(key4, f4);
|
|
|
+ featureMap.put(key5, f5);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ Map<String, String> videoInfo = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
|
|
|
+ featureMap.put("total_time", Double.parseDouble(videoInfo.getOrDefault("total_time", "0")));
|
|
|
+ featureMap.put("bit_rate", Double.parseDouble(videoInfo.getOrDefault("bit_rate", "0")));
|
|
|
+
|
|
|
+ String title = videoInfo.getOrDefault("title", "");
|
|
|
+ if (!title.isEmpty()) {
|
|
|
+ if (similarityConcurrent) {
|
|
|
+ List<Future<Pair<String, Double[]>>> futures = new ArrayList<>();
|
|
|
+ for (String name : Arrays.asList("c3_feature", "c4_feature", "c5_feature", "c6_feature", "c7_feature")) {
|
|
|
+ for (String key_time : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
|
|
|
+ String key = name + "_" + key_time;
|
|
|
+ String tags = c34567Map.getOrDefault(key, "");
|
|
|
+ if (!tags.isEmpty()) {
|
|
|
+ Future<Pair<String, Double[]>> future = ThreadPoolFactory.defaultPool().submit(() -> {
|
|
|
+ Double[] doubles = null;
|
|
|
+ if (param.getAbExpCodes().contains(word2vecExp)) {
|
|
|
+ doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
|
|
|
+ } else {
|
|
|
+ doubles = ExtractorUtils.funcC34567ForTags(tags, title);
|
|
|
+ }
|
|
|
+ return Pair.create(key, doubles);
|
|
|
+ });
|
|
|
+ futures.add(future);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ try {
|
|
|
+ for (Future<Pair<String, Double[]>> future : futures) {
|
|
|
+ Pair<String, Double[]> pair = future.get(1000, TimeUnit.MILLISECONDS);
|
|
|
+ featureMap.put(pair.getFirst() + "_matchnum", pair.getSecond()[0]);
|
|
|
+ featureMap.put(pair.getFirst() + "_maxscore", pair.getSecond()[1]);
|
|
|
+ featureMap.put(pair.getFirst() + "_avgscore", pair.getSecond()[2]);
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("concurrent similarity error", e);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ for (String name : Arrays.asList("c3_feature", "c4_feature", "c5_feature", "c6_feature", "c7_feature")) {
|
|
|
+ for (String key_time : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
|
|
|
+ String tags = c34567Map.getOrDefault(name + "_" + key_time, "");
|
|
|
+ if (!tags.isEmpty()) {
|
|
|
+ Double[] doubles = null;
|
|
|
+ if (param.getAbExpCodes().contains(word2vecExp)) {
|
|
|
+ doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
|
|
|
+ } else {
|
|
|
+ doubles = ExtractorUtils.funcC34567ForTags(tags, title);
|
|
|
+ }
|
|
|
+ featureMap.put(name + "_" + key_time + "_matchnum", doubles[0]);
|
|
|
+ featureMap.put(name + "_" + key_time + "_maxscore", doubles[1]);
|
|
|
+ featureMap.put(name + "_" + key_time + "_avgscore", doubles[2]);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!vid.isEmpty()) {
|
|
|
+ for (String key_feature : Arrays.asList("c8_feature", "c9_feature")) {
|
|
|
+ for (String key_action : Arrays.asList("share", "return")) {
|
|
|
+ Map<String, String[]> cfMap = c89Map.getOrDefault(key_feature + "_" + key_action, new HashMap<>());
|
|
|
+ if (cfMap.containsKey(vid)) {
|
|
|
+ String[] scores = cfMap.get(vid);
|
|
|
+ Double score1 = Double.parseDouble(scores[0]);
|
|
|
+ Double score2 = Double.parseDouble(scores[1]);
|
|
|
+ Double score3 = Double.parseDouble(scores[2]) <= 0 ? 0D : 1.0 / Double.parseDouble(scores[2]);
|
|
|
+ featureMap.put(key_feature + "_" + key_action + "_score", score1);
|
|
|
+ featureMap.put(key_feature + "_" + key_action + "_num", score2);
|
|
|
+ featureMap.put(key_feature + "_" + key_action + "_rank", score3);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ Map<String, String> d1 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_recsys_feature_cf_i2i_new_v2", new HashMap<>());
|
|
|
+ if (!d1.isEmpty()) {
|
|
|
+ featureMap.put("d1_exp", Double.parseDouble(d1.getOrDefault("exp", "0")));
|
|
|
+ featureMap.put("d1_return_n", Double.parseDouble(d1.getOrDefault("return_n", "0")));
|
|
|
+ featureMap.put("d1_rovn", Double.parseDouble(d1.getOrDefault("rovn", "0")));
|
|
|
+ }
|
|
|
+ item.featureMapDouble = featureMap;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 3 连续值特征分桶
|
|
|
+ readBucketFile();
|
|
|
+ Map<String, String> userFeatureMap = new HashMap<>(userFeatureMapDouble.size());
|
|
|
+ for (Map.Entry<String, Double> entry : userFeatureMapDouble.entrySet()) {
|
|
|
+ String name = entry.getKey();
|
|
|
+ Double score = entry.getValue();
|
|
|
+ // 注意:0值、不在分桶文件中的特征,会被过滤掉。
|
|
|
+ if (score > 1E-8 && this.bucketsLen.containsKey(name) && this.bucketsMap.containsKey(name)) {
|
|
|
+ Double bucketNum = this.bucketsLen.get(name);
|
|
|
+ double[] buckets = this.bucketsMap.get(name);
|
|
|
+ Double scoreNew = 1.0 / bucketNum * (ExtractorUtils.findInsertPosition(buckets, score) + 1.0);
|
|
|
+ userFeatureMap.put(name, String.valueOf(scoreNew));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ for (RankItem item : rankItems) {
|
|
|
+ Map<String, String> featureMap = new HashMap<>();
|
|
|
+ Map<String, Double> featureMapDouble = item.featureMapDouble;
|
|
|
+
|
|
|
+ for (Map.Entry<String, Double> entry : featureMapDouble.entrySet()) {
|
|
|
+ String name = entry.getKey();
|
|
|
+ Double score = entry.getValue();
|
|
|
+ // 注意:0值、不在分桶文件中的特征,会被过滤掉。
|
|
|
+ if (score > 1E-8 && this.bucketsLen.containsKey(name) && this.bucketsMap.containsKey(name)) {
|
|
|
+ Double bucketNum = this.bucketsLen.get(name);
|
|
|
+ double[] buckets = this.bucketsMap.get(name);
|
|
|
+ Double scoreNew = 1.0 / bucketNum * (ExtractorUtils.findInsertPosition(buckets, score) + 1.0);
|
|
|
+ featureMap.put(name, String.valueOf(scoreNew));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ item.featureMap = featureMap;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 3 排序
|
|
|
+ Map<String, String> sceneFeatureMap = new HashMap<>(0);
|
|
|
+
|
|
|
+ List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_20240807.conf")
|
|
|
+ .scoring(sceneFeatureMap, userFeatureMap, rankItems);
|
|
|
+
|
|
|
+
|
|
|
+ // 获取VoV预测模型参数
|
|
|
+ // 融合权重
|
|
|
+ double alpha_vov = mergeWeight.getOrDefault("alpha_vov", 1.0);
|
|
|
+
|
|
|
+ double vov_thresh = mergeWeight.getOrDefault("vov_thresh", 0.1);
|
|
|
+
|
|
|
+ double view_thresh = mergeWeight.getOrDefault("view_thresh", 1535.0);
|
|
|
+
|
|
|
+ double level50_vov = mergeWeight.getOrDefault("level50_vov", 0.123);
|
|
|
+
|
|
|
+ double level_95_vov = mergeWeight.getOrDefault("level_95_vov", 0.178);
|
|
|
+
|
|
|
+ double beta_vov = mergeWeight.getOrDefault("beta_vov", 100.0);
|
|
|
+
|
|
|
+ List<Double> weightList = new ArrayList<>(7);
|
|
|
+ weightList.add(mergeWeight.getOrDefault("d2_ago_vov_w", 0.0));
|
|
|
+ weightList.add(mergeWeight.getOrDefault("d1_ago_vov_w", 0.0));
|
|
|
+ weightList.add(mergeWeight.getOrDefault("h48_ago_vov_w", 0.0));
|
|
|
+ weightList.add(mergeWeight.getOrDefault("h24_ago_vov_w", 0.0));
|
|
|
+ weightList.add(mergeWeight.getOrDefault("h3_ago_vov_w", 0.0));
|
|
|
+ weightList.add(mergeWeight.getOrDefault("h2_ago_vov_w", 0.0));
|
|
|
+ weightList.add(mergeWeight.getOrDefault("h1_ago_vov_w", 0.0));
|
|
|
+
|
|
|
+
|
|
|
+ Map<String, Map<String, String>> vid2MapFeature = this.getVideoRedisFeature(vids, "redis:vid_hasreturn_rov:");
|
|
|
+ Map<String, Map<String, String>> vid2VovFeatureMap = this.getVideoRedisFeature(vids, "redis:vid_vovhour4rank:");
|
|
|
+ List<Video> result = new ArrayList<>();
|
|
|
+// String hasReturnRovKey = mergeWeight.getOrDefault("hasReturnRovKey", 1.0) < 0.5 ? "rate_1" : "rate_n";
|
|
|
+// Double chooseFunction = mergeWeight.getOrDefault("chooseFunction", 0.0);
|
|
|
+
|
|
|
+ for (RankItem item : items) {
|
|
|
+ double score = 0.0;
|
|
|
+ // 获取其他模型输出score
|
|
|
+ double fmRovOrigin = item.getScoreRov();
|
|
|
+ item.getScoresMap().put("fmRovOrigin", fmRovOrigin);
|
|
|
+ double fmRov = restoreScore(fmRovOrigin);
|
|
|
+ item.getScoresMap().put("fmRov", fmRov);
|
|
|
+
|
|
|
+
|
|
|
+ // 获取VoV输入特征
|
|
|
+ double h1_ago_vov = Double.parseDouble(vid2VovFeatureMap.getOrDefault(item.getVideoId() + "", new HashMap<>())
|
|
|
+ .getOrDefault("h1_ago_vov", "-2")); // 如果没有时,默认为多少?? 需要考虑
|
|
|
+ double h2_ago_vov = Double.parseDouble(vid2VovFeatureMap.getOrDefault(item.getVideoId() + "", new HashMap<>())
|
|
|
+ .getOrDefault("h2_ago_vov", "-2")); // 如果没有时,默认为多少?? 需要考虑
|
|
|
+ double h3_ago_vov = Double.parseDouble(vid2VovFeatureMap.getOrDefault(item.getVideoId() + "", new HashMap<>())
|
|
|
+ .getOrDefault("h3_ago_vov", "-2")); // 如果没有时,默认为多少?? 需要考虑
|
|
|
+ double h24_ago_vov = Double.parseDouble(vid2VovFeatureMap.getOrDefault(item.getVideoId() + "", new HashMap<>())
|
|
|
+ .getOrDefault("h24_ago_vov", "-2")); // 如果没有时,默认为多少?? 需要考虑
|
|
|
+ double h48_ago_vov = Double.parseDouble(vid2VovFeatureMap.getOrDefault(item.getVideoId() + "", new HashMap<>())
|
|
|
+ .getOrDefault("h48_ago_vov", "-2")); // 如果没有时,默认为多少?? 需要考虑
|
|
|
+ double d1_ago_vov = Double.parseDouble(vid2VovFeatureMap.getOrDefault(item.getVideoId() + "", new HashMap<>())
|
|
|
+ .getOrDefault("d1_ago_vov", "-2")); // 如果没有时,默认为多少?? 需要考虑
|
|
|
+ double d2_ago_vov = Double.parseDouble(vid2VovFeatureMap.getOrDefault(item.getVideoId() + "", new HashMap<>())
|
|
|
+ .getOrDefault("d2_ago_vov", "-2")); // 如果没有时,默认为多少?? 需要考虑
|
|
|
+
|
|
|
+ double h1_ago_view = Double.parseDouble(vid2VovFeatureMap.getOrDefault(item.getVideoId() + "", new HashMap<>())
|
|
|
+ .getOrDefault("h1_ago_view", "-2")); // 如果没有时,默认为多少?? 需要考虑
|
|
|
+
|
|
|
+ item.getScoresMap().put("h1_ago_vov", h1_ago_vov);
|
|
|
+ item.getScoresMap().put("h2_ago_vov", h2_ago_vov);
|
|
|
+ item.getScoresMap().put("h3_ago_vov", h3_ago_vov);
|
|
|
+ item.getScoresMap().put("h24_ago_vov", h24_ago_vov);
|
|
|
+ item.getScoresMap().put("h48_ago_vov", h48_ago_vov);
|
|
|
+ item.getScoresMap().put("d1_ago_vov", d1_ago_vov);
|
|
|
+ item.getScoresMap().put("d2_ago_vov", d2_ago_vov);
|
|
|
+
|
|
|
+ item.getScoresMap().put("h1_ago_view", h1_ago_view);
|
|
|
+ item.getScoresMap().put("alpha_vov", alpha_vov);
|
|
|
+ item.getScoresMap().put("view_thresh", view_thresh);
|
|
|
+ item.getScoresMap().put("vov_thresh", vov_thresh);
|
|
|
+
|
|
|
+
|
|
|
+ List<Double> featureList = new ArrayList<>(7);
|
|
|
+ featureList.add(d2_ago_vov);
|
|
|
+ featureList.add(d1_ago_vov);
|
|
|
+ featureList.add(h48_ago_vov);
|
|
|
+ featureList.add(h24_ago_vov);
|
|
|
+ featureList.add(h3_ago_vov);
|
|
|
+ featureList.add(h2_ago_vov);
|
|
|
+ featureList.add(h1_ago_vov);
|
|
|
+
|
|
|
+ // todo 线性加权 预测VoV
|
|
|
+
|
|
|
+
|
|
|
+ double vov_p = calculateScore(featureList, weightList, item, vov_thresh, view_thresh, h1_ago_view, level50_vov, level_95_vov, beta_vov);
|
|
|
+
|
|
|
+
|
|
|
+ double hasReturnRovScore = Double.parseDouble(vid2MapFeature.getOrDefault(item.getVideoId() + "", new HashMap<>())
|
|
|
+ .getOrDefault("rate_n", "0"));
|
|
|
+ item.getScoresMap().put("hasReturnRovScore", hasReturnRovScore);
|
|
|
+ score = fmRov * (1 + hasReturnRovScore) * (1.0 + alpha_vov * vov_p);
|
|
|
+
|
|
|
+
|
|
|
+ item.getScoresMap().put("vov_p", vov_p);
|
|
|
+
|
|
|
+ Video video = item.getVideo();
|
|
|
+ video.setScore(score);
|
|
|
+ video.setSortScore(score);
|
|
|
+ video.setScoresMap(item.getScoresMap());
|
|
|
+ video.setAllFeatureMap(item.getAllFeatureMap());
|
|
|
+ if (feature != null
|
|
|
+ && MapUtils.isNotEmpty(feature.getVideoFeature())
|
|
|
+ && MapUtils.isNotEmpty(feature.getVideoFeature().get(item.getVideoId() + ""))) {
|
|
|
+ video.getMetaFeatureMap().putAll(feature.getVideoFeature().get(item.getVideoId() + ""));
|
|
|
+ }
|
|
|
+ if (feature != null
|
|
|
+ && MapUtils.isNotEmpty(feature.getUserFeature())) {
|
|
|
+ video.getMetaFeatureMap().putAll(feature.getUserFeature());
|
|
|
+ }
|
|
|
+ result.add(video);
|
|
|
+ }
|
|
|
+ result.sort(Comparator.comparingDouble(o -> -o.getSortScore()));
|
|
|
+
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ private double calculateScore(List<Double> featureList, List<Double> weightList, RankItem rankItem,
|
|
|
+ double vov_thresh, double view_thresh, double h1_ago_view, double level50_vov, double level_95_vov, double beta_vov) {
|
|
|
+ // 检查 h1_ago_view 条件
|
|
|
+ if (h1_ago_view == -2 || h1_ago_view == -1 || h1_ago_view < view_thresh) {
|
|
|
+ rankItem.getScoresMap().put("origin_vov_p", 0d);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ // // 检查 featureList 是否全为 -1
|
|
|
+ // if (featureList.stream().allMatch(f -> f == -1)) {
|
|
|
+ // rankItem.getScoresMap().put("origin_vov_p", 0d);
|
|
|
+ // return 0;
|
|
|
+ // }
|
|
|
+
|
|
|
+ // 计算有效特征的总权重和得分
|
|
|
+ double score = 0;
|
|
|
+ List<Integer> validIndices = new ArrayList<>();
|
|
|
+
|
|
|
+ for (int i = 0; i < featureList.size(); i++) {
|
|
|
+ if (featureList.get(i) != -1) {
|
|
|
+ validIndices.add(i);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 如果没有有效特征,返回 0
|
|
|
+ if (validIndices.isEmpty()) {
|
|
|
+ rankItem.getScoresMap().put("origin_vov_p", 0d);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 计算得分,动态调整权重
|
|
|
+ for (int index : validIndices) {
|
|
|
+ double weight = weightList.get(index);
|
|
|
+ score += featureList.get(index) * weight;
|
|
|
+ }
|
|
|
+ rankItem.getScoresMap().put("origin_vov_p", score);
|
|
|
+ // 调整vov
|
|
|
+ if (score < vov_thresh) {
|
|
|
+ score = 0;
|
|
|
+ } else {
|
|
|
+ double term1 = 1 / (1 + Math.exp(-1 * beta_vov * (score - level50_vov)));
|
|
|
+ double term2 = 1 + Math.exp(-1 * beta_vov * (level_95_vov - level50_vov));
|
|
|
+ score = term1 * term2;
|
|
|
+ }
|
|
|
+ return score;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+}
|