|
@@ -4,60 +4,38 @@ import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
|
|
|
import com.google.common.reflect.TypeToken;
|
|
|
import com.tzld.piaoquan.recommend.server.common.base.RankItem;
|
|
|
import com.tzld.piaoquan.recommend.server.model.Video;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.FeatureService;
|
|
|
import com.tzld.piaoquan.recommend.server.service.rank.RankParam;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.rank.RankResult;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.rank.RankService;
|
|
|
import com.tzld.piaoquan.recommend.server.service.rank.extractor.ExtractorUtils;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.rank.extractor.RankExtractorItemFeature;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.rank.extractor.RankExtractorItemTags;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.rank.extractor.RankExtractorUserFeature;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.rank.processor.RankProcessorBoost;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.rank.processor.RankProcessorDensity;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.rank.processor.RankProcessorInsert;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.rank.processor.RankProcessorTagFilter;
|
|
|
import com.tzld.piaoquan.recommend.server.service.recall.strategy.*;
|
|
|
import com.tzld.piaoquan.recommend.server.service.score.ScorerUtils;
|
|
|
import com.tzld.piaoquan.recommend.server.util.CommonCollectionUtils;
|
|
|
import com.tzld.piaoquan.recommend.server.util.JSONUtils;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
-import org.apache.commons.collections4.CollectionUtils;
|
|
|
-import org.apache.commons.lang3.RandomUtils;
|
|
|
-import org.springframework.data.redis.connection.RedisConnectionFactory;
|
|
|
-import org.springframework.data.redis.connection.RedisStandaloneConfiguration;
|
|
|
-import org.springframework.data.redis.connection.jedis.JedisConnectionFactory;
|
|
|
-import org.springframework.data.redis.core.RedisTemplate;
|
|
|
-import org.springframework.data.redis.serializer.StringRedisSerializer;
|
|
|
+import org.apache.commons.collections4.MapUtils;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
|
|
|
-import java.text.SimpleDateFormat;
|
|
|
+import java.io.BufferedReader;
|
|
|
+import java.io.IOException;
|
|
|
+import java.io.InputStream;
|
|
|
+import java.io.InputStreamReader;
|
|
|
import java.util.*;
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
|
-/**
|
|
|
- * @author zhangbo
|
|
|
- * @desc 地域召回融合 流量池汤姆森
|
|
|
- */
|
|
|
@Service
|
|
|
@Slf4j
|
|
|
-public class RankStrategy4RegionMergeModelV563 extends RankService {
|
|
|
+public class RankStrategy4RegionMergeModelV563 extends RankStrategy4RegionMergeModelBasic {
|
|
|
@ApolloJsonValue("${rank.score.merge.weightv563:}")
|
|
|
private Map<String, Double> mergeWeight;
|
|
|
- @ApolloJsonValue("${RankStrategy4DensityFilterV2:}")
|
|
|
- private Map<String, Map<String, Map<String, String>>> filterRules = new HashMap<>();
|
|
|
- final private String CLASS_NAME = this.getClass().getSimpleName();
|
|
|
-
|
|
|
- public void duplicate(Set<Long> setVideo, List<Video> videos) {
|
|
|
- Iterator<Video> iterator = videos.iterator();
|
|
|
- while (iterator.hasNext()) {
|
|
|
- Video v = iterator.next();
|
|
|
- if (setVideo.contains(v.getVideoId())) {
|
|
|
- iterator.remove();
|
|
|
- } else {
|
|
|
- setVideo.add(v.getVideoId());
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
|
|
|
+ @Autowired
|
|
|
+ private FeatureService featureService;
|
|
|
+
|
|
|
+ Map<String, double[]> bucketsMap = new HashMap<>();
|
|
|
+ Map<String, Double> bucketsLen = new HashMap<>();
|
|
|
+
|
|
|
+ String CLASS_NAME = this.getClass().getSimpleName();
|
|
|
@Override
|
|
|
public List<Video> mergeAndRankRovRecall(RankParam param) {
|
|
|
Map<String, Double> mergeWeight = this.mergeWeight != null ? this.mergeWeight : new HashMap<>(0);
|
|
@@ -72,620 +50,381 @@ public class RankStrategy4RegionMergeModelV563 extends RankService {
|
|
|
oldRovs.addAll(extractAndSort(param, Region24HRecallStrategy.PUSH_FORM));
|
|
|
oldRovs.addAll(extractAndSort(param, RegionRelative24HRecallStrategy.PUSH_FORM));
|
|
|
oldRovs.addAll(extractAndSort(param, RegionRelative24HDupRecallStrategy.PUSH_FORM));
|
|
|
- int sizeReturn = param.getSize();
|
|
|
removeDuplicate(oldRovs);
|
|
|
+ int sizeReturn = param.getSize();
|
|
|
List<Video> v0 = oldRovs.size() <= sizeReturn
|
|
|
? oldRovs
|
|
|
: oldRovs.subList(0, sizeReturn);
|
|
|
-
|
|
|
- //-------------------地域相关召回 融合+去重-------------------
|
|
|
- List<Video> v1 = extractAndSort(param, RegionRealtimeRecallStrategyV1.PUSH_FORM);
|
|
|
- List<Video> v2 = extractAndSort(param, RegionRealtimeRecallStrategyV2_sort.PUSH_FORM);
|
|
|
- List<Video> v3 = extractAndSort(param, RegionRealtimeRecallStrategyV3.PUSH_FORM);
|
|
|
- List<Video> v4 = extractAndSort(param, RegionRealtimeRecallStrategyV4.PUSH_FORM);
|
|
|
- //-------------------相关性召回 融合+去重-------------------
|
|
|
- List<Video> v5 = extractAndSort(param, SimHotVideoRecallStrategy.PUSH_FORM);
|
|
|
- List<Video> v6 = extractAndSort(param, ReturnVideoRecallStrategy.PUSH_FORM);
|
|
|
- //-------------------节日扶持召回 融合+去重-------------------
|
|
|
- List<Video> v7 = extractAndSort(param, FestivalRecallStrategyV1.PUSH_FORM);
|
|
|
- //-------------------流量池回捞-------------------
|
|
|
- List<Video> v9 = extractAndSort(param, FlowPoolLastDayTopRecallStrategy.PUSH_FORM);
|
|
|
-
|
|
|
- // 去重
|
|
|
Set<Long> setVideo = new HashSet<>();
|
|
|
this.duplicate(setVideo, v0);
|
|
|
- this.duplicate(setVideo, v5);
|
|
|
- this.duplicate(setVideo, v6);
|
|
|
- this.duplicate(setVideo, v4);
|
|
|
- this.duplicate(setVideo, v3);
|
|
|
- this.duplicate(setVideo, v2);
|
|
|
- this.duplicate(setVideo, v9);
|
|
|
- this.duplicate(setVideo, v1);
|
|
|
- this.duplicate(setVideo, v7);
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
- List<Video> rovRecallRank = new ArrayList<>();
|
|
|
- rovRecallRank.addAll(v0);
|
|
|
- rovRecallRank.addAll(v1.subList(0, Math.min(mergeWeight.getOrDefault("v1", 25.0).intValue(), v1.size())));
|
|
|
- rovRecallRank.addAll(v2.subList(0, Math.min(mergeWeight.getOrDefault("v2", 15.0).intValue(), v2.size())));
|
|
|
- rovRecallRank.addAll(v3.subList(0, Math.min(mergeWeight.getOrDefault("v3", 10.0).intValue(), v3.size())));
|
|
|
- rovRecallRank.addAll(v4.subList(0, Math.min(mergeWeight.getOrDefault("v4", 5.0).intValue(), v4.size())));
|
|
|
- rovRecallRank.addAll(v5.subList(0, Math.min(mergeWeight.getOrDefault("v5", 5.0).intValue(), v5.size())));
|
|
|
- rovRecallRank.addAll(v6.subList(0, Math.min(mergeWeight.getOrDefault("v6", 5.0).intValue(), v6.size())));
|
|
|
- rovRecallRank.addAll(v7.subList(0, Math.min(mergeWeight.getOrDefault("v7", 5.0).intValue(), v7.size())));
|
|
|
- rovRecallRank.addAll(v9.subList(0, Math.min(mergeWeight.getOrDefault("v9", 5.0).intValue(), v9.size())));
|
|
|
+ setVideo.addAll(v0.stream().map(Video::getVideoId).collect(Collectors.toSet()));
|
|
|
+ List<Video> rovRecallRank = new ArrayList<>(v0);
|
|
|
+ //-------------------return相似召回------------------
|
|
|
+ List<Video> v6 = extractAndSort(param, ReturnVideoRecallStrategy.PUSH_FORM);
|
|
|
+ v6 = v6.stream().filter(r-> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
|
|
|
+ v6 = v6.subList(0, Math.min(mergeWeight.getOrDefault("v6", 5.0).intValue(), v6.size()));
|
|
|
+ rovRecallRank.addAll(v6);
|
|
|
+ setVideo.addAll(v6.stream().map(Video::getVideoId).collect(Collectors.toSet()));
|
|
|
+ //-------------------新地域召回------------------
|
|
|
+ List<Video> v1 = extractAndSort(param, RegionRealtimeRecallStrategyV1.PUSH_FORM);
|
|
|
+ v1 = v1.stream().filter(r-> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
|
|
|
+ v1 = v1.subList(0, Math.min(mergeWeight.getOrDefault("v1", 5.0).intValue(), v1.size()));
|
|
|
+ rovRecallRank.addAll(v1);
|
|
|
+ setVideo.addAll(v1.stream().map(Video::getVideoId).collect(Collectors.toSet()));
|
|
|
+
|
|
|
|
|
|
//-------------------排-------------------
|
|
|
//-------------------序-------------------
|
|
|
//-------------------逻-------------------
|
|
|
//-------------------辑-------------------
|
|
|
|
|
|
- // 1 模型分
|
|
|
- List<String> rtFeaPart = new ArrayList<>();
|
|
|
- List<RankItem> items = model(rovRecallRank, param, rtFeaPart);
|
|
|
- List<String> rtFeaPartKey = new ArrayList<>(Arrays.asList("item_rt_fea_1day_partition", "item_rt_fea_1h_partition"));
|
|
|
- List<String> rtFeaPartKeyResult = this.redisTemplate.opsForValue().multiGet(rtFeaPartKey);
|
|
|
- Calendar calendar = Calendar.getInstance();
|
|
|
- String date = new SimpleDateFormat("yyyyMMdd").format(calendar.getTime());
|
|
|
- String hour = new SimpleDateFormat("HH").format(calendar.getTime());
|
|
|
- String rtFeaPart1h = date + hour;
|
|
|
- if (rtFeaPartKeyResult != null) {
|
|
|
- if (rtFeaPartKeyResult.get(1) != null) {
|
|
|
- rtFeaPart1h = rtFeaPartKeyResult.get(1);
|
|
|
- }
|
|
|
- }
|
|
|
- // 2 统计分
|
|
|
- String cur = rtFeaPart1h;
|
|
|
- List<String> datehours = new LinkedList<>(); // 时间是倒叙的
|
|
|
- for (int i = 0; i < 24; ++i) {
|
|
|
- datehours.add(cur);
|
|
|
- cur = ExtractorUtils.subtractHours(cur, 1);
|
|
|
+ // TODO 1 批量获取特征 省份参数要对齐 headvid 要传递过来!
|
|
|
+ List<String> vids = CommonCollectionUtils.toListDistinct(rovRecallRank, v -> String.valueOf(v.getVideoId()));
|
|
|
+
|
|
|
+ // k1:视频、k2:表、k3:特征、v:特征值
|
|
|
+ String provinceCn = param.getProvince().replaceAll("省$", "");
|
|
|
+ String headVid = String.valueOf(param.getHeadVid());
|
|
|
+ FeatureService.Feature feature = featureService.getFeature(param.getMid(), vids,
|
|
|
+ String.valueOf(param.getAppType()), provinceCn, headVid);
|
|
|
+ Map<String, Map<String, String>> featureOriginUser = feature.getUserFeature();
|
|
|
+ Map<String, Map<String, Map<String, String>>> featureOriginVideo = feature.getVideoFeature();
|
|
|
+
|
|
|
+
|
|
|
+ // TODO 2 特征处理
|
|
|
+ Map<String, Double> userFeatureMapDouble = new HashMap<>();
|
|
|
+ String mid = param.getMid();
|
|
|
+ Map<String, String> c1 = featureOriginUser.getOrDefault("alg_mid_feature_play", new HashMap<>());
|
|
|
+ Map<String, String> c2 = featureOriginUser.getOrDefault("alg_mid_feature_share_and_return", new HashMap<>());
|
|
|
+ Map<String, String> c3 = featureOriginUser.getOrDefault("alg_mid_feature_play_tags", new HashMap<>());
|
|
|
+ Map<String, String> c4 = featureOriginUser.getOrDefault("alg_mid_feature_return_tags", new HashMap<>());
|
|
|
+ Map<String, String> c5 = featureOriginUser.getOrDefault("alg_mid_feature_share_tags", new HashMap<>());
|
|
|
+ Map<String, String> c6 = featureOriginUser.getOrDefault("alg_mid_feature_feed_exp_share_tags", new HashMap<>());
|
|
|
+ Map<String, String> c7 = featureOriginUser.getOrDefault("alg_mid_feature_feed_exp_return_tags", new HashMap<>());
|
|
|
+ Map<String, String> c8 = featureOriginUser.getOrDefault("alg_mid_feature_sharecf", new HashMap<>());
|
|
|
+ Map<String, String> c9 = featureOriginUser.getOrDefault("alg_mid_feature_returncf", new HashMap<>());
|
|
|
+
|
|
|
+ if (!c1.isEmpty()) {
|
|
|
+ userFeatureMapDouble.put("playcnt_6h", Double.parseDouble(c1.getOrDefault("playcnt_6h", "0")));
|
|
|
+ userFeatureMapDouble.put("playcnt_1d", Double.parseDouble(c1.getOrDefault("playcnt_1d", "0")));
|
|
|
+ userFeatureMapDouble.put("playcnt_3d", Double.parseDouble(c1.getOrDefault("playcnt_3d", "0")));
|
|
|
+ userFeatureMapDouble.put("playcnt_7d", Double.parseDouble(c1.getOrDefault("playcnt_7d", "0")));
|
|
|
}
|
|
|
- List<String> datehoursRoot = new LinkedList<>();
|
|
|
- for (int i = 0; i < 24; ++i) {
|
|
|
- datehoursRoot.add(String.valueOf(i+1));
|
|
|
+ if (!c2.isEmpty()) {
|
|
|
+ userFeatureMapDouble.put("share_pv_12h", Double.parseDouble(c2.getOrDefault("share_pv_12h", "0")));
|
|
|
+ userFeatureMapDouble.put("share_pv_1d", Double.parseDouble(c2.getOrDefault("share_pv_1d", "0")));
|
|
|
+ userFeatureMapDouble.put("share_pv_3d", Double.parseDouble(c2.getOrDefault("share_pv_3d", "0")));
|
|
|
+ userFeatureMapDouble.put("share_pv_7d", Double.parseDouble(c2.getOrDefault("share_pv_7d", "0")));
|
|
|
+ userFeatureMapDouble.put("return_uv_12h", Double.parseDouble(c2.getOrDefault("return_uv_12h", "0")));
|
|
|
+ userFeatureMapDouble.put("return_uv_1d", Double.parseDouble(c2.getOrDefault("return_uv_1d", "0")));
|
|
|
+ userFeatureMapDouble.put("return_uv_3d", Double.parseDouble(c2.getOrDefault("return_uv_3d", "0")));
|
|
|
+ userFeatureMapDouble.put("return_uv_7d", Double.parseDouble(c2.getOrDefault("return_uv_7d", "0")));
|
|
|
}
|
|
|
- // 2.1 item特征提取
|
|
|
- this.getVideoFeatureFromRedis(items);
|
|
|
|
|
|
-
|
|
|
- for (RankItem item : items) {
|
|
|
- Map<String, Map<String, Double>> itemRealRootMap = item.getItemRealTimeRootFeature();
|
|
|
- List<Double> views_20240410 = getStaticData(itemRealRootMap, datehoursRoot, "exp");
|
|
|
- List<Double> share_20240410 = getStaticData(itemRealRootMap, datehoursRoot, "share");
|
|
|
- List<Double> return_20240410 = getStaticData(itemRealRootMap, datehoursRoot, "return");
|
|
|
- List<Double> rov_20240410 = getRateData(return_20240410, views_20240410, 0.0, 0.0);
|
|
|
- Double rovScore_20240410 = calScoreWeightNoTimeDecay(rov_20240410);
|
|
|
- List<Double> ros_20240410 = getRateData(return_20240410, share_20240410, 1.0, 10.0);
|
|
|
- Double rosScore_20240410 = calScoreWeightNoTimeDecay(ros_20240410);
|
|
|
- item.scoresMap.put("rovScore_20240410", rovScore_20240410);
|
|
|
- item.scoresMap.put("rosScore_20240410", rosScore_20240410);
|
|
|
-
|
|
|
- Map<String, Map<String, Double>> itemRealMap = item.getItemRealTimeFeature();
|
|
|
- List<Double> views = getStaticData(itemRealMap, datehours, "view_pv_list_1h");
|
|
|
- List<Double> plays = getStaticData(itemRealMap, datehours, "play_pv_list_1h");
|
|
|
- List<Double> shares = getStaticData(itemRealMap, datehours, "share_pv_list_1h");
|
|
|
- List<Double> preturns = getStaticData(itemRealMap, datehours, "p_return_uv_list_1h");
|
|
|
- List<Double> allreturns = getStaticData(itemRealMap, datehours, "return_uv_list_1h");
|
|
|
-
|
|
|
- List<Double> share2return = getRateData(preturns, shares, 1.0, 1000.0);
|
|
|
- Double share2returnScore = calScoreWeightNoTimeDecay(share2return);
|
|
|
- List<Double> view2return = getRateData(preturns, views, 1.0, 1000.0);
|
|
|
- Double view2returnScore = calScoreWeightNoTimeDecay(view2return);
|
|
|
- List<Double> view2play = getRateData(plays, views, 1.0, 1000.0);
|
|
|
- Double view2playScore = calScoreWeightNoTimeDecay(view2play);
|
|
|
- List<Double> play2share = getRateData(shares, plays, 1.0, 1000.0);
|
|
|
- Double play2shareScore = calScoreWeightNoTimeDecay(play2share);
|
|
|
- item.scoresMap.put("share2returnScore", share2returnScore);
|
|
|
- item.scoresMap.put("view2returnScore", view2returnScore);
|
|
|
- item.scoresMap.put("view2playScore", view2playScore);
|
|
|
- item.scoresMap.put("play2shareScore", play2shareScore);
|
|
|
-
|
|
|
- // 全部回流的rov和ros
|
|
|
- List<Double> share2allreturn = getRateData(allreturns, shares, 1.0, 10.0);
|
|
|
- Double share2allreturnScore = calScoreWeightNoTimeDecay(share2allreturn);
|
|
|
- List<Double> view2allreturn = getRateData(allreturns, views, 0.0, 0.0);
|
|
|
- Double view2allreturnScore = calScoreWeightNoTimeDecay(view2allreturn);
|
|
|
- item.scoresMap.put("share2allreturnScore", share2allreturnScore);
|
|
|
- item.scoresMap.put("view2allreturnScore", view2allreturnScore);
|
|
|
-
|
|
|
- // 全部回流
|
|
|
- Double allreturnsScore = calScoreWeightNoTimeDecay(allreturns);
|
|
|
- item.scoresMap.put("allreturnsScore", allreturnsScore);
|
|
|
-
|
|
|
- // 平台回流
|
|
|
- Double preturnsScore = calScoreWeightNoTimeDecay(preturns);
|
|
|
- item.scoresMap.put("preturnsScore", preturnsScore);
|
|
|
-
|
|
|
- // rov的趋势
|
|
|
- double trendScore = calTrendScore(view2return);
|
|
|
- item.scoresMap.put("trendScore", trendScore);
|
|
|
-
|
|
|
- // 新视频提取
|
|
|
- Map<String, String> itemBasicMap = item.getItemBasicFeature();
|
|
|
- double newVideoScore = calNewVideoScore(itemBasicMap);
|
|
|
- item.scoresMap.put("newVideoScore", newVideoScore);
|
|
|
-
|
|
|
- }
|
|
|
- // 3 融合公式
|
|
|
- List<Video> result = new ArrayList<>();
|
|
|
- double a = mergeWeight.getOrDefault("a", 0.1);
|
|
|
- double b = mergeWeight.getOrDefault("b", 0.0);
|
|
|
- double c = mergeWeight.getOrDefault("c", 0.000001);
|
|
|
- double d = mergeWeight.getOrDefault("d", 1.0);
|
|
|
- double e = mergeWeight.getOrDefault("e", 1.0);
|
|
|
- double f = mergeWeight.getOrDefault("f", 0.1);
|
|
|
- double g = mergeWeight.getOrDefault("g", 2.0);
|
|
|
- double h = mergeWeight.getOrDefault("h", 50.0);
|
|
|
- double ifAdd = mergeWeight.getOrDefault("ifAdd", 1.0);
|
|
|
- for (RankItem item : items) {
|
|
|
- double trendScore = item.scoresMap.getOrDefault("trendScore", 0.0) > 1E-8 ?
|
|
|
- item.scoresMap.getOrDefault("trendScore", 0.0) : 0.0;
|
|
|
- double newVideoScore = item.scoresMap.getOrDefault("newVideoScore", 0.0) > 1E-8 ?
|
|
|
- item.scoresMap.getOrDefault("newVideoScore", 0.0) : 0.0;
|
|
|
- double strScore = item.getScoreStr();
|
|
|
- double rosScore = item.scoresMap.getOrDefault("share2returnScore", 0.0);
|
|
|
- double share2allreturnScore = item.scoresMap.getOrDefault("share2allreturnScore", 0.0);
|
|
|
- double view2allreturnScore = item.scoresMap.getOrDefault("view2allreturnScore", 0.0);
|
|
|
- double preturnsScore = Math.log(1 + item.scoresMap.getOrDefault("preturnsScore", 0.0));
|
|
|
- double rovScore_20240410 = item.scoresMap.getOrDefault("rovScore_20240410", 0.0);
|
|
|
- double rosScore_20240410 = item.scoresMap.getOrDefault("rosScore_20240410", 0.0);
|
|
|
-
|
|
|
- double score = 0.0;
|
|
|
- if (ifAdd < 0.5) {
|
|
|
- score = Math.pow(strScore, a) * Math.pow(rosScore, b) + c * preturnsScore +
|
|
|
- (newVideoScore > 1E-8 ? d * trendScore * (e + newVideoScore) : 0.0);
|
|
|
- } else {
|
|
|
- score = a * strScore + b * rosScore + c * preturnsScore +
|
|
|
- (newVideoScore > 1E-8 ? d * trendScore * (e + newVideoScore) : 0.0);
|
|
|
-
|
|
|
- }
|
|
|
- double allreturnsScore = item.scoresMap.getOrDefault("allreturnsScore", 0.0);
|
|
|
- if (allreturnsScore > h) {
|
|
|
- score += (f * rosScore_20240410 + g * rovScore_20240410);
|
|
|
+ Map<String, String> c34567Map = new HashMap<>(15);
|
|
|
+ List<Tuple2> tmpList0 = Arrays.asList(
|
|
|
+ new Tuple2(c3, "c3_feature"),
|
|
|
+ new Tuple2(c4, "c4_feature"),
|
|
|
+ new Tuple2(c5, "c5_feature"),
|
|
|
+ new Tuple2(c6, "c6_feature"),
|
|
|
+ new Tuple2(c7, "c7_feature")
|
|
|
+ );
|
|
|
+ for (Tuple2 tuple2 : tmpList0) {
|
|
|
+ for (String key_time : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
|
|
|
+ String tags = tuple2.first.getOrDefault(key_time, "");
|
|
|
+ if (!tags.isEmpty()) {
|
|
|
+ c34567Map.put(tuple2.name + "_" + key_time, tags);
|
|
|
+ }
|
|
|
}
|
|
|
- Video video = item.getVideo();
|
|
|
- video.setScore(score);
|
|
|
- video.setSortScore(score);
|
|
|
- video.setScoreStr(item.getScoreStr());
|
|
|
- video.setScoresMap(item.getScoresMap());
|
|
|
- result.add(video);
|
|
|
- }
|
|
|
- Collections.sort(result, Comparator.comparingDouble(o -> -o.getSortScore()));
|
|
|
- return result;
|
|
|
- }
|
|
|
-
|
|
|
- public double calNewVideoScore(Map<String, String> itemBasicMap) {
|
|
|
- double existenceDays = Double.valueOf(itemBasicMap.getOrDefault("existence_days", "30"));
|
|
|
- if (existenceDays > 5) {
|
|
|
- return 0.0;
|
|
|
- }
|
|
|
- double score = 1.0 / (existenceDays + 10.0);
|
|
|
- return score;
|
|
|
- }
|
|
|
-
|
|
|
- public double calTrendScore(List<Double> data) {
|
|
|
- double sum = 0.0;
|
|
|
- int size = data.size();
|
|
|
- for (int i = 0; i < size - 4; ++i) {
|
|
|
- sum += data.get(i) - data.get(i + 4);
|
|
|
}
|
|
|
- if (sum * 10 > 0.6) {
|
|
|
- sum = 0.6;
|
|
|
- } else {
|
|
|
- sum = sum * 10;
|
|
|
- }
|
|
|
- if (sum > 0) {
|
|
|
- // 为了打断点
|
|
|
- sum = sum;
|
|
|
- }
|
|
|
- return sum;
|
|
|
- }
|
|
|
|
|
|
- public Double calScoreWeightNoTimeDecay(List<Double> data) {
|
|
|
- Double up = 0.0;
|
|
|
- Double down = 0.0;
|
|
|
- for (int i = 0; i < data.size(); ++i) {
|
|
|
- up += 1.0 * data.get(i);
|
|
|
- down += 1.0;
|
|
|
- }
|
|
|
- return down > 1E-8 ? up / down : 0.0;
|
|
|
- }
|
|
|
-
|
|
|
- public List<Double> getRateData(List<Double> ups, List<Double> downs, Double up, Double down) {
|
|
|
- List<Double> data = new LinkedList<>();
|
|
|
- for (int i = 0; i < ups.size(); ++i) {
|
|
|
- if (ExtractorUtils.isDoubleEqualToZero(downs.get(i) + down)) {
|
|
|
- data.add(0.0);
|
|
|
- } else {
|
|
|
- data.add(
|
|
|
- (ups.get(i) + up) / (downs.get(i) + down)
|
|
|
- );
|
|
|
+ Map<String, Map<String, String[]>> c89Map = new HashMap<>(4);
|
|
|
+ List<Tuple2> tmpList1 = Arrays.asList(
|
|
|
+ new Tuple2(c8, "c8_feature"),
|
|
|
+ new Tuple2(c9, "c9_feature")
|
|
|
+ );
|
|
|
+ for (Tuple2 tuple2 : tmpList1) {
|
|
|
+ for (String key_action : Arrays.asList("share", "return")) {
|
|
|
+ String cfListStr = tuple2.first.getOrDefault(key_action, "");
|
|
|
+ if (!cfListStr.isEmpty()) {
|
|
|
+ Map<String, String[]> cfMap = new HashMap<>();
|
|
|
+ String[] entries = cfListStr.split(",");
|
|
|
+ for (String entry : entries) {
|
|
|
+ String[] rList = entry.split(":");
|
|
|
+ if (rList.length >= 4) { // 确保分割后有四个元素
|
|
|
+ String key = rList[0];
|
|
|
+ String value1 = rList[1];
|
|
|
+ String value2 = rList[2];
|
|
|
+ String value3 = rList[3];
|
|
|
+ String[] strs = {value1, value2, value3};
|
|
|
+ cfMap.put(key, strs);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ c89Map.put(tuple2.name + "_" + key_action, cfMap);
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
- return data;
|
|
|
- }
|
|
|
|
|
|
- public List<Double> getStaticData(Map<String, Map<String, Double>> itemRealMap,
|
|
|
- List<String> datehours, String key) {
|
|
|
- List<Double> views = new LinkedList<>();
|
|
|
- Map<String, Double> tmp = itemRealMap.getOrDefault(key, new HashMap<>());
|
|
|
- for (String dh : datehours) {
|
|
|
- views.add(tmp.getOrDefault(dh, 0.0D) +
|
|
|
- (views.isEmpty() ? 0.0 : views.get(views.size() - 1))
|
|
|
- );
|
|
|
- }
|
|
|
- return views;
|
|
|
- }
|
|
|
|
|
|
- public List<RankItem> model(List<Video> videos, RankParam param,
|
|
|
- List<String> rtFeaPart) {
|
|
|
- List<RankItem> result = new ArrayList<>();
|
|
|
- if (videos.isEmpty()) {
|
|
|
- return result;
|
|
|
- }
|
|
|
+ List<RankItem> rankItems = CommonCollectionUtils.toList(rovRecallRank, RankItem::new);
|
|
|
+ for (RankItem item : rankItems) {
|
|
|
+ Map<String, Double> featureMap = new HashMap<>();
|
|
|
+ String vid = item.getVideoId() + "";
|
|
|
+ Map<String, String> b1 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_all_exp", new HashMap<>());
|
|
|
+ Map<String, String> b2 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_all_share", new HashMap<>());
|
|
|
+ Map<String, String> b3 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_all_return", new HashMap<>());
|
|
|
+ Map<String, String> b6 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_exp2share", new HashMap<>());
|
|
|
+ Map<String, String> b7 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_share2return", new HashMap<>());
|
|
|
+
|
|
|
+ Map<String, String> b8 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_noflow_exp", new HashMap<>());
|
|
|
+ Map<String, String> b9 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_noflow_root_share", new HashMap<>());
|
|
|
+ Map<String, String> b10 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_noflow_root_return", new HashMap<>());
|
|
|
+ Map<String, String> b11 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_flow_exp", new HashMap<>());
|
|
|
+ Map<String, String> b12 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_flow_root_share", new HashMap<>());
|
|
|
+ Map<String, String> b13 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_flow_root_return", new HashMap<>());
|
|
|
+ Map<String, String> b17 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_province_exp", new HashMap<>());
|
|
|
+ Map<String, String> b18 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_province_root_share", new HashMap<>());
|
|
|
+ Map<String, String> b19 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_province_root_return", new HashMap<>());
|
|
|
+
|
|
|
+ List<Tuple4> originData = Arrays.asList(
|
|
|
+ new Tuple4(b1, b2, b3, "b123"),
|
|
|
+ new Tuple4(b1, b6, b7, "b167"),
|
|
|
+ new Tuple4(b8, b9, b10, "b8910"),
|
|
|
+ new Tuple4(b11, b12, b13, "b111213"),
|
|
|
+ new Tuple4(b17, b18, b19, "b171819")
|
|
|
+ );
|
|
|
|
|
|
- RedisStandaloneConfiguration redisSC = new RedisStandaloneConfiguration();
|
|
|
- redisSC.setPort(6379);
|
|
|
- redisSC.setPassword("Wqsd@2019");
|
|
|
- redisSC.setHostName("r-bp1pi8wyv6lzvgjy5z.redis.rds.aliyuncs.com");
|
|
|
- RedisConnectionFactory connectionFactory = new JedisConnectionFactory(redisSC);
|
|
|
- RedisTemplate<String, String> redisTemplate = new RedisTemplate<>();
|
|
|
- redisTemplate.setConnectionFactory(connectionFactory);
|
|
|
- redisTemplate.setDefaultSerializer(new StringRedisSerializer());
|
|
|
- redisTemplate.afterPropertiesSet();
|
|
|
-
|
|
|
- // 0: 场景特征处理
|
|
|
- Map<String, String> sceneFeatureMap = this.getSceneFeature(param);
|
|
|
-
|
|
|
- // 1: user特征处理
|
|
|
- Map<String, String> userFeatureMap = new HashMap<>();
|
|
|
- if (param.getMid() != null && !param.getMid().isEmpty()) {
|
|
|
- String midKey = "user_info_4video_" + param.getMid();
|
|
|
- String userFeatureStr = redisTemplate.opsForValue().get(midKey);
|
|
|
- if (userFeatureStr != null) {
|
|
|
- try {
|
|
|
- userFeatureMap = JSONUtils.fromJson(userFeatureStr,
|
|
|
- new TypeToken<Map<String, String>>() {
|
|
|
- },
|
|
|
- userFeatureMap);
|
|
|
- } catch (Exception e) {
|
|
|
- log.error(String.format("parse user json is wrong in {} with {}", this.CLASS_NAME, e));
|
|
|
+ for (Tuple4 tuple4 : originData) {
|
|
|
+ for (String prefix2 : Arrays.asList("1h", "2h", "3h", "4h", "12h", "1d", "3d", "7d")) {
|
|
|
+ double exp = tuple4.first.isEmpty() ? 0 : Double.parseDouble(tuple4.first.getOrDefault("exp_pv_" + prefix2, "0.0"));
|
|
|
+ double share = tuple4.second.isEmpty() ? 0 : Double.parseDouble(tuple4.second.getOrDefault("share_pv_" + prefix2, "0.0"));
|
|
|
+ double returns = tuple4.third.isEmpty() ? 0 : Double.parseDouble(tuple4.third.getOrDefault("return_uv_" + prefix2, "0.0"));
|
|
|
+
|
|
|
+ double f1 = ExtractorUtils.calDiv(share, exp);
|
|
|
+ double f2 = ExtractorUtils.calLog(share);
|
|
|
+ double f3 = ExtractorUtils.calDiv(returns, exp);
|
|
|
+ double f4 = ExtractorUtils.calLog(returns);
|
|
|
+ double f5 = f3 * f4;
|
|
|
+
|
|
|
+ String key1 = tuple4.name + "_" + prefix2 + "_" + "STR";
|
|
|
+ String key2 = tuple4.name + "_" + prefix2 + "_" + "log(share)";
|
|
|
+ String key3 = tuple4.name + "_" + prefix2 + "_" + "ROV";
|
|
|
+ String key4 = tuple4.name + "_" + prefix2 + "_" + "log(return)";
|
|
|
+ String key5 = tuple4.name + "_" + prefix2 + "_" + "ROV*log(return)";
|
|
|
+
|
|
|
+ featureMap.put(key1, f1);
|
|
|
+ featureMap.put(key2, f2);
|
|
|
+ featureMap.put(key3, f3);
|
|
|
+ featureMap.put(key4, f4);
|
|
|
+ featureMap.put(key5, f5);
|
|
|
}
|
|
|
}
|
|
|
- }
|
|
|
- final Set<String> userFeatureSet = new HashSet<>(Arrays.asList(
|
|
|
- "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_system",
|
|
|
- "u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
|
|
|
- "u_3day_exp_cnt", "u_3day_click_cnt", "u_3day_share_cnt", "u_3day_return_cnt"
|
|
|
- ));
|
|
|
- Iterator<Map.Entry<String, String>> iterator = userFeatureMap.entrySet().iterator();
|
|
|
- while (iterator.hasNext()) {
|
|
|
- Map.Entry<String, String> entry = iterator.next();
|
|
|
- if (!userFeatureSet.contains(entry.getKey())) {
|
|
|
- iterator.remove();
|
|
|
- }
|
|
|
- }
|
|
|
|
|
|
- Map<String, String> f1 = RankExtractorUserFeature.getOriginFeature(userFeatureMap,
|
|
|
- new HashSet<String>(Arrays.asList(
|
|
|
- "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_system"
|
|
|
- ))
|
|
|
- );
|
|
|
- Map<String, String> f2 = RankExtractorUserFeature.getUserRateFeature(userFeatureMap);
|
|
|
- Map<String, String> f3 = RankExtractorUserFeature.cntFeatureChange(userFeatureMap,
|
|
|
- new HashSet<String>(Arrays.asList(
|
|
|
- "u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
|
|
|
- "u_3day_exp_cnt", "u_3day_click_cnt", "u_3day_share_cnt", "u_3day_return_cnt"
|
|
|
- ))
|
|
|
- );
|
|
|
- f1.putAll(f2);
|
|
|
- f1.putAll(f3);
|
|
|
-
|
|
|
- // 2-1: item特征处理
|
|
|
- final Set<String> itemFeatureSet = new HashSet<>(Arrays.asList(
|
|
|
- "total_time", "play_count_total",
|
|
|
- "i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
|
|
|
- "i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt"
|
|
|
- ));
|
|
|
-
|
|
|
- List<RankItem> rankItems = CommonCollectionUtils.toList(videos, RankItem::new);
|
|
|
- List<Long> videoIds = CommonCollectionUtils.toListDistinct(videos, Video::getVideoId);
|
|
|
- List<String> videoFeatureKeys = videoIds.stream().map(r -> "video_info_" + r)
|
|
|
- .collect(Collectors.toList());
|
|
|
- List<String> videoFeatures = redisTemplate.opsForValue().multiGet(videoFeatureKeys);
|
|
|
- if (videoFeatures != null) {
|
|
|
- for (int i = 0; i < videoFeatures.size(); ++i) {
|
|
|
- String vF = videoFeatures.get(i);
|
|
|
- Map<String, String> vfMap = new HashMap<>();
|
|
|
- if (vF == null) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- try {
|
|
|
- vfMap = JSONUtils.fromJson(vF, new TypeToken<Map<String, String>>() {
|
|
|
- }, vfMap);
|
|
|
- Map<String, String> vfMapCopy = new HashMap<>(vfMap);
|
|
|
- rankItems.get(i).setItemBasicFeature(vfMapCopy);
|
|
|
- Iterator<Map.Entry<String, String>> iteratorIn = vfMap.entrySet().iterator();
|
|
|
- while (iteratorIn.hasNext()) {
|
|
|
- Map.Entry<String, String> entry = iteratorIn.next();
|
|
|
- if (!itemFeatureSet.contains(entry.getKey())) {
|
|
|
- iteratorIn.remove();
|
|
|
+ Map<String, String> videoInfo = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
|
|
|
+ featureMap.put("total_time", Double.parseDouble(videoInfo.getOrDefault("total_time", "0")));
|
|
|
+ featureMap.put("bit_rate", Double.parseDouble(videoInfo.getOrDefault("bit_rate", "0")));
|
|
|
+
|
|
|
+ String title = videoInfo.getOrDefault("title", "");
|
|
|
+ if (!title.isEmpty()) {
|
|
|
+ for (String name : Arrays.asList("c3_feature", "c4_feature", "c5_feature", "c6_feature", "c7_feature")) {
|
|
|
+ for (String key_time : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
|
|
|
+ String tags = c34567Map.getOrDefault(name + "_" + key_time, "");
|
|
|
+ if (!tags.isEmpty()) {
|
|
|
+ Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
|
|
|
+ featureMap.put(name + "_" + key_time + "_matchnum", doubles[0]);
|
|
|
+ featureMap.put(name + "_" + key_time + "_maxscore", doubles[1]);
|
|
|
+ featureMap.put(name + "_" + key_time + "_avgscore", doubles[2]);
|
|
|
}
|
|
|
}
|
|
|
- Map<String, String> f4 = RankExtractorItemFeature.getItemRateFeature(vfMap);
|
|
|
- Map<String, String> f5 = RankExtractorItemFeature.cntFeatureChange(vfMap,
|
|
|
- new HashSet<String>(Arrays.asList(
|
|
|
- "total_time", "play_count_total",
|
|
|
- "i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
|
|
|
- "i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt"))
|
|
|
- );
|
|
|
- f4.putAll(f5);
|
|
|
- rankItems.get(i).setFeatureMap(f4);
|
|
|
- } catch (Exception e) {
|
|
|
- log.error(String.format("parse video json is wrong in {} with {}", this.CLASS_NAME, e));
|
|
|
}
|
|
|
}
|
|
|
- }
|
|
|
- // 2-2: item 实时特征处理
|
|
|
- List<String> rtFeaPartKey = new ArrayList<>(Arrays.asList("item_rt_fea_1day_partition", "item_rt_fea_1h_partition"));
|
|
|
- List<String> rtFeaPartKeyResult = this.redisTemplate.opsForValue().multiGet(rtFeaPartKey);
|
|
|
- Calendar calendar = Calendar.getInstance();
|
|
|
- String date = new SimpleDateFormat("yyyyMMdd").format(calendar.getTime());
|
|
|
- String hour = new SimpleDateFormat("HH").format(calendar.getTime());
|
|
|
- String rtFeaPart1day = date + hour;
|
|
|
- String rtFeaPart1h = date + hour;
|
|
|
- if (rtFeaPartKeyResult != null) {
|
|
|
- if (rtFeaPartKeyResult.get(0) != null) {
|
|
|
- rtFeaPart1day = rtFeaPartKeyResult.get(0);
|
|
|
- }
|
|
|
- if (rtFeaPartKeyResult.get(1) != null) {
|
|
|
- rtFeaPart1h = rtFeaPartKeyResult.get(1);
|
|
|
- }
|
|
|
- }
|
|
|
|
|
|
- List<String> videoRtKeys1 = videoIds.stream().map(r -> "item_rt_fea_1day_" + r)
|
|
|
- .collect(Collectors.toList());
|
|
|
- List<String> videoRtKeys2 = videoIds.stream().map(r -> "item_rt_fea_1h_" + r)
|
|
|
- .collect(Collectors.toList());
|
|
|
- videoRtKeys1.addAll(videoRtKeys2);
|
|
|
- List<String> videoRtFeatures = this.redisTemplate.opsForValue().multiGet(videoRtKeys1);
|
|
|
-
|
|
|
-
|
|
|
- if (videoRtFeatures != null) {
|
|
|
- int j = 0;
|
|
|
- for (RankItem item : rankItems) {
|
|
|
- String vF = videoRtFeatures.get(j);
|
|
|
- ++j;
|
|
|
- if (vF == null) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- Map<String, String> vfMap = new HashMap<>();
|
|
|
- Map<String, Map<String, Double>> vfMapNew = new HashMap<>();
|
|
|
- try {
|
|
|
- vfMap = JSONUtils.fromJson(vF, new TypeToken<Map<String, String>>() {
|
|
|
- }, vfMap);
|
|
|
- for (Map.Entry<String, String> entry : vfMap.entrySet()) {
|
|
|
- String value = entry.getValue();
|
|
|
- if (value == null) {
|
|
|
- continue;
|
|
|
+ if (!vid.isEmpty()) {
|
|
|
+ for (String key_feature : Arrays.asList("c8_feature", "c9_feature")) {
|
|
|
+ for (String key_action : Arrays.asList("share", "return")) {
|
|
|
+ Map<String, String[]> cfMap = c89Map.getOrDefault(key_feature + "_" + key_action, new HashMap<>());
|
|
|
+ if (cfMap.containsKey(vid)) {
|
|
|
+ String[] scores = cfMap.get(vid);
|
|
|
+ Double score1 = Double.parseDouble(scores[0]);
|
|
|
+ Double score2 = Double.parseDouble(scores[1]);
|
|
|
+ Double score3 = Double.parseDouble(scores[2]) <= 0 ? 0D : 1.0 / Double.parseDouble(scores[2]);
|
|
|
+ featureMap.put(key_feature + "_" + key_action + "_score", score1);
|
|
|
+ featureMap.put(key_feature + "_" + key_action + "_num", score2);
|
|
|
+ featureMap.put(key_feature + "_" + key_action + "_rank", score3);
|
|
|
}
|
|
|
- String[] var1 = value.split(",");
|
|
|
- Map<String, Double> tmp = new HashMap<>();
|
|
|
- for (String var2 : var1) {
|
|
|
- String[] var3 = var2.split(":");
|
|
|
- tmp.put(var3[0], Double.valueOf(var3[1]));
|
|
|
- }
|
|
|
- vfMapNew.put(entry.getKey(), tmp);
|
|
|
}
|
|
|
- } catch (Exception e) {
|
|
|
- log.error(String.format("parse video item_rt_fea_1day_ json is wrong in {} with {}", this.CLASS_NAME, e));
|
|
|
}
|
|
|
- Map<String, String> f8 = RankExtractorItemFeature.getItemRealtimeRate(vfMapNew, rtFeaPart1day);
|
|
|
- item.getFeatureMap().putAll(f8);
|
|
|
}
|
|
|
- for (RankItem item : rankItems) {
|
|
|
- String vF = videoRtFeatures.get(j);
|
|
|
- ++j;
|
|
|
- if (vF == null) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- Map<String, String> vfMap = new HashMap<>();
|
|
|
- Map<String, Map<String, Double>> vfMapNew = new HashMap<>();
|
|
|
- try {
|
|
|
- vfMap = JSONUtils.fromJson(vF, new TypeToken<Map<String, String>>() {
|
|
|
- }, vfMap);
|
|
|
-
|
|
|
- for (Map.Entry<String, String> entry : vfMap.entrySet()) {
|
|
|
- String value = entry.getValue();
|
|
|
- if (value == null) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- String[] var1 = value.split(",");
|
|
|
- Map<String, Double> tmp = new HashMap<>();
|
|
|
- for (String var2 : var1) {
|
|
|
- String[] var3 = var2.split(":");
|
|
|
- tmp.put(var3[0], Double.valueOf(var3[1]));
|
|
|
- }
|
|
|
- vfMapNew.put(entry.getKey(), tmp);
|
|
|
- }
|
|
|
- item.setItemRealTimeFeature(vfMapNew);
|
|
|
- } catch (Exception e) {
|
|
|
- log.error(String.format("parse video item_rt_fea_1h_ json is wrong in {} with {}", this.CLASS_NAME, e));
|
|
|
- }
|
|
|
- Map<String, String> f8 = RankExtractorItemFeature.getItemRealtimeRate(vfMapNew, rtFeaPart1h);
|
|
|
- item.getFeatureMap().putAll(f8);
|
|
|
+ Map<String, String> d1 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_recsys_feature_cf_i2i_new", new HashMap<>());
|
|
|
+ if (!d1.isEmpty()) {
|
|
|
+ featureMap.put("d1_exp", Double.parseDouble(d1.getOrDefault("exp", "0")));
|
|
|
+ featureMap.put("d1_return_n", Double.parseDouble(d1.getOrDefault("return_n", "0")));
|
|
|
+ featureMap.put("d1_rovn", Double.parseDouble(d1.getOrDefault("rovn", "0")));
|
|
|
}
|
|
|
+ item.featureMapDouble = featureMap;
|
|
|
}
|
|
|
|
|
|
-
|
|
|
- List<RankItem> rovRecallScore = ScorerUtils.getScorerPipeline(ScorerUtils.BASE_CONF)
|
|
|
- .scoring(sceneFeatureMap, userFeatureMap, rankItems);
|
|
|
- return rovRecallScore;
|
|
|
- }
|
|
|
-
|
|
|
- private Map<String, String> getSceneFeature(RankParam param) {
|
|
|
- Map<String, String> sceneFeatureMap = new HashMap<>();
|
|
|
- String provinceCn = param.getProvince();
|
|
|
- provinceCn = provinceCn.replaceAll("省$", "");
|
|
|
- sceneFeatureMap.put("ctx_region", provinceCn);
|
|
|
- String city = param.getCity();
|
|
|
- if ("台北市".equals(city) |
|
|
|
- "高雄市".equals(city) |
|
|
|
- "台中市".equals(city) |
|
|
|
- "桃园市".equals(city) |
|
|
|
- "新北市".equals(city) |
|
|
|
- "台南市".equals(city) |
|
|
|
- "基隆市".equals(city) |
|
|
|
- "吉林市".equals(city) |
|
|
|
- "新竹市".equals(city) |
|
|
|
- "嘉义市".equals(city)
|
|
|
- ) {
|
|
|
- } else {
|
|
|
- city = city.replaceAll("市$", "");
|
|
|
- }
|
|
|
- sceneFeatureMap.put("ctx_city", city);
|
|
|
-
|
|
|
- Calendar calendar = Calendar.getInstance();
|
|
|
- sceneFeatureMap.put("ctx_week", (calendar.get(Calendar.DAY_OF_WEEK) + 6) % 7 + "");
|
|
|
- sceneFeatureMap.put("ctx_hour", new SimpleDateFormat("HH").format(calendar.getTime()));
|
|
|
-
|
|
|
- return sceneFeatureMap;
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public RankResult mergeAndSort(RankParam param, List<Video> rovVideos, List<Video> flowVideos) {
|
|
|
-
|
|
|
- //1 兜底策略,rov池子不足时,用冷启池填补。直接返回。
|
|
|
- if (CollectionUtils.isEmpty(rovVideos)) {
|
|
|
- if (param.getSize() < flowVideos.size()) {
|
|
|
- return new RankResult(flowVideos.subList(0, param.getSize()));
|
|
|
- } else {
|
|
|
- return new RankResult(flowVideos);
|
|
|
+ // 3 连续值特征分桶
|
|
|
+ readBucketFile();
|
|
|
+ Map<String, String> userFeatureMap = new HashMap<>(userFeatureMapDouble.size());
|
|
|
+ for (Map.Entry<String, Double> entry : userFeatureMapDouble.entrySet()) {
|
|
|
+ String name = entry.getKey();
|
|
|
+ Double score = entry.getValue();
|
|
|
+ // 注意:0值、不在分桶文件中的特征,会被过滤掉。
|
|
|
+ if (score > 1E-8 && this.bucketsLen.containsKey(name) && this.bucketsMap.containsKey(name)) {
|
|
|
+ Double bucketNum = this.bucketsLen.get(name);
|
|
|
+ double[] buckets = this.bucketsMap.get(name);
|
|
|
+ Double scoreNew = 1.0 / bucketNum * (ExtractorUtils.findInsertPosition(buckets, score) + 1.0);
|
|
|
+ userFeatureMap.put(name, String.valueOf(scoreNew));
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- //2 根据实验号解析阿波罗参数。
|
|
|
- String abCode = param.getAbCode();
|
|
|
- Map<String, Map<String, String>> rulesMap = this.filterRules.getOrDefault(abCode, new HashMap<>(0));
|
|
|
-
|
|
|
- //3 标签读取
|
|
|
- if (rulesMap != null && !rulesMap.isEmpty()) {
|
|
|
- RankExtractorItemTags extractorItemTags = new RankExtractorItemTags(this.redisTemplate);
|
|
|
- extractorItemTags.processor(rovVideos, flowVideos);
|
|
|
- }
|
|
|
- //6 合并结果时间卡控
|
|
|
- if (rulesMap != null && !rulesMap.isEmpty()) {
|
|
|
- RankProcessorTagFilter.processor(rovVideos, flowVideos, rulesMap);
|
|
|
+ for (RankItem item : rankItems) {
|
|
|
+ Map<String, String> featureMap = new HashMap<>();
|
|
|
+ Map<String, Double> featureMapDouble = item.featureMapDouble;
|
|
|
+
|
|
|
+ for (Map.Entry<String, Double> entry : featureMapDouble.entrySet()) {
|
|
|
+ String name = entry.getKey();
|
|
|
+ Double score = entry.getValue();
|
|
|
+ // 注意:0值、不在分桶文件中的特征,会被过滤掉。
|
|
|
+ if (score > 1E-8 && this.bucketsLen.containsKey(name) && this.bucketsMap.containsKey(name)) {
|
|
|
+ Double bucketNum = this.bucketsLen.get(name);
|
|
|
+ double[] buckets = this.bucketsMap.get(name);
|
|
|
+ Double scoreNew = 1.0 / bucketNum * (ExtractorUtils.findInsertPosition(buckets, score) + 1.0);
|
|
|
+ featureMap.put(name, String.valueOf(scoreNew));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ item.featureMap = featureMap;
|
|
|
}
|
|
|
|
|
|
- //4 rov池提权功能
|
|
|
- RankProcessorBoost.boostByTag(rovVideos, rulesMap);
|
|
|
-
|
|
|
- //5 rov池强插功能
|
|
|
- RankProcessorInsert.insertByTag(param, rovVideos, rulesMap);
|
|
|
+ // 排序模型
|
|
|
+ Map<String, String> sceneFeatureMap = new HashMap<>(0);
|
|
|
+ List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_20240609.conf")
|
|
|
+ .scoring(sceneFeatureMap, userFeatureMap, rankItems);
|
|
|
|
|
|
- //7 流量池按比例强插
|
|
|
+ // 排序融合分
|
|
|
+ String redisScoreKey = "redis:vid_hasreturn1d7d_rov:";
|
|
|
+ Map<String, Map<String, String>> vid2MapFeature = this.getVideoRedisFeature(vids, redisScoreKey);
|
|
|
List<Video> result = new ArrayList<>();
|
|
|
- for (int i = 0; i < param.getTopK() && i < rovVideos.size(); i++) {
|
|
|
- result.add(rovVideos.get(i));
|
|
|
- }
|
|
|
- double flowPoolP = getFlowPoolP(param);
|
|
|
- int flowPoolIndex = 0;
|
|
|
- int rovPoolIndex = param.getTopK();
|
|
|
- for (int i = 0; i < param.getSize() - param.getTopK(); i++) {
|
|
|
- double rand = RandomUtils.nextDouble(0, 1);
|
|
|
- if (rand < flowPoolP) {
|
|
|
- if (flowPoolIndex < flowVideos.size()) {
|
|
|
- result.add(flowVideos.get(flowPoolIndex++));
|
|
|
- } else {
|
|
|
- break;
|
|
|
- }
|
|
|
- } else {
|
|
|
- if (rovPoolIndex < rovVideos.size()) {
|
|
|
- result.add(rovVideos.get(rovPoolIndex++));
|
|
|
- } else {
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- if (rovPoolIndex >= rovVideos.size()) {
|
|
|
- for (int i = flowPoolIndex; i < flowVideos.size() && result.size() < param.getSize(); i++) {
|
|
|
- result.add(flowVideos.get(i));
|
|
|
+ Double chooseFunction = mergeWeight.getOrDefault("chooseFunction", 0.0);
|
|
|
+ Double rosDefault = mergeWeight.getOrDefault("rosDefault", 0.1);
|
|
|
+ Double has_1d = mergeWeight.getOrDefault("has_1d", 1.0);
|
|
|
+ Double has_2d = mergeWeight.getOrDefault("has_2d", 1.0);
|
|
|
+ Double has_3d = mergeWeight.getOrDefault("has_3d", 1.0);
|
|
|
+ Double has_4d = mergeWeight.getOrDefault("has_4d", 0.0);
|
|
|
+ Double has_5d = mergeWeight.getOrDefault("has_5d", 0.0);
|
|
|
+ Double has_6d = mergeWeight.getOrDefault("has_6d", 0.0);
|
|
|
+ Double has_7d = mergeWeight.getOrDefault("has_7d", 1.0);
|
|
|
+
|
|
|
+ for (RankItem item : items) {
|
|
|
+ double score = 0.0;
|
|
|
+ Map<String, String> hasReturnRovScoreMap = vid2MapFeature.getOrDefault(item.getVideoId() + "", new HashMap<>());
|
|
|
+ double hasReturnRovScore_1d = Double.parseDouble(hasReturnRovScoreMap.getOrDefault("rate_n_1d", "0"));
|
|
|
+ double hasReturnRovScore_2d = Double.parseDouble(hasReturnRovScoreMap.getOrDefault("rate_n_2d", "0"));
|
|
|
+ double hasReturnRovScore_3d = Double.parseDouble(hasReturnRovScoreMap.getOrDefault("rate_n_3d", "0"));
|
|
|
+ double hasReturnRovScore_4d = Double.parseDouble(hasReturnRovScoreMap.getOrDefault("rate_n_4d", "0"));
|
|
|
+ double hasReturnRovScore_5d = Double.parseDouble(hasReturnRovScoreMap.getOrDefault("rate_n_5d", "0"));
|
|
|
+ double hasReturnRovScore_6d = Double.parseDouble(hasReturnRovScoreMap.getOrDefault("rate_n_6d", "0"));
|
|
|
+ double hasReturnRovScore_7d = Double.parseDouble(hasReturnRovScoreMap.getOrDefault("rate_n_7d", "0"));
|
|
|
+ double hasReturnRovScore = (
|
|
|
+ hasReturnRovScore_1d + hasReturnRovScore_2d + hasReturnRovScore_3d +
|
|
|
+ hasReturnRovScore_4d + hasReturnRovScore_5d + hasReturnRovScore_6d
|
|
|
+ + hasReturnRovScore_7d
|
|
|
+ ) / 7;
|
|
|
+ if (has_1d + has_2d + has_3d + has_4d + has_5d + has_6d + has_7d > 0){
|
|
|
+ hasReturnRovScore = (
|
|
|
+ hasReturnRovScore_1d*has_1d + hasReturnRovScore_2d*has_2d + hasReturnRovScore_3d*has_3d +
|
|
|
+ hasReturnRovScore_4d*has_4d + hasReturnRovScore_5d*has_5d + hasReturnRovScore_6d*has_6d
|
|
|
+ + hasReturnRovScore_7d*has_7d
|
|
|
+ ) / (
|
|
|
+ has_1d + has_2d + has_3d + has_4d + has_5d + has_6d + has_7d
|
|
|
+ );
|
|
|
}
|
|
|
- }
|
|
|
- if (flowPoolIndex >= flowVideos.size()) {
|
|
|
- for (int i = rovPoolIndex; i < rovVideos.size() && result.size() < param.getSize(); i++) {
|
|
|
- result.add(rovVideos.get(i));
|
|
|
+ item.getScoresMap().put("hasReturnRovScore_1d", hasReturnRovScore_1d);
|
|
|
+ item.getScoresMap().put("hasReturnRovScore_2d", hasReturnRovScore_2d);
|
|
|
+ item.getScoresMap().put("hasReturnRovScore_3d", hasReturnRovScore_3d);
|
|
|
+ item.getScoresMap().put("hasReturnRovScore_4d", hasReturnRovScore_4d);
|
|
|
+ item.getScoresMap().put("hasReturnRovScore_5d", hasReturnRovScore_5d);
|
|
|
+ item.getScoresMap().put("hasReturnRovScore_6d", hasReturnRovScore_6d);
|
|
|
+ item.getScoresMap().put("hasReturnRovScore_7d", hasReturnRovScore_7d);
|
|
|
+ item.getScoresMap().put("hasReturnRovScore", hasReturnRovScore);
|
|
|
+ double fmRov = item.getScoreRov();
|
|
|
+ item.getScoresMap().put("fmRov", fmRov);
|
|
|
+ if (chooseFunction == 0){
|
|
|
+ score = fmRov * (rosDefault + hasReturnRovScore);
|
|
|
+ }else if (chooseFunction == 1){
|
|
|
+ score = fmRov * (1 + Math.log(hasReturnRovScore + 1));
|
|
|
+ }else {
|
|
|
+ score = fmRov * (1 + hasReturnRovScore);
|
|
|
}
|
|
|
- }
|
|
|
|
|
|
- //8 合并结果密度控制
|
|
|
- Map<String, Integer> densityRules = new HashMap<>();
|
|
|
- if (rulesMap != null && !rulesMap.isEmpty()) {
|
|
|
- for (Map.Entry<String, Map<String, String>> entry : rulesMap.entrySet()) {
|
|
|
- String key = entry.getKey();
|
|
|
- Map<String, String> value = entry.getValue();
|
|
|
- if (value.containsKey("density")) {
|
|
|
- densityRules.put(key, Integer.valueOf(value.get("density")));
|
|
|
- }
|
|
|
+ Video video = item.getVideo();
|
|
|
+ video.setScore(score);
|
|
|
+ video.setSortScore(score);
|
|
|
+ video.setScoresMap(item.getScoresMap());
|
|
|
+ video.setAllFeatureMap(item.getAllFeatureMap());
|
|
|
+ if (feature != null
|
|
|
+ && MapUtils.isNotEmpty(feature.getVideoFeature())
|
|
|
+ && MapUtils.isNotEmpty(feature.getVideoFeature().get(item.getVideoId() + ""))) {
|
|
|
+ video.getMetaFeatureMap().putAll(feature.getVideoFeature().get(item.getVideoId() + ""));
|
|
|
+ }
|
|
|
+ if (feature != null
|
|
|
+ && MapUtils.isNotEmpty(feature.getUserFeature())) {
|
|
|
+ video.getMetaFeatureMap().putAll(feature.getUserFeature());
|
|
|
}
|
|
|
+ result.add(video);
|
|
|
}
|
|
|
- Set<Long> videosSet = result.stream().map(Video::getVideoId).collect(Collectors.toSet());
|
|
|
- List<Video> rovRecallRankNew = rovVideos.stream().filter(r -> !videosSet.contains(r.getVideoId())).collect(Collectors.toList());
|
|
|
- List<Video> flowPoolRankNew = flowVideos.stream().filter(r -> !videosSet.contains(r.getVideoId())).collect(Collectors.toList());
|
|
|
- List<Video> resultWithDensity = RankProcessorDensity.mergeDensityControl(result,
|
|
|
- rovRecallRankNew, flowPoolRankNew, densityRules);
|
|
|
+ result.sort(Comparator.comparingDouble(o -> -o.getSortScore()));
|
|
|
|
|
|
- return new RankResult(resultWithDensity);
|
|
|
+ return result;
|
|
|
}
|
|
|
|
|
|
- private void getVideoFeatureFromRedis(List<RankItem> items){
|
|
|
- List<Long> videoIds = CommonCollectionUtils.toListDistinct(items, RankItem::getVideoId);
|
|
|
- List<String> videoKeys = videoIds.stream().map(r -> "item_rt_fea_1hrootall_" + r)
|
|
|
- .collect(Collectors.toList());
|
|
|
- List<String> videoRtFeatures = this.redisTemplate.opsForValue().multiGet(videoKeys);
|
|
|
- int j = 0;
|
|
|
- if (videoRtFeatures != null) {
|
|
|
- for (RankItem item : items) {
|
|
|
- String vF = videoRtFeatures.get(j);
|
|
|
- ++j;
|
|
|
- if (vF == null) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- Map<String, String> vfMap = new HashMap<>();
|
|
|
- Map<String, Map<String, Double>> vfMapNew = new HashMap<>();
|
|
|
- try {
|
|
|
- vfMap = JSONUtils.fromJson(vF, new TypeToken<Map<String, String>>() {
|
|
|
- }, vfMap);
|
|
|
- for (Map.Entry<String, String> entry : vfMap.entrySet()) {
|
|
|
- String value = entry.getValue();
|
|
|
- if (value == null) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- String[] var1 = value.split(",");
|
|
|
- Map<String, Double> tmp = new HashMap<>();
|
|
|
- for (String var2 : var1) {
|
|
|
- String[] var3 = var2.split(":");
|
|
|
- tmp.put(var3[0], Double.valueOf(var3[1]));
|
|
|
+ private Map<String, Map<String, String>> extractVideoFeature(Map<String, Map<String, Map<String, String>>> featureMap) { // Stub: featureMap is not read yet and null is always returned.
|
|
|
+ // TODO: not implemented. NOTE(review): presumably meant to derive per-video feature maps from featureMap - confirm before use.
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ private Map<String, String> extractSceneFeature(Map<String, Map<String, Map<String, String>>> featureMap) { // Stub: featureMap is not read yet and null is always returned.
|
|
|
+ // TODO: not implemented. NOTE(review): presumably meant to derive scene/context features from featureMap - confirm before use.
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ private Map<String, String> extractUserFeature(Map<String, Map<String, Map<String, String>>> featureMap) { // Stub: featureMap is not read yet and null is always returned.
|
|
|
+ // TODO: not implemented. NOTE(review): presumably meant to derive user features from featureMap - confirm before use.
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ private void readBucketFile() { // Loads the bucket table from a classpath resource and stores it in this.bucketsMap / this.bucketsLen.
|
|
|
+ InputStream resourceStream = RankStrategy4RegionMergeModelBasic.class.getClassLoader().getResourceAsStream("20240609_bucket_274.txt"); // null when the resource is not on the classpath
|
|
|
+ if (resourceStream != null) {
|
|
|
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(resourceStream))) { // try-with-resources closes the reader; NOTE(review): no charset is passed, so the platform default applies
|
|
|
+ Map<String, double[]> bucketsMap = new HashMap<>(); // local map: feature name -> parsed values of the third column
|
|
|
+ Map<String, Double> bucketsLen = new HashMap<>(); // local map: feature name -> parsed value of the second column
|
|
|
+ String line;
|
|
|
+ while ((line = reader.readLine()) != null) {
|
|
|
+ // Strip spaces and newline characters, then skip empty lines. NOTE(review): readLine() already drops the line terminator, so the \n removal looks redundant.
|
|
|
+ line = line.replace(" ", "").replaceAll("\n", "");
|
|
|
+ if (!line.isEmpty()) {
|
|
|
+ String[] rList = line.split("\t"); // tab-separated fields
|
|
|
+ if (rList.length == 3) { // lines without exactly three fields are ignored without logging
|
|
|
+ String key = rList[0];
|
|
|
+ double value1 = Double.parseDouble(rList[1]); // NOTE(review): NumberFormatException is not caught below (only IOException is) and would propagate to the caller
|
|
|
+ bucketsLen.put(key, value1); // NOTE(review): presumably the number of buckets for this feature - confirm against the file format
|
|
|
+ double[] value2 = Arrays.stream(rList[2].split(",")) // third field is a comma-separated list of numbers
|
|
|
+ .mapToDouble(Double::valueOf)
|
|
|
+ .toArray();
|
|
|
+ bucketsMap.put(key, value2); // NOTE(review): presumably the bucket boundaries - confirm against the file format
|
|
|
}
|
|
|
- vfMapNew.put(entry.getKey(), tmp);
|
|
|
}
|
|
|
- item.setItemRealTimeRootFeature(vfMapNew);
|
|
|
- } catch (Exception e) {
|
|
|
- log.error(String.format("parse video item_rt_fea_1hrootall_ json is wrong in {} with {}", this.CLASS_NAME, e));
|
|
|
}
|
|
|
+ this.bucketsMap = bucketsMap; // fields are replaced only after the whole file has been read
|
|
|
+ this.bucketsLen = bucketsLen;
|
|
|
+ } catch (IOException e) { // read failure: logged only, the fields are left unassigned by this call
|
|
|
+ log.error("something is wrong in parse bucket file:" + e);
|
|
|
}
|
|
|
+ } else { // resource missing: logged only, the fields are left unassigned by this call
|
|
|
+ log.error("no bucket file");
|
|
|
}
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
- public static void main(String[] args) {
|
|
|
-
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
-
|
|
|
-
|
|
|
}
|