|
@@ -10,13 +10,10 @@ import com.tzld.piaoquan.recommend.server.service.rank.extractor.ExtractorUtils;
|
|
|
import com.tzld.piaoquan.recommend.server.service.recall.strategy.*;
|
|
|
import com.tzld.piaoquan.recommend.server.service.score.ScorerUtils;
|
|
|
import com.tzld.piaoquan.recommend.server.util.CommonCollectionUtils;
|
|
|
-import com.tzld.piaoquan.recommend.server.util.FeatureBucketUtils;
|
|
|
-import com.tzld.piaoquan.recommend.server.util.SimilarityUtils;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
import org.apache.commons.collections4.MapUtils;
|
|
|
import org.apache.commons.math3.util.Pair;
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
-import org.springframework.beans.factory.annotation.Value;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
|
|
|
import java.util.*;
|
|
@@ -33,14 +30,6 @@ public class RankStrategy4RegionMergeModelV562 extends RankStrategy4RegionMergeM
|
|
|
@Autowired
|
|
|
private FeatureService featureService;
|
|
|
|
|
|
- private static final List<String> shortPeriod = Arrays.asList("1h", "2h", "4h", "6h", "12h", "24h", "7d");
|
|
|
- private static final List<String> middlePeriod = Arrays.asList("14d", "30d");
|
|
|
- private static final List<String> longPeriod = Arrays.asList("7d", "35d", "90d", "365d");
|
|
|
- private static final List<String> cfRosList = Collections.singletonList("rosn");
|
|
|
- private static final List<String> cfRovList = Collections.singletonList("rovn");
|
|
|
- private static final List<String> videoSimAttrs = Arrays.asList("cate1_list", "cate2", "cate2_list",
|
|
|
- "keywords", "style", "theme", "title", "topic", "user_value");
|
|
|
-
|
|
|
@Override
|
|
|
public List<Video> mergeAndRankRovRecall(RankParam param) {
|
|
|
Map<String, Double> mergeWeight = this.mergeWeight != null ? this.mergeWeight : new HashMap<>(0);
|
|
@@ -88,12 +77,11 @@ public class RankStrategy4RegionMergeModelV562 extends RankStrategy4RegionMergeM
|
|
|
// k1:视频、k2:表、k3:特征、v:特征值
|
|
|
String provinceCn = param.getProvince().replaceAll("省$", "");
|
|
|
String headVid = String.valueOf(param.getHeadVid());
|
|
|
- String sceneType = String.valueOf(param.getHotSceneType());
|
|
|
- Map<String, Map<String, Map<String, String>>> videoBaseInfoMap = featureService.getVideoBaseInfo(headVid, vids);
|
|
|
- FeatureService.Feature feature = featureService.getNewFeature(provinceCn, param.getMid(), sceneType, headVid, videoBaseInfoMap, vids);
|
|
|
+ FeatureService.Feature feature = featureService.getFeature(param.getMid(), vids,
|
|
|
+ String.valueOf(param.getAppType()), provinceCn, headVid);
|
|
|
Map<String, Map<String, String>> featureOriginUser = feature.getUserFeature();
|
|
|
Map<String, Map<String, Map<String, String>>> featureOriginVideo = feature.getVideoFeature();
|
|
|
- Map<String, String> headVideoInfo = videoBaseInfoMap.getOrDefault(headVid, new HashMap<>()).getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
|
|
|
+
|
|
|
|
|
|
// 2 特征处理
|
|
|
Map<String, Double> userFeatureMapDouble = new HashMap<>();
|
|
@@ -209,25 +197,22 @@ public class RankStrategy4RegionMergeModelV562 extends RankStrategy4RegionMergeM
|
|
|
double f3 = ExtractorUtils.calDiv(returns, exp);
|
|
|
double f4 = ExtractorUtils.calLog(returns);
|
|
|
double f5 = f3 * f4;
|
|
|
- double f6 = ExtractorUtils.calDiv(returns, share);
|
|
|
|
|
|
String key1 = tuple4.name + "_" + prefix2 + "_" + "STR";
|
|
|
String key2 = tuple4.name + "_" + prefix2 + "_" + "log(share)";
|
|
|
String key3 = tuple4.name + "_" + prefix2 + "_" + "ROV";
|
|
|
String key4 = tuple4.name + "_" + prefix2 + "_" + "log(return)";
|
|
|
String key5 = tuple4.name + "_" + prefix2 + "_" + "ROV*log(return)";
|
|
|
- String key6 = tuple4.name + "_" + prefix2 + "_" + "ROS";
|
|
|
|
|
|
featureMap.put(key1, f1);
|
|
|
featureMap.put(key2, f2);
|
|
|
featureMap.put(key3, f3);
|
|
|
featureMap.put(key4, f4);
|
|
|
featureMap.put(key5, f5);
|
|
|
- featureMap.put(key6, f6);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- Map<String, String> videoInfo = videoBaseInfoMap.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
|
|
|
+ Map<String, String> videoInfo = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
|
|
|
featureMap.put("total_time", Double.parseDouble(videoInfo.getOrDefault("total_time", "0")));
|
|
|
featureMap.put("bit_rate", Double.parseDouble(videoInfo.getOrDefault("bit_rate", "0")));
|
|
|
|
|
@@ -240,7 +225,12 @@ public class RankStrategy4RegionMergeModelV562 extends RankStrategy4RegionMergeM
|
|
|
String tags = c34567Map.getOrDefault(key, "");
|
|
|
if (!tags.isEmpty()) {
|
|
|
Future<Pair<String, Double[]>> future = ThreadPoolFactory.defaultPool().submit(() -> {
|
|
|
- Double[] doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
|
|
|
+ Double[] doubles = null;
|
|
|
+ if (param.getAbExpCodes().contains(word2vecExp)) { // Requests whose AB experiment codes include word2vecExp use funcC34567ForTagsNew. NOTE(review): getAbExpCodes() is dereferenced without a null check, and word2vecExp is not declared in the visible hunks (presumably inherited) - confirm both.
|
|
|
+ doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
|
|
|
+ } else { // Every other request goes through funcC34567ForTags with the same (tags, title) arguments.
|
|
|
+ doubles = ExtractorUtils.funcC34567ForTags(tags, title);
|
|
|
+ }
|
|
|
return Pair.create(key, doubles);
|
|
|
});
|
|
|
futures.add(future);
|
|
@@ -281,25 +271,43 @@ public class RankStrategy4RegionMergeModelV562 extends RankStrategy4RegionMergeM
|
|
|
featureMap.put("d1_return_n", Double.parseDouble(d1.getOrDefault("return_n", "0")));
|
|
|
featureMap.put("d1_rovn", Double.parseDouble(d1.getOrDefault("rovn", "0")));
|
|
|
}
|
|
|
- // ******************** new feature ********************
|
|
|
- addVideoStatFeature(vid, featureOriginVideo, featureMap);
|
|
|
- //addVideoCFFeature(vid, featureOriginVideo, featureMap);
|
|
|
- addVideoSimFeature(headVideoInfo, videoInfo, featureMap);
|
|
|
-
|
|
|
item.featureMapDouble = featureMap;
|
|
|
}
|
|
|
|
|
|
// 3 连续值特征分桶
|
|
|
- Map<String, String> userFeatureMap = FeatureBucketUtils.bucketFeature("20241209_rov_bucket.txt", userFeatureMapDouble);
|
|
|
- Map<String, String> norUserFeatureMap = FeatureBucketUtils.bucketFeature("20241209_nor_bucket.txt", userFeatureMapDouble);
|
|
|
+ readBucketFile(); // Invoked before bucketsLen/bucketsMap are read below. NOTE(review): presumably this (re)loads them - confirm it is cheap enough to call on every request.
|
|
|
+ Map<String, String> userFeatureMap = new HashMap<>(userFeatureMapDouble.size());
|
|
|
+ for (Map.Entry<String, Double> entry : userFeatureMapDouble.entrySet()) {
|
|
|
+ String name = entry.getKey();
|
|
|
+ Double score = entry.getValue(); // NOTE(review): unboxed by the comparison below - assumes map values are never null; confirm.
|
|
|
+ // Note: features whose value is not above 1E-8 (e.g. zero) or that are not in the bucket file are filtered out.
|
|
|
+ if (score > 1E-8 && this.bucketsLen.containsKey(name) && this.bucketsMap.containsKey(name)) {
|
|
|
+ Double bucketNum = this.bucketsLen.get(name);
|
|
|
+ double[] buckets = this.bucketsMap.get(name);
|
|
|
+ Double scoreNew = 1.0 / bucketNum * (ExtractorUtils.findInsertPosition(buckets, score) + 1.0); // Bucketed value = (insert position + 1) / bucketNum.
|
|
|
+ userFeatureMap.put(name, String.valueOf(scoreNew)); // Stored as a string because the bucketed user feature map is Map<String, String>.
|
|
|
+ }
|
|
|
+ }
|
|
|
for (RankItem item : rankItems) {
|
|
|
+ Map<String, String> featureMap = new HashMap<>(); // Bucketed features for this item; assigned to item.featureMap at the end of the iteration.
|
|
|
Map<String, Double> featureMapDouble = item.featureMapDouble;
|
|
|
- item.featureMap = FeatureBucketUtils.bucketFeature("20241209_rov_bucket.txt", featureMapDouble);
|
|
|
- item.norFeatureMap = FeatureBucketUtils.bucketFeature("20241209_nor_bucket.txt", featureMapDouble);
|
|
|
+
|
|
|
+ for (Map.Entry<String, Double> entry : featureMapDouble.entrySet()) {
|
|
|
+ String name = entry.getKey();
|
|
|
+ Double score = entry.getValue(); // NOTE(review): unboxed by the comparison below - assumes map values are never null; confirm.
|
|
|
+ // Note: features whose value is not above 1E-8 (e.g. zero) or that are not in the bucket file are filtered out.
|
|
|
+ if (score > 1E-8 && this.bucketsLen.containsKey(name) && this.bucketsMap.containsKey(name)) {
|
|
|
+ Double bucketNum = this.bucketsLen.get(name);
|
|
|
+ double[] buckets = this.bucketsMap.get(name);
|
|
|
+ Double scoreNew = 1.0 / bucketNum * (ExtractorUtils.findInsertPosition(buckets, score) + 1.0); // Bucketed value = (insert position + 1) / bucketNum, same formula as the user-feature loop.
|
|
|
+ featureMap.put(name, String.valueOf(scoreNew));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ item.featureMap = featureMap; // Only item.featureMap is set by the added lines; the norFeatureMap assignment is among the removed lines above.
|
|
|
}
|
|
|
// 4 排序模型计算
|
|
|
Map<String, String> sceneFeatureMap = new HashMap<>(0);
|
|
|
- List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_xgb_20241209.conf").scoring(sceneFeatureMap, userFeatureMap, norUserFeatureMap, rankItems);
|
|
|
+ List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_20240807.conf").scoring(sceneFeatureMap, userFeatureMap, rankItems);
|
|
|
// 5 排序公式特征
|
|
|
Map<String, Map<String, String>> vid2MapFeature = this.getVideoRedisFeature(vids, "redis:vid_hasreturn_vor:");
|
|
|
List<Video> result = new ArrayList<>();
|
|
@@ -309,12 +317,11 @@ public class RankStrategy4RegionMergeModelV562 extends RankStrategy4RegionMergeM
|
|
|
item.getScoresMap().put("fmRovOrigin", fmRovOrigin);
|
|
|
double fmRov = restoreScore(fmRovOrigin);
|
|
|
item.getScoresMap().put("fmRov", fmRov);
|
|
|
- double hasReturnRovScore = Double.parseDouble(vid2MapFeature.getOrDefault(item.getVideoId() + "", new HashMap<>()).getOrDefault("rov", "0"));
|
|
|
+ double hasReturnRovScore = this.calcHasReturnRovScore(vid2MapFeature.getOrDefault(item.getVideoId() + "", new HashMap<>()));
|
|
|
item.getScoresMap().put("hasReturnRovScore", hasReturnRovScore);
|
|
|
- double norXGBScore = item.getScoresMap().getOrDefault("NorXGBScore", 0d);
|
|
|
double vor = Double.parseDouble(vid2MapFeature.getOrDefault(item.getVideoId() + "", new HashMap<>()).getOrDefault("vor", "0"));
|
|
|
item.getScoresMap().put("vor", vor);
|
|
|
- score = fmRov * (0.1 + Math.pow(norXGBScore, 1.1)) * (0.1 + vor);
|
|
|
+ score = fmRov * (0.1 + hasReturnRovScore) * (0.1 + vor);
|
|
|
Video video = item.getVideo();
|
|
|
video.setScore(score);
|
|
|
video.setSortScore(score);
|
|
@@ -323,12 +330,6 @@ public class RankStrategy4RegionMergeModelV562 extends RankStrategy4RegionMergeM
|
|
|
if (MapUtils.isNotEmpty(feature.getVideoFeature()) && MapUtils.isNotEmpty(feature.getVideoFeature().get(item.getVideoId() + ""))) {
|
|
|
video.getMetaFeatureMap().putAll(feature.getVideoFeature().get(item.getVideoId() + ""));
|
|
|
}
|
|
|
- if (MapUtils.isNotEmpty(videoBaseInfoMap) && MapUtils.isNotEmpty(videoBaseInfoMap.get(item.getVideoId() + ""))) {
|
|
|
- video.getMetaFeatureMap().putAll(videoBaseInfoMap.get(item.getVideoId() + ""));
|
|
|
- }
|
|
|
- if (MapUtils.isNotEmpty(headVideoInfo)) {
|
|
|
- video.getMetaFeatureMap().put("head_video", headVideoInfo);
|
|
|
- }
|
|
|
if (MapUtils.isNotEmpty(feature.getUserFeature())) {
|
|
|
video.getMetaFeatureMap().putAll(feature.getUserFeature());
|
|
|
}
|
|
@@ -338,114 +339,7 @@ public class RankStrategy4RegionMergeModelV562 extends RankStrategy4RegionMergeM
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- private Map<String, String> getVideoOneTypeInfo(String vid, String name,
|
|
|
- Map<String, Map<String, Map<String, String>>> videoAllInfoMap) {
|
|
|
- if (null == videoAllInfoMap) {
|
|
|
- return new HashMap<>();
|
|
|
- }
|
|
|
- return videoAllInfoMap.getOrDefault(vid, new HashMap<>()).getOrDefault(name, new HashMap<>());
|
|
|
- }
|
|
|
-
|
|
|
- private double getVideoOneInfo(String name, Map<String, String> infoMap) {
|
|
|
- if (null == infoMap) {
|
|
|
- return 0.0;
|
|
|
- }
|
|
|
- return infoMap.isEmpty() ? 0 : Double.parseDouble(infoMap.getOrDefault(name, "0.0"));
|
|
|
- }
|
|
|
-
|
|
|
- private void addVideoStatFeature(String vid, Map<String, Map<String, Map<String, String>>> videoAllInfoMap,
|
|
|
- Map<String, Double> featureMap) {
|
|
|
- List<Tuple3> vidStatInfo = Arrays.asList(
|
|
|
- new Tuple3("b20", shortPeriod, getVideoOneTypeInfo(vid, "alg_cate2_feature", videoAllInfoMap)),
|
|
|
- new Tuple3("b21", shortPeriod, getVideoOneTypeInfo(vid, "alg_cate1_feature", videoAllInfoMap)),
|
|
|
- new Tuple3("b22", shortPeriod, getVideoOneTypeInfo(vid, "alg_vid_source_feature", videoAllInfoMap)),
|
|
|
- new Tuple3("b28", shortPeriod, getVideoOneTypeInfo(vid, "alg_sence_type_feature", videoAllInfoMap)),
|
|
|
- new Tuple3("b29", shortPeriod, getVideoOneTypeInfo(vid, "alg_videoid_feature", videoAllInfoMap)),
|
|
|
- new Tuple3("b23", middlePeriod, getVideoOneTypeInfo(vid, "alg_cate2_feature_day", videoAllInfoMap)),
|
|
|
- new Tuple3("b24", middlePeriod, getVideoOneTypeInfo(vid, "alg_cate1_feature_day", videoAllInfoMap)),
|
|
|
- new Tuple3("b25", middlePeriod, getVideoOneTypeInfo(vid, "alg_video_source_feature_day", videoAllInfoMap)),
|
|
|
- new Tuple3("b26", longPeriod, getVideoOneTypeInfo(vid, "alg_video_unionid_feature_day", videoAllInfoMap)),
|
|
|
- new Tuple3("b27", longPeriod, getVideoOneTypeInfo(vid, "alg_vid_feature_day", videoAllInfoMap))
|
|
|
- );
|
|
|
- for (Tuple3 tuple3 : vidStatInfo) {
|
|
|
- String infoType = tuple3.first;
|
|
|
- List<String> infoPeriod = tuple3.second;
|
|
|
- Map<String, String> infoMap = tuple3.third;
|
|
|
- for (String period : infoPeriod) {
|
|
|
- double share = getVideoOneInfo("share_" + period, infoMap);
|
|
|
- double return_ = getVideoOneInfo("return_" + period, infoMap);
|
|
|
- double view_hasreturn = getVideoOneInfo("view_hasreturn_" + period, infoMap);
|
|
|
- double share_hasreturn = getVideoOneInfo("share_hasreturn_" + period, infoMap);
|
|
|
- double ros = getVideoOneInfo("ros_" + period, infoMap);
|
|
|
- double rov = getVideoOneInfo("rov_" + period, infoMap);
|
|
|
- double r_cnt = getVideoOneInfo("r_cnt_" + period, infoMap);
|
|
|
- double r_rate = getVideoOneInfo("r_rate_" + period, infoMap);
|
|
|
- double r_cnt4s = getVideoOneInfo("r_cnt4s_" + period, infoMap);
|
|
|
- double str = getVideoOneInfo("str_" + period, infoMap);
|
|
|
-
|
|
|
- featureMap.put(infoType + "_" + period + "_" + "share", ExtractorUtils.calLog(share));
|
|
|
- featureMap.put(infoType + "_" + period + "_" + "return", ExtractorUtils.calLog(return_));
|
|
|
- featureMap.put(infoType + "_" + period + "_" + "view_hasreturn", ExtractorUtils.calLog(view_hasreturn));
|
|
|
- featureMap.put(infoType + "_" + period + "_" + "share_hasreturn", ExtractorUtils.calLog(share_hasreturn));
|
|
|
- featureMap.put(infoType + "_" + period + "_" + "ros", ros);
|
|
|
- featureMap.put(infoType + "_" + period + "_" + "rov", rov);
|
|
|
- featureMap.put(infoType + "_" + period + "_" + "r_cnt", r_cnt);
|
|
|
- featureMap.put(infoType + "_" + period + "_" + "r_rate", r_rate);
|
|
|
- featureMap.put(infoType + "_" + period + "_" + "r_cnt4s", r_cnt4s);
|
|
|
- featureMap.put(infoType + "_" + period + "_" + "str", str);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- private void addVideoCFFeature(String vid, Map<String, Map<String, Map<String, String>>> videoAllInfoMap,
|
|
|
- Map<String, Double> featureMap) {
|
|
|
- List<Tuple3> vidCFInfo = Arrays.asList(
|
|
|
- new Tuple3("d2", cfRosList, getVideoOneTypeInfo(vid, "alg_recsys_feature_weak_cf_i2i_scene_ros", videoAllInfoMap)),
|
|
|
- new Tuple3("d3", cfRosList, getVideoOneTypeInfo(vid, "alg_recsys_feature_cf_i2i_scene_ros", videoAllInfoMap)),
|
|
|
- new Tuple3("d4", cfRovList, getVideoOneTypeInfo(vid, "alg_recsys_feature_weak_cf_i2i_scene_rov", videoAllInfoMap)),
|
|
|
- new Tuple3("d5", cfRovList, getVideoOneTypeInfo(vid, "alg_recsys_feature_cf_i2i_scene_rov", videoAllInfoMap))
|
|
|
- );
|
|
|
- for (Tuple3 tuple3 : vidCFInfo) {
|
|
|
- String infoType = tuple3.first;
|
|
|
- List<String> valTypeList = tuple3.second;
|
|
|
- Map<String, String> infoMap = tuple3.third;
|
|
|
- if (!infoMap.isEmpty()) {
|
|
|
- for (String valType : valTypeList) {
|
|
|
- double exp = getVideoOneInfo("exp", infoMap);
|
|
|
- double return_n = getVideoOneInfo("return_n", infoMap);
|
|
|
- double value = getVideoOneInfo(valType, infoMap);
|
|
|
-
|
|
|
- featureMap.put(infoType + "_exp", ExtractorUtils.calLog(exp));
|
|
|
- featureMap.put(infoType + "_return_n", ExtractorUtils.calLog(return_n));
|
|
|
- featureMap.put(infoType + "_" + valType, value);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- private void addVideoSimFeature(Map<String, String> headInfo, Map<String, String> rankInfo, Map<String, Double> featureMap) {
|
|
|
- if (!headInfo.isEmpty() && !rankInfo.isEmpty()) {
|
|
|
- List<Future<Pair<String, Double>>> futures = new ArrayList<>();
|
|
|
- for (String attr : videoSimAttrs) {
|
|
|
- String headAttr = headInfo.getOrDefault(attr, "");
|
|
|
- String rankAttr = rankInfo.getOrDefault(attr, "");
|
|
|
- if (!"".equals(headAttr) && !"".equals(rankAttr)) {
|
|
|
- String key = "video_sim_" + attr;
|
|
|
- Future<Pair<String, Double>> future = ThreadPoolFactory.defaultPool().submit(() -> {
|
|
|
- double simScore = SimilarityUtils.word2VecSimilarity(headAttr, rankAttr);
|
|
|
- return Pair.create(key, simScore);
|
|
|
- });
|
|
|
- futures.add(future);
|
|
|
- }
|
|
|
- }
|
|
|
- try {
|
|
|
- for (Future<Pair<String, Double>> future : futures) {
|
|
|
- Pair<String, Double> pair = future.get(1000, TimeUnit.MILLISECONDS);
|
|
|
- featureMap.put(pair.getFirst(), pair.getSecond());
|
|
|
- }
|
|
|
- } catch (Exception e) {
|
|
|
- log.error("video attr similarity error", e);
|
|
|
- }
|
|
|
- }
|
|
|
+ private double calcHasReturnRovScore(Map<String, String> feature){ // Returns the "rov" entry of the given feature map parsed as a double.
|
|
|
+ return Double.parseDouble(feature.getOrDefault("rov", "0")); // An absent "rov" key falls back to "0"; a non-numeric value makes parseDouble throw NumberFormatException.
|
|
|
}
|
|
|
}
|