|
@@ -17,12 +17,7 @@ import org.springframework.beans.factory.annotation.Autowired;
|
|
|
import org.springframework.beans.factory.annotation.Value;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
|
|
|
-import java.io.BufferedReader;
|
|
|
-import java.io.IOException;
|
|
|
-import java.io.InputStream;
|
|
|
-import java.io.InputStreamReader;
|
|
|
import java.util.*;
|
|
|
-import java.util.concurrent.Callable;
|
|
|
import java.util.concurrent.Future;
|
|
|
import java.util.concurrent.TimeUnit;
|
|
|
import java.util.stream.Collectors;
|
|
@@ -36,10 +31,7 @@ public class RankStrategy4RegionMergeModelV563 extends RankStrategy4RegionMergeM
|
|
|
@Autowired
|
|
|
private FeatureService featureService;
|
|
|
|
|
|
- Map<String, double[]> bucketsMap = new HashMap<>();
|
|
|
- Map<String, Double> bucketsLen = new HashMap<>();
|
|
|
-
|
|
|
- @Value("${similarity.concurrent: false}")
|
|
|
+ @Value("${similarity.concurrent: true}")
|
|
|
private boolean similarityConcurrent;
|
|
|
|
|
|
@Override
|
|
@@ -67,13 +59,13 @@ public class RankStrategy4RegionMergeModelV563 extends RankStrategy4RegionMergeM
|
|
|
List<Video> rovRecallRank = new ArrayList<>(v0);
|
|
|
//-------------------return相似召回------------------
|
|
|
List<Video> v6 = extractAndSort(param, ReturnVideoRecallStrategy.PUSH_FORM);
|
|
|
- v6 = v6.stream().filter(r-> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
|
|
|
+ v6 = v6.stream().filter(r -> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
|
|
|
v6 = v6.subList(0, Math.min(mergeWeight.getOrDefault("v6", 5.0).intValue(), v6.size()));
|
|
|
rovRecallRank.addAll(v6);
|
|
|
setVideo.addAll(v6.stream().map(Video::getVideoId).collect(Collectors.toSet()));
|
|
|
//-------------------新地域召回------------------
|
|
|
- List<Video> v1 = extractAndSort(param, RegionRealtimeRecallStrategyV1_sort.PUSH_FORM);
|
|
|
- v1 = v1.stream().filter(r-> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
|
|
|
+ List<Video> v1 = extractAndSort(param, RegionRealtimeRecallStrategyV1.PUSH_FORM);
|
|
|
+ v1 = v1.stream().filter(r -> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
|
|
|
v1 = v1.subList(0, Math.min(mergeWeight.getOrDefault("v1", 5.0).intValue(), v1.size()));
|
|
|
rovRecallRank.addAll(v1);
|
|
|
setVideo.addAll(v1.stream().map(Video::getVideoId).collect(Collectors.toSet()));
|
|
@@ -238,7 +230,12 @@ public class RankStrategy4RegionMergeModelV563 extends RankStrategy4RegionMergeM
|
|
|
String tags = c34567Map.getOrDefault(key, "");
|
|
|
if (!tags.isEmpty()) {
|
|
|
Future<Pair<String, Double[]>> future = ThreadPoolFactory.defaultPool().submit(() -> {
|
|
|
- Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
|
|
|
+ Double[] doubles = null;
|
|
|
+ if (param.getAbExpCodes().contains(word2vecExp)) {
|
|
|
+ doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
|
|
|
+ } else {
|
|
|
+ doubles = ExtractorUtils.funcC34567ForTags(tags, title);
|
|
|
+ }
|
|
|
return Pair.create(key, doubles);
|
|
|
});
|
|
|
futures.add(future);
|
|
@@ -260,7 +257,12 @@ public class RankStrategy4RegionMergeModelV563 extends RankStrategy4RegionMergeM
|
|
|
for (String key_time : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
|
|
|
String tags = c34567Map.getOrDefault(name + "_" + key_time, "");
|
|
|
if (!tags.isEmpty()) {
|
|
|
- Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
|
|
|
+ Double[] doubles = null;
|
|
|
+ if (param.getAbExpCodes().contains(word2vecExp)) {
|
|
|
+ doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
|
|
|
+ } else {
|
|
|
+ doubles = ExtractorUtils.funcC34567ForTags(tags, title);
|
|
|
+ }
|
|
|
featureMap.put(name + "_" + key_time + "_matchnum", doubles[0]);
|
|
|
featureMap.put(name + "_" + key_time + "_maxscore", doubles[1]);
|
|
|
featureMap.put(name + "_" + key_time + "_avgscore", doubles[2]);
|
|
@@ -328,32 +330,40 @@ public class RankStrategy4RegionMergeModelV563 extends RankStrategy4RegionMergeM
|
|
|
item.featureMap = featureMap;
|
|
|
}
|
|
|
|
|
|
- // 3 排序
|
|
|
- Map<String, String> sceneFeatureMap = new HashMap<>(0);
|
|
|
|
|
|
- List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_20240807.conf")
|
|
|
- .scoring(sceneFeatureMap, userFeatureMap, rankItems);
|
|
|
- String redisScoreKey = mergeWeight.getOrDefault("redisScoreKey", 0.0) < 0.5 ? "redis:vid_hasreturn_rov:" : "redis:vid_hasreturn_rov_7d:";
|
|
|
- Map<String, Map<String, String>> vid2MapFeature = this.getVideoRedisFeature(vids, redisScoreKey);
|
|
|
- List<Video> result = new ArrayList<>();
|
|
|
- String hasReturnRovKey = mergeWeight.getOrDefault("hasReturnRovKey", 1.0) < 0.5 ? "rate_1" : "rate_n";
|
|
|
- Double chooseFunction = mergeWeight.getOrDefault("chooseFunction", 0.0);
|
|
|
+ // vovh24特征
|
|
|
+ String partition = redisTemplate.opsForValue().get("redis:vid_vovh24pred_time:partition");
|
|
|
+ Map<String, Map<String, String>> vid2VovFeatureMap = this.getVideoRedisFeature(vids, "redis:vid_vovh24pred_time:" + partition + ":");
|
|
|
+ for (RankItem rankItem : rankItems) {
|
|
|
+ if (vid2VovFeatureMap.containsKey(String.valueOf(rankItem.getVideoId()))) {
|
|
|
+ rankItem.getFeatureMap().putAll(vid2VovFeatureMap.get(String.valueOf(rankItem.getVideoId())));
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
+ // 4 排序模型计算
|
|
|
+ Map<String, String> sceneFeatureMap = new HashMap<>(0);
|
|
|
+ sceneFeatureMap.put("weightKey", partition.substring(partition.length() - 2));
|
|
|
+ List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_20241107.conf").scoring(sceneFeatureMap, userFeatureMap, rankItems);
|
|
|
+ // 5 排序公式特征
|
|
|
+ Map<String, Map<String, String>> vid2MapFeature = this.getVideoRedisFeature(vids, "redis:vid_hasreturn_rov:");
|
|
|
+ double alpha_vov = mergeWeight.getOrDefault("alpha_vov", 0.05);
|
|
|
+ double func = mergeWeight.getOrDefault("func", 1.0);
|
|
|
+ List<Video> result = new ArrayList<>();
|
|
|
for (RankItem item : items) {
|
|
|
+ item.getScoresMap().put("alpha_vov", alpha_vov);
|
|
|
double score = 0.0;
|
|
|
- double hasReturnRovScore = Double.parseDouble(vid2MapFeature.getOrDefault(item.getVideoId() + "", new HashMap<>())
|
|
|
- .getOrDefault(hasReturnRovKey, "0"));
|
|
|
- item.getScoresMap().put("hasReturnRovScore", hasReturnRovScore);
|
|
|
double fmRovOrigin = item.getScoreRov();
|
|
|
item.getScoresMap().put("fmRovOrigin", fmRovOrigin);
|
|
|
double fmRov = restoreScore(fmRovOrigin);
|
|
|
item.getScoresMap().put("fmRov", fmRov);
|
|
|
- if (chooseFunction == 0){
|
|
|
- score = fmRov * (1 + hasReturnRovScore);
|
|
|
- }else if (chooseFunction == 1){
|
|
|
- score = fmRov * (1 + Math.log(hasReturnRovScore + 1));
|
|
|
- }else {
|
|
|
- score = fmRov * ExtractorUtils.sigmoid(hasReturnRovScore);
|
|
|
+ double hasReturnRovScore = Double.parseDouble(vid2MapFeature.getOrDefault(item.getVideoId() + "", new HashMap<>()).getOrDefault("rate_n", "0"));
|
|
|
+ item.getScoresMap().put("hasReturnRovScore", hasReturnRovScore);
|
|
|
+ double vovScore = item.getVovScore();
|
|
|
+ item.getScoresMap().put("vovScore", vovScore);
|
|
|
+ if (func == 1) {
|
|
|
+ score = fmRov * (1 + hasReturnRovScore) + alpha_vov * vovScore;
|
|
|
+ } else {
|
|
|
+ score = fmRov * (1 + hasReturnRovScore) * (1.0 + alpha_vov * vovScore);
|
|
|
}
|
|
|
|
|
|
Video video = item.getVideo();
|
|
@@ -373,42 +383,29 @@ public class RankStrategy4RegionMergeModelV563 extends RankStrategy4RegionMergeM
|
|
|
result.add(video);
|
|
|
}
|
|
|
result.sort(Comparator.comparingDouble(o -> -o.getSortScore()));
|
|
|
-
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
- public void readBucketFile() {
|
|
|
- InputStream resourceStream = RankStrategy4RegionMergeModelV552.class.getClassLoader().getResourceAsStream("20240609_bucket_274.txt");
|
|
|
- if (resourceStream != null) {
|
|
|
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(resourceStream))) {
|
|
|
- Map<String, double[]> bucketsMap = new HashMap<>();
|
|
|
- Map<String, Double> bucketsLen = new HashMap<>();
|
|
|
- String line;
|
|
|
- while ((line = reader.readLine()) != null) {
|
|
|
- // 替换空格和换行符,过滤空行
|
|
|
- line = line.replace(" ", "").replaceAll("\n", "");
|
|
|
- if (!line.isEmpty()) {
|
|
|
- String[] rList = line.split("\t");
|
|
|
- if (rList.length == 3) {
|
|
|
- String key = rList[0];
|
|
|
- double value1 = Double.parseDouble(rList[1]);
|
|
|
- bucketsLen.put(key, value1);
|
|
|
- double[] value2 = Arrays.stream(rList[2].split(","))
|
|
|
- .mapToDouble(Double::valueOf)
|
|
|
- .toArray();
|
|
|
- bucketsMap.put(key, value2);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- this.bucketsMap = bucketsMap;
|
|
|
- this.bucketsLen = bucketsLen;
|
|
|
- } catch (IOException e) {
|
|
|
- log.error("something is wrong in parse bucket file:" + e);
|
|
|
- }
|
|
|
- } else {
|
|
|
- log.error("no bucket file");
|
|
|
+ public double calVovScore(RankItem item, Map<String, Map<String, String>> vid2VovFeatureMap) {
|
|
|
+ String id = item.getVideoId() + "";
|
|
|
+ Map<String, String> featureMap = vid2VovFeatureMap.getOrDefault(id, new HashMap<>());
|
|
|
+ double numerator = 0D;
|
|
|
+ final Set<String> ups = new HashSet<>(Arrays.asList(
|
|
|
+ "1_vovh0分子", "2_vovh1分子", "3_vovh2分子", "4_vovh3分子", "7_vovh6分子", "13_vovh12分子", "25_vovh24分子", "2_vovd1分子"
|
|
|
+ ));
|
|
|
+ for (String key : ups) {
|
|
|
+ numerator += Double.parseDouble(featureMap.getOrDefault(key, "0"));
|
|
|
}
|
|
|
+ double denominator = 0D;
|
|
|
+ final Set<String> downs = new HashSet<>(Arrays.asList(
|
|
|
+ "1_vovh分母", "2_vovh分母", "3_vovh分母", "4_vovh分母", "7_vovh分母", "13_vovh分母", "25_vovh分母", "2_vovd分母"
|
|
|
+ ));
|
|
|
+ for (String key : downs) {
|
|
|
+ denominator += Double.parseDouble(featureMap.getOrDefault(key, "0"));
|
|
|
+ }
|
|
|
+ item.getScoresMap().put("numerator", numerator);
|
|
|
+ item.getScoresMap().put("denominator", denominator);
|
|
|
+ return denominator != 0.0 ? numerator / denominator : 0.0;
|
|
|
}
|
|
|
|
|
|
-
|
|
|
}
|