zhaohaipeng преди 1 месец
родител
ревизия
acc39ed1e6

+ 49 - 5
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/FeatureService.java

@@ -7,6 +7,7 @@ import com.tzld.piaoquan.recommend.server.remote.FeatureV2RemoteService;
 import com.tzld.piaoquan.recommend.server.util.JSONUtils;
 import lombok.Data;
 import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.MapUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
@@ -163,16 +164,16 @@ public class FeatureService {
             protos.add(genWithVidAndProvince("alg_vid_feature_feed_province_root_return_v2", vid, province));
 
             // headvid + vid
-            //protos.add(genWithKeyMap("alg_recsys_feature_cf_i2i_new_v2", vid, ImmutableMap.of("vid_a", headVid, "vid_b", vid)));
+            // protos.add(genWithKeyMap("alg_recsys_feature_cf_i2i_new_v2", vid, ImmutableMap.of("vid_a", headVid, "vid_b", vid)));
 
             // ********************* new vid ******************
             protos.add(genWithKeyMap("alg_vid_feature_day", vid, ImmutableMap.of("vid", vid)));
             protos.add(genWithKeyMap("alg_sence_type_feature", vid, ImmutableMap.of("sence_type", sceneType, "videoid", vid)));
             protos.add(genWithKeyMap("alg_videoid_feature", vid, ImmutableMap.of("videoid", vid)));
-            //protos.add(genWithKeyMap("alg_recsys_feature_cf_i2i_scene_rov", vid, ImmutableMap.of("sence_type", i2iSceneType, "vid_a", headVid, "vid_b", vid)));
-            //protos.add(genWithKeyMap("alg_recsys_feature_cf_i2i_scene_ros", vid, ImmutableMap.of("sence_type", i2iSceneType, "vid_a", headVid, "vid_b", vid)));
-            //protos.add(genWithKeyMap("alg_recsys_feature_weak_cf_i2i_scene_rov", vid, ImmutableMap.of("sence_type", i2iSceneType, "vid_a", headVid, "vid_b", vid)));
-            //protos.add(genWithKeyMap("alg_recsys_feature_weak_cf_i2i_scene_ros", vid, ImmutableMap.of("sence_type", i2iSceneType, "vid_a", headVid, "vid_b", vid)));
+            // protos.add(genWithKeyMap("alg_recsys_feature_cf_i2i_scene_rov", vid, ImmutableMap.of("sence_type", i2iSceneType, "vid_a", headVid, "vid_b", vid)));
+            // protos.add(genWithKeyMap("alg_recsys_feature_cf_i2i_scene_ros", vid, ImmutableMap.of("sence_type", i2iSceneType, "vid_a", headVid, "vid_b", vid)));
+            // protos.add(genWithKeyMap("alg_recsys_feature_weak_cf_i2i_scene_rov", vid, ImmutableMap.of("sence_type", i2iSceneType, "vid_a", headVid, "vid_b", vid)));
+            // protos.add(genWithKeyMap("alg_recsys_feature_weak_cf_i2i_scene_ros", vid, ImmutableMap.of("sence_type", i2iSceneType, "vid_a", headVid, "vid_b", vid)));
             if (null != videoBaseInfoMap && videoBaseInfoMap.containsKey(vid)) {
                 Map<String, Map<String, String>> videoInfo = videoBaseInfoMap.get(vid);
                 if (null != videoInfo && videoInfo.containsKey("alg_vid_feature_basic_info")) {
@@ -216,6 +217,49 @@ public class FeatureService {
         return getFeatureByProto(protos);
     }
 
+    public Feature getFeatureByNewLabel(String appType, String hotSceneType, String province, String brand, String mid, String headVideoId, List<String> vidList, Map<String, Map<String, Map<String, String>>> videoBaseInfoMap) {
+        // Builds the feature keys (per-video keys for every vid in vidList, then user-level keys for mid) and fetches them with one getFeatureByProto call.
+        List<FeatureKeyProto> protos = new ArrayList<>();
+        // Video-level features
+        for (String vid : vidList) {
+            protos.add(this.genWithKeyMap("alg_vid_global_feature_20250212", vid, ImmutableMap.of("vid", vid)));
+            protos.add(this.genWithKeyMap("alg_vid_recommend_exp_feature_20250212", vid, ImmutableMap.of("vid", vid)));
+            protos.add(this.genWithKeyMap("alg_vid_recommend_flowpool_exp_feature_20250212", vid, ImmutableMap.of("vid", vid)));
+            protos.add(this.genWithKeyMap("alg_vid_apptype_recommend_exp_feature_20250212", vid, ImmutableMap.of("vid", vid, "apptype", appType)));
+            protos.add(this.genWithKeyMap("alg_vid_province_recommend_exp_feature_20250212", vid, ImmutableMap.of("vid", vid, "province", province)));
+            protos.add(this.genWithKeyMap("alg_vid_brand_recommend_exp_feature_20250212", vid, ImmutableMap.of("vid", vid, "brand", brand)));
+            protos.add(this.genWithKeyMap("alg_vid_hotsencetype_recommend_exp_feature_20250212", vid, ImmutableMap.of("vid", vid, "hotscenetype", hotSceneType)));
+            protos.add(this.genWithKeyMap("scene_type_vid_cf_feature_20250212", vid, ImmutableMap.of("vid_a", headVideoId, "vid_b", vid, "sence_type", hotSceneType)));
+            protos.add(this.genWithKeyMap("vid_click_cf_feature_20250212", vid, ImmutableMap.of("vid_a", headVideoId, "vid_b", vid)));
+            protos.add(this.genWithKeyMap("alg_recsys_feature_cf_i2i_v2", vid, ImmutableMap.of("vid_a", headVideoId, "vid_b", vid)));
+            // Keys derived from video base info: skipped when videoBaseInfoMap is null/empty; a missing vid entry or field falls back to "unknown".
+            if (MapUtils.isNotEmpty(videoBaseInfoMap)) {
+                Map<String, Map<String, String>> baseInfo = videoBaseInfoMap.getOrDefault(vid, new HashMap<>());
+                Map<String, String> featureMap = baseInfo.getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
+                protos.add(this.genWithKeyMap("alg_channel_recommend_exp_feature_20250212", vid, ImmutableMap.of("channel", featureMap.getOrDefault("channel", "unknown"))));
+                protos.add(this.genWithKeyMap("alg_merge_cate1_recommend_exp_feature_20250212", vid, ImmutableMap.of("merge_cate1", featureMap.getOrDefault("merge_first_level_cate", "unknown"))));
+                protos.add(this.genWithKeyMap("alg_merge_cate2_recommend_exp_feature_20250212", vid, ImmutableMap.of("merge_cate2", featureMap.getOrDefault("merge_second_level_cate", "unknown"))));
+                protos.add(this.genWithKeyMap("alg_video_unionid_recommend_exp_feature_20250212", vid, ImmutableMap.of("video_unionid", featureMap.getOrDefault("title_time_w_h_unionid", "unknown"))));
+
+                protos.add(this.genWithKeyMap("mid_merge_cate1_feature_20250212", vid, ImmutableMap.of("mid", mid, "merge_cate1", featureMap.getOrDefault("merge_first_level_cate", "unknown"))));
+                protos.add(this.genWithKeyMap("mid_merge_cate2_feature_20250212", vid, ImmutableMap.of("mid", mid, "merge_cate2", featureMap.getOrDefault("merge_second_level_cate", "unknown"))));
+                // NOTE(review): RankStrategy4RegionMergeModelV565 also reads alg_festive_recommend_exp_feature_20250212 and alg_vid_long_period_recommend_exp_feature_20250212, but no key for them is built here — confirm.
+            }
+        }
+
+        // NOTE(review): if ImmutableMap here is Guava's, ImmutableMap.of throws NullPointerException on a null value — confirm appType, hotSceneType, province, brand, mid and headVideoId can never be null.
+        // User-level features
+        protos.add(this.genWithKeyMap("mid_global_feature_20250212", mid, ImmutableMap.of("mid", mid)));
+        protos.add(this.genWithKeyMap("mid_u2u_friend_index_feature_20250212", mid, ImmutableMap.of("mid", mid)));
+        protos.add(this.genWithKeyMap("alg_mid_feature_return_tags", mid, ImmutableMap.of("mid", mid)));
+        protos.add(this.genWithKeyMap("alg_mid_feature_share_tags", mid, ImmutableMap.of("mid", mid)));
+        protos.add(this.genWithKeyMap("alg_mid_feature_sharecf", mid, ImmutableMap.of("mid", mid)));
+        protos.add(this.genWithKeyMap("alg_mid_feature_returncf", mid, ImmutableMap.of("mid", mid)));
+        protos.add(this.genWithKeyMap("alg_recsys_feature_user_share_return_stat", mid, ImmutableMap.of("mid", mid)));
+
+        return this.getFeatureByProto(protos);
+    }
+
     private Feature getFeatureByProto(List<FeatureKeyProto> protos) {
         Map<String, String> result = remoteService.getFeature(protos);
         Feature feature = new Feature();

+ 72 - 186
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/rank/strategy/RankStrategy4RegionMergeModelV565.java

@@ -10,6 +10,7 @@ import com.tzld.piaoquan.recommend.server.service.rank.extractor.ExtractorUtils;
 import com.tzld.piaoquan.recommend.server.service.recall.strategy.*;
 import com.tzld.piaoquan.recommend.server.service.score.ScorerUtils;
 import com.tzld.piaoquan.recommend.server.util.CommonCollectionUtils;
+import com.tzld.piaoquan.recommend.server.util.ExtractFeature20250218;
 import com.tzld.piaoquan.recommend.server.util.FeatureBucketUtils;
 import com.tzld.piaoquan.recommend.server.util.SimilarityUtils;
 import lombok.extern.slf4j.Slf4j;
@@ -89,215 +90,100 @@ public class RankStrategy4RegionMergeModelV565 extends RankStrategy4RegionMergeM
         String headVid = String.valueOf(param.getHeadVid());
         String sceneType = String.valueOf(param.getHotSceneType());
         Map<String, Map<String, Map<String, String>>> videoBaseInfoMap = featureService.getVideoBaseInfo(headVid, vids);
-        FeatureService.Feature feature = featureService.getNewFeature(provinceCn, param.getMid(), sceneType, headVid, videoBaseInfoMap, vids);
+        String appType = String.valueOf(param.getAppType());
+        String brand = "";
+        if (Objects.nonNull(param.getMachineInfo())) {
+            brand = param.getMachineInfo().getBrand();
+        }
+
+        long ts = System.currentTimeMillis() / 1000;
+
+        FeatureService.Feature feature = featureService.getFeatureByNewLabel(appType, sceneType, provinceCn, brand, param.getMid(), param.getHeadVid().toString(), vids, videoBaseInfoMap);
         Map<String, Map<String, String>> featureOriginUser = feature.getUserFeature();
         Map<String, Map<String, Map<String, String>>> featureOriginVideo = feature.getVideoFeature();
         Map<String, String> headVideoInfo = videoBaseInfoMap.getOrDefault(headVid, new HashMap<>()).getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
 
         // 2 特征处理
         Map<String, Double> userFeatureMapDouble = new HashMap<>();
-        String mid = param.getMid();
-        Map<String, String> c1 = featureOriginUser.getOrDefault("alg_mid_feature_play", new HashMap<>());
-        Map<String, String> c2 = featureOriginUser.getOrDefault("alg_mid_feature_share_and_return", new HashMap<>());
-        Map<String, String> c3 = featureOriginUser.getOrDefault("alg_mid_feature_play_tags", new HashMap<>());
-        Map<String, String> c4 = featureOriginUser.getOrDefault("alg_mid_feature_return_tags", new HashMap<>());
-        Map<String, String> c5 = featureOriginUser.getOrDefault("alg_mid_feature_share_tags", new HashMap<>());
-        Map<String, String> c6 = featureOriginUser.getOrDefault("alg_mid_feature_feed_exp_share_tags_v2", new HashMap<>());
-        Map<String, String> c7 = featureOriginUser.getOrDefault("alg_mid_feature_feed_exp_return_tags_v2", new HashMap<>());
-        Map<String, String> c8 = featureOriginUser.getOrDefault("alg_mid_feature_sharecf", new HashMap<>());
-        Map<String, String> c9 = featureOriginUser.getOrDefault("alg_mid_feature_returncf", new HashMap<>());
-
-        if (!c1.isEmpty()) {
-            userFeatureMapDouble.put("playcnt_6h", Double.parseDouble(c1.getOrDefault("playcnt_6h", "0")));
-            userFeatureMapDouble.put("playcnt_1d", Double.parseDouble(c1.getOrDefault("playcnt_1d", "0")));
-            userFeatureMapDouble.put("playcnt_3d", Double.parseDouble(c1.getOrDefault("playcnt_3d", "0")));
-            userFeatureMapDouble.put("playcnt_7d", Double.parseDouble(c1.getOrDefault("playcnt_7d", "0")));
-        }
-        if (!c2.isEmpty()) {
-            userFeatureMapDouble.put("share_pv_12h", Double.parseDouble(c2.getOrDefault("share_pv_12h", "0")));
-            userFeatureMapDouble.put("share_pv_1d", Double.parseDouble(c2.getOrDefault("share_pv_1d", "0")));
-            userFeatureMapDouble.put("share_pv_3d", Double.parseDouble(c2.getOrDefault("share_pv_3d", "0")));
-            userFeatureMapDouble.put("share_pv_7d", Double.parseDouble(c2.getOrDefault("share_pv_7d", "0")));
-            userFeatureMapDouble.put("return_uv_12h", Double.parseDouble(c2.getOrDefault("return_uv_12h", "0")));
-            userFeatureMapDouble.put("return_uv_1d", Double.parseDouble(c2.getOrDefault("return_uv_1d", "0")));
-            userFeatureMapDouble.put("return_uv_3d", Double.parseDouble(c2.getOrDefault("return_uv_3d", "0")));
-            userFeatureMapDouble.put("return_uv_7d", Double.parseDouble(c2.getOrDefault("return_uv_7d", "0")));
-        }
-
-        Map<String, String> c34567Map = new HashMap<>(15);
-        List<Tuple2> tmpList0 = Arrays.asList(
-                new Tuple2(c3, "c3_feature"),
-                new Tuple2(c4, "c4_feature"),
-                new Tuple2(c5, "c5_feature"),
-                new Tuple2(c6, "c6_feature"),
-                new Tuple2(c7, "c7_feature")
-        );
-        for (Tuple2 tuple2 : tmpList0) {
-            for (String key_time : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
-                String tags = tuple2.first.getOrDefault(key_time, "");
-                if (!tags.isEmpty()) {
-                    c34567Map.put(tuple2.name + "_" + key_time, tags);
-                }
-            }
-        }
 
-        Map<String, Map<String, String[]>> c89Map = new HashMap<>(4);
-        List<Tuple2> tmpList1 = Arrays.asList(
-                new Tuple2(c8, "c8_feature"),
-                new Tuple2(c9, "c9_feature")
-        );
-        for (Tuple2 tuple2 : tmpList1) {
-            for (String key_action : Arrays.asList("share", "return")) {
-                String cfListStr = tuple2.first.getOrDefault(key_action, "");
-                if (!cfListStr.isEmpty()) {
-                    Map<String, String[]> cfMap = new HashMap<>();
-                    String[] entries = cfListStr.split(",");
-                    for (String entry : entries) {
-                        String[] rList = entry.split(":");
-                        if (rList.length >= 4) { // 确保分割后有四个元素
-                            String key = rList[0];
-                            String value1 = rList[1];
-                            String value2 = rList[2];
-                            String value3 = rList[3];
-                            String[] strs = {value1, value2, value3};
-                            cfMap.put(key, strs);
-                        }
-                    }
-                    c89Map.put(tuple2.name + "_" + key_action, cfMap);
-                }
-            }
-        }
+        Map<String, String> c1 = featureOriginUser.getOrDefault("mid_global_feature_20250212", new HashMap<>());
+        Map<String, String> c4 = featureOriginUser.getOrDefault("mid_u2u_friend_index_feature_20250212", new HashMap<>());
+        Map<String, String> c5 = featureOriginUser.getOrDefault("alg_mid_feature_return_tags", new HashMap<>());
+        Map<String, String> c6 = featureOriginUser.getOrDefault("alg_mid_feature_share_tags", new HashMap<>());
+        Map<String, String> c7 = featureOriginUser.getOrDefault("alg_mid_feature_sharecf", new HashMap<>());
+        Map<String, String> c8 = featureOriginUser.getOrDefault("alg_mid_feature_returncf", new HashMap<>());
 
+        ExtractFeature20250218.handleC1(c1, userFeatureMapDouble);
+        ExtractFeature20250218.handleC4(c4, userFeatureMapDouble);
+        Map<String, Map<String, String[]>> c78FeatureMap = ExtractFeature20250218.handleC7ToC8(c7, c8);
 
         List<RankItem> rankItems = CommonCollectionUtils.toList(rovRecallRank, RankItem::new);
         for (RankItem item : rankItems) {
-            Map<String, Double> featureMap = new HashMap<>();
-            String vid = item.getVideoId() + "";
-            Map<String, String> b1 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_all_exp_v2", new HashMap<>());
-            Map<String, String> b2 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_all_share", new HashMap<>());
-            Map<String, String> b3 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_all_return", new HashMap<>());
-            Map<String, String> b6 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_exp2share_v2", new HashMap<>());
-            Map<String, String> b7 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_share2return", new HashMap<>());
-
-            Map<String, String> b8 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_noflow_exp_v2", new HashMap<>());
-            Map<String, String> b9 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_noflow_root_share_v2", new HashMap<>());
-            Map<String, String> b10 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_noflow_root_return_v2", new HashMap<>());
-            Map<String, String> b11 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_flow_exp_v2", new HashMap<>());
-            Map<String, String> b12 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_flow_root_share_v2", new HashMap<>());
-            Map<String, String> b13 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_flow_root_return_v2", new HashMap<>());
-            Map<String, String> b17 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_province_exp_v2", new HashMap<>());
-            Map<String, String> b18 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_province_root_share_v2", new HashMap<>());
-            Map<String, String> b19 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_province_root_return_v2", new HashMap<>());
 
-            List<Tuple4> originData = Arrays.asList(
-                    new Tuple4(b1, b2, b3, "b123"),
-                    new Tuple4(b1, b6, b7, "b167"),
-                    new Tuple4(b8, b9, b10, "b8910"),
-                    new Tuple4(b11, b12, b13, "b111213"),
-                    new Tuple4(b17, b18, b19, "b171819")
-            );
+            String vidStr = String.valueOf(item.getVideoId());
 
-            for (Tuple4 tuple4 : originData) {
-                for (String prefix2 : Arrays.asList("1h", "2h", "3h", "4h", "12h", "1d", "3d", "7d")) {
-                    double exp = tuple4.first.isEmpty() ? 0 : Double.parseDouble(tuple4.first.getOrDefault("exp_pv_" + prefix2, "0.0"));
-                    double share = tuple4.second.isEmpty() ? 0 : Double.parseDouble(tuple4.second.getOrDefault("share_pv_" + prefix2, "0.0"));
-                    double returns = tuple4.third.isEmpty() ? 0 : Double.parseDouble(tuple4.third.getOrDefault("return_uv_" + prefix2, "0.0"));
-
-                    double f1 = ExtractorUtils.calDiv(share, exp);
-                    double f2 = ExtractorUtils.calLog(share);
-                    double f3 = ExtractorUtils.calDiv(returns, exp);
-                    double f4 = ExtractorUtils.calLog(returns);
-                    double f5 = f3 * f4;
-                    double f6 = ExtractorUtils.calDiv(returns, share);
-
-                    String key1 = tuple4.name + "_" + prefix2 + "_" + "STR";
-                    String key2 = tuple4.name + "_" + prefix2 + "_" + "log(share)";
-                    String key3 = tuple4.name + "_" + prefix2 + "_" + "ROV";
-                    String key4 = tuple4.name + "_" + prefix2 + "_" + "log(return)";
-                    String key5 = tuple4.name + "_" + prefix2 + "_" + "ROV*log(return)";
-                    String key6 = tuple4.name + "_" + prefix2 + "_" + "ROS";
-
-                    featureMap.put(key1, f1);
-                    featureMap.put(key2, f2);
-                    featureMap.put(key3, f3);
-                    featureMap.put(key4, f4);
-                    featureMap.put(key5, f5);
-                    featureMap.put(key6, f6);
-                }
-            }
-
-            Map<String, String> videoInfo = videoBaseInfoMap.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
-            featureMap.put("total_time", Double.parseDouble(videoInfo.getOrDefault("total_time", "0")));
-            featureMap.put("bit_rate", Double.parseDouble(videoInfo.getOrDefault("bit_rate", "0")));
-
-            String title = videoInfo.getOrDefault("title", "");
-            if (!title.isEmpty()) {
-                List<Future<Pair<String, Double[]>>> futures = new ArrayList<>();
-                for (String name : Arrays.asList("c3_feature", "c4_feature", "c5_feature", "c6_feature", "c7_feature")) {
-                    for (String key_time : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
-                        String key = name + "_" + key_time;
-                        String tags = c34567Map.getOrDefault(key, "");
-                        if (!tags.isEmpty()) {
-                            Future<Pair<String, Double[]>> future = ThreadPoolFactory.defaultPool().submit(() -> {
-                                Double[] doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
-                                return Pair.create(key, doubles);
-                            });
-                            futures.add(future);
-                        }
-                    }
-                }
-                try {
-                    for (Future<Pair<String, Double[]>> future : futures) {
-                        Pair<String, Double[]> pair = future.get(1000, TimeUnit.MILLISECONDS);
-                        featureMap.put(pair.getFirst() + "_matchnum", pair.getSecond()[0]);
-                        featureMap.put(pair.getFirst() + "_maxscore", pair.getSecond()[1]);
-                        featureMap.put(pair.getFirst() + "_avgscore", pair.getSecond()[2]);
-                    }
-                } catch (Exception e) {
-                    log.error("concurrent similarity error", e);
-                }
-            }
-
-            if (!vid.isEmpty()) {
-                for (String key_feature : Arrays.asList("c8_feature", "c9_feature")) {
-                    for (String key_action : Arrays.asList("share", "return")) {
-                        Map<String, String[]> cfMap = c89Map.getOrDefault(key_feature + "_" + key_action, new HashMap<>());
-                        if (cfMap.containsKey(vid)) {
-                            String[] scores = cfMap.get(vid);
-                            Double score1 = Double.parseDouble(scores[0]);
-                            Double score2 = Double.parseDouble(scores[1]);
-                            Double score3 = Double.parseDouble(scores[2]) <= 0 ? 0D : 1.0 / Double.parseDouble(scores[2]);
-                            featureMap.put(key_feature + "_" + key_action + "_score", score1);
-                            featureMap.put(key_feature + "_" + key_action + "_num", score2);
-                            featureMap.put(key_feature + "_" + key_action + "_rank", score3);
-                        }
-                    }
-                }
-            }
-            Map<String, String> d1 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_recsys_feature_cf_i2i_new_v2", new HashMap<>());
-            if (!d1.isEmpty()) {
-                featureMap.put("d1_exp", Double.parseDouble(d1.getOrDefault("exp", "0")));
-                featureMap.put("d1_return_n", Double.parseDouble(d1.getOrDefault("return_n", "0")));
-                featureMap.put("d1_rovn", Double.parseDouble(d1.getOrDefault("rovn", "0")));
-            }
-            // ******************** new feature ********************
-            addVideoStatFeature(vid, featureOriginVideo, featureMap);
-            //addVideoCFFeature(vid, featureOriginVideo, featureMap);
-            addVideoSimFeature(headVideoInfo, videoInfo, featureMap);
+            Map<String, Double> featureMap = new HashMap<>();
+            Map<String, String> v1Feature = videoBaseInfoMap.getOrDefault(vidStr, new HashMap<>()).getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
+            Map<String, Map<String, String>> videoFeatureMap = featureOriginVideo.getOrDefault(vidStr, new HashMap<>());
+            Map<String, String> b1 = videoFeatureMap.getOrDefault("alg_vid_global_feature_20250212", new HashMap<>());
+            Map<String, String> b2 = videoFeatureMap.getOrDefault("alg_vid_recommend_exp_feature_20250212", new HashMap<>());
+            Map<String, String> b3 = videoFeatureMap.getOrDefault("alg_vid_recommend_flowpool_exp_feature_20250212", new HashMap<>());
+            Map<String, String> b4 = videoFeatureMap.getOrDefault("alg_vid_apptype_recommend_exp_feature_20250212", new HashMap<>());
+            Map<String, String> b5 = videoFeatureMap.getOrDefault("alg_vid_province_recommend_exp_feature_20250212", new HashMap<>());
+            Map<String, String> b6 = videoFeatureMap.getOrDefault("alg_vid_brand_recommend_exp_feature_20250212", new HashMap<>());
+            Map<String, String> b7 = videoFeatureMap.getOrDefault("alg_vid_hotsencetype_recommend_exp_feature_20250212", new HashMap<>());
+            Map<String, String> b8 = videoFeatureMap.getOrDefault("alg_merge_cate1_recommend_exp_feature_20250212", new HashMap<>());
+            Map<String, String> b9 = videoFeatureMap.getOrDefault("alg_merge_cate2_recommend_exp_feature_20250212", new HashMap<>());
+            Map<String, String> b10 = videoFeatureMap.getOrDefault("alg_channel_recommend_exp_feature_20250212", new HashMap<>());
+            Map<String, String> b11 = videoFeatureMap.getOrDefault("alg_festive_recommend_exp_feature_20250212", new HashMap<>());
+            Map<String, String> b12 = videoFeatureMap.getOrDefault("alg_vid_long_period_recommend_exp_feature_20250212", new HashMap<>());
+            Map<String, String> b13 = videoFeatureMap.getOrDefault("alg_video_unionid_recommend_exp_feature_20250212", new HashMap<>());
+
+            Map<String, String> c2 = videoFeatureMap.getOrDefault("mid_merge_cate1_feature_20250212", new HashMap<>());
+            Map<String, String> c3 = videoFeatureMap.getOrDefault("mid_merge_cate2_feature_20250212", new HashMap<>());
+
+            Map<String, String> d1 = videoFeatureMap.getOrDefault("scene_type_vid_cf_feature_20250212", new HashMap<>());
+            Map<String, String> d2 = videoFeatureMap.getOrDefault("vid_click_cf_feature_20250212", new HashMap<>());
+            Map<String, String> d3 = videoFeatureMap.getOrDefault("alg_recsys_feature_cf_i2i_v2", new HashMap<>());
+
+            Map<String, Map<String, String>> b2ToB11AndB13Map = new HashMap<>();
+            b2ToB11AndB13Map.put("b2", b2);
+            b2ToB11AndB13Map.put("b3", b3);
+            b2ToB11AndB13Map.put("b4", b4);
+            b2ToB11AndB13Map.put("b5", b5);
+            b2ToB11AndB13Map.put("b6", b6);
+            b2ToB11AndB13Map.put("b7", b7);
+            b2ToB11AndB13Map.put("b8", b8);
+            b2ToB11AndB13Map.put("b9", b9);
+            b2ToB11AndB13Map.put("b10", b10);
+            b2ToB11AndB13Map.put("b11", b11);
+            b2ToB11AndB13Map.put("b13", b13);
+
+            ExtractFeature20250218.handleB1(b1, featureMap);
+            ExtractFeature20250218.handleB12(b12, featureMap);
+            ExtractFeature20250218.handleB2ToB11AndB13(b2ToB11AndB13Map, featureMap);
+            ExtractFeature20250218.handleC2ToC3(c2, c3, featureMap);
+            ExtractFeature20250218.useC7ToC8(c78FeatureMap, vidStr, featureMap);
+            ExtractFeature20250218.handleC5ToC6(c5, c6, v1Feature, featureMap);
+            ExtractFeature20250218.handleD1(d1, featureMap);
+            ExtractFeature20250218.handleD2(d2, featureMap);
+            ExtractFeature20250218.handleD3(d3, featureMap);
+            ExtractFeature20250218.handleVideoBasicFeature(v1Feature, ts, featureMap);
+            ExtractFeature20250218.handleVideoSimilarity(v1Feature, headVideoInfo, featureMap);
 
             item.featureMapDouble = featureMap;
         }
 
         // 3 连续值特征分桶
-        Map<String, String> userFeatureMap = FeatureBucketUtils.bucketFeature("20241209_rov_bucket.txt", userFeatureMapDouble);
+        Map<String, String> userFeatureMap = FeatureBucketUtils.bucketFeatureV2("20250218_bucket_322.txt", userFeatureMapDouble);
         for (RankItem item : rankItems) {
             Map<String, Double> featureMapDouble = item.featureMapDouble;
-            item.featureMap = FeatureBucketUtils.bucketFeature("20241209_rov_bucket.txt", featureMapDouble);
+            item.featureMap = FeatureBucketUtils.bucketFeatureV2("20250218_bucket_322.txt", featureMapDouble);
         }
         // 4 排序模型计算
-        double xgbRovNegRate = mergeWeight.getOrDefault("xgbRovNegRate", 0.02);
+        double xgbRovNegRate = mergeWeight.getOrDefault("xgbRovNegRate", 0.05);
         Map<String, String> sceneFeatureMap = new HashMap<>(0);
-        List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_xgb_rov_20250109.conf").scoring(sceneFeatureMap, userFeatureMap, rankItems);
+        List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_xgb_rov_20250228.conf").scoring(sceneFeatureMap, userFeatureMap, rankItems);
         // 5 排序公式特征
         Map<String, Map<String, String>> vid2MapFeature = this.getVideoRedisFeature(vids, "redis:vid_hasreturn_vor:");
         List<Video> result = new ArrayList<>();

+ 2 - 1
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/score/ScorerUtils.java

@@ -39,12 +39,13 @@ public final class ScorerUtils {
         ScorerUtils.init("feeds_score_config_xgb_20250109.conf");
         ScorerUtils.init("feeds_score_config_xgb_rov_20241209.conf");
         ScorerUtils.init("feeds_score_config_xgb_rov_20250109.conf");
+        ScorerUtils.init("feeds_score_config_xgb_rov_20250228.conf");
         ScorerUtils.init4Recall("feeds_recall_config_region_v1.conf");
         ScorerUtils.init4Recall("feeds_recall_config_region_ros.conf");
         ScorerUtils.init4Recall("feeds_score_config_bless.conf");
         ScorerUtils.init4Recall("feeds_recall_config_tomson.conf");
         ScorerUtils.init4Recall("feeds_recall_config_region_v7_longterm.conf");
-        List<String> bucketFileList = Arrays.asList("20241209_rov_bucket.txt", "20241209_nor_bucket.txt");
+        List<String> bucketFileList = Arrays.asList("20241209_rov_bucket.txt", "20241209_nor_bucket.txt", "20250218_bucket_322.txt");
         FeatureBucketUtils.init(bucketFileList);
     }
 

+ 78 - 78
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/util/ExtractFeature20250218.java

@@ -17,37 +17,37 @@ public class ExtractFeature20250218 {
     private ExtractFeature20250218() { // private and empty: the class cannot be instantiated from outside
     }
 
-    public static void handleB1(Map<String, Object> b1Feature, Map<String, Object> featureMap) {
+    public static void handleB1(Map<String, String> b1Feature, Map<String, Double> featureMap) { // writes b1_<index>_<window> and b1_rovn*log(r)_<window> into featureMap; keys absent from b1Feature parse as "0"
         List<String> times = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
         List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_1_uv", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
         for (String time : times) {
             for (String index : indexList) {
-                double value = Double.parseDouble(b1Feature.getOrDefault(index + "_" + time, "0").toString());
+                double value = Double.parseDouble(b1Feature.getOrDefault(index + "_" + time, "0"));
                 featureMap.put("b1_" + index + "_" + time, value);
             }
 
-            double rovn = Double.parseDouble(b1Feature.getOrDefault("rovn_" + time, "0").toString());
-            double returnNUv = Double.parseDouble(b1Feature.getOrDefault("return_1_uv_" + time, "0").toString());
+            double rovn = Double.parseDouble(b1Feature.getOrDefault("rovn_" + time, "0"));
+            double returnNUv = Double.parseDouble(b1Feature.getOrDefault("return_1_uv_" + time, "0")); // NOTE(review): reads return_1_uv_*, while handleB2ToB11AndB13 and handleB12 read return_n_uv_* — confirm this is intended
 
             featureMap.put("b1_rovn*log(r)_" + time, rovn * ExtractorUtils.calLog(returnNUv));
         }
 
     }
 
-    public static void handleB2ToB11AndB13(Map<String, Map<String, Object>> videoFeature, Map<String, Object> featureMap) {
+    public static void handleB2ToB11AndB13(Map<String, Map<String, String>> videoFeature, Map<String, Double> featureMap) {
         List<String> times = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
         List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
-        for (Map.Entry<String, Map<String, Object>> entry : videoFeature.entrySet()) {
+        for (Map.Entry<String, Map<String, String>> entry : videoFeature.entrySet()) {
             String key = entry.getKey();
-            Map<String, Object> feature = entry.getValue();
+            Map<String, String> feature = entry.getValue();
             for (String time : times) {
                 for (String index : indexList) {
-                    double value = Double.parseDouble(feature.getOrDefault(index + "_" + time, "0").toString());
+                    double value = Double.parseDouble(feature.getOrDefault(index + "_" + time, "0"));
                     featureMap.put(key + "_" + index + "_" + time, value);
                 }
 
-                double rovn = Double.parseDouble(feature.getOrDefault("rovn_" + time, "0").toString());
-                double returnNUv = Double.parseDouble(feature.getOrDefault("return_n_uv_" + time, "0").toString());
+                double rovn = Double.parseDouble(feature.getOrDefault("rovn_" + time, "0"));
+                double returnNUv = Double.parseDouble(feature.getOrDefault("return_n_uv_" + time, "0"));
 
                 featureMap.put(key + "_rovn*log(r)_" + time, rovn * ExtractorUtils.calLog(returnNUv));
             }
@@ -55,28 +55,28 @@ public class ExtractFeature20250218 {
         }
     }
 
-    public static void handleB12(Map<String, Object> b12Feature, Map<String, Object> featureMap) {
+    /**
+     * Flattens the raw "b12" statistics into {@code featureMap}.
+     *
+     * <p>For every window in 7d/14d/30d/60d and every metric in {@code indexList}, the raw entry
+     * {@code <metric>_<window>} is parsed as a double (a missing key counts as "0") and stored as
+     * {@code b12_<metric>_<window>}. One derived feature per window, {@code b12_rovn*log(r)_<window>},
+     * is stored as {@code rovn * ExtractorUtils.calLog(return_n_uv)}.
+     *
+     * @param b12Feature raw values keyed by {@code <metric>_<window>}; values are parsed with
+     *                   {@link Double#parseDouble(String)}
+     * @param featureMap output map that receives the numeric features; existing keys are overwritten
+     * @throws NumberFormatException if a present value is not a parseable double
+     */
+    public static void handleB12(Map<String, String> b12Feature, Map<String, Double> featureMap) {
         List<String> times = Arrays.asList("7d", "14d", "30d", "60d");
         List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
         for (String time : times) {
             for (String index : indexList) {
-                double value = Double.parseDouble(b12Feature.getOrDefault(index + "_" + time, "0").toString());
+                double value = Double.parseDouble(b12Feature.getOrDefault(index + "_" + time, "0"));
                 featureMap.put("b12_" + index + "_" + time, value);
             }
-            double rovn = Double.parseDouble(b12Feature.getOrDefault("rovn_" + time, "0").toString());
-            double returnNUv = Double.parseDouble(b12Feature.getOrDefault("return_n_uv_" + time, "0").toString());
+            // Derived cross feature; calLog is defined in ExtractorUtils (presumably a log transform, confirm there).
+            double rovn = Double.parseDouble(b12Feature.getOrDefault("rovn_" + time, "0"));
+            double returnNUv = Double.parseDouble(b12Feature.getOrDefault("return_n_uv_" + time, "0"));
             featureMap.put("b12_rovn*log(r)_" + time, rovn * ExtractorUtils.calLog(returnNUv));
         }
     }
 
-    public static void handleVideoBasicFeature(Map<String, Object> videoFeature, long ts, Map<String, Object> featureMap) {
-        Double totalTime = Double.parseDouble(videoFeature.getOrDefault("total_time", "0").toString());
-        Double width = Double.parseDouble(videoFeature.getOrDefault("width", "0d").toString());
-        Double height = Double.parseDouble(videoFeature.getOrDefault("height", "0d").toString());
-        Double size = Double.parseDouble(videoFeature.getOrDefault("size", "0d").toString());
-        Double bit_rate = Double.parseDouble(videoFeature.getOrDefault("bit_rate", "0d").toString());
-        String festiveLabel1 = videoFeature.getOrDefault("festive_label1", "").toString();
-        String festiveLabel2 = videoFeature.getOrDefault("festive_label2", "").toString();
+    public static void handleVideoBasicFeature(Map<String, String> videoFeature, long ts, Map<String, Double> featureMap) {
+        Double totalTime = Double.parseDouble(videoFeature.getOrDefault("total_time", "0"));
+        Double width = Double.parseDouble(videoFeature.getOrDefault("width", "0d"));
+        Double height = Double.parseDouble(videoFeature.getOrDefault("height", "0d"));
+        Double size = Double.parseDouble(videoFeature.getOrDefault("size", "0d"));
+        Double bit_rate = Double.parseDouble(videoFeature.getOrDefault("bit_rate", "0d"));
+        String festiveLabel1 = videoFeature.getOrDefault("festive_label1", "");
+        String festiveLabel2 = videoFeature.getOrDefault("festive_label2", "");
 
 
         featureMap.put("total_time", totalTime);
@@ -85,78 +85,78 @@ public class ExtractFeature20250218 {
         featureMap.put("size", size);
         featureMap.put("bit_rate", bit_rate);
         featureMap.put("width/height", ExtractorUtils.divisionDouble(width, height));
-        featureMap.put("is_festive", 0);
-        featureMap.put("is_greeting", 0);
+        featureMap.put("is_festive", 0d);
+        featureMap.put("is_greeting", 0d);
         if (StringUtils.equals(festiveLabel1, "节假日")) {
-            featureMap.put("is_festive", 1);
+            featureMap.put("is_festive", 1d);
         } else if (StringUtils.equals(festiveLabel1, "问候语")) {
-            featureMap.put("is_greeting", 1);
+            featureMap.put("is_greeting", 1d);
         }
 
-        featureMap.put("hour", ExtractorUtils.getHourByTimestamp(ts));
-        featureMap.put("day_of_week", ExtractorUtils.getDayOfWeekByTimestamp(ts));
+        featureMap.put("hour", Double.parseDouble(String.valueOf(ExtractorUtils.getHourByTimestamp(ts))));
+        featureMap.put("day_of_week", Double.parseDouble(String.valueOf(ExtractorUtils.getDayOfWeekByTimestamp(ts))));
 
-        long createTs = Long.parseLong(videoFeature.getOrDefault("gmt_create_timestamp", "0").toString()) / 1000;
-        featureMap.put("create_ts_diff", ExtractorUtils.getDaysBetween(createTs, ts));
+        long createTs = Long.parseLong(videoFeature.getOrDefault("gmt_create_timestamp", "0")) / 1000;
+        featureMap.put("create_ts_diff", Double.parseDouble(String.valueOf(ExtractorUtils.getDaysBetween(createTs, ts))));
 
         String date = LocalDateTime.ofInstant(Instant.ofEpochSecond(ts), ZoneId.systemDefault()).format(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
         String festiveByDate = FestiveUtil.getFestiveByDate(date);
-        featureMap.put("today_is_fes", 0);
-        featureMap.put("video_fes_eq", 0);
+        featureMap.put("today_is_fes", 0d);
+        featureMap.put("video_fes_eq", 0d);
         if (StringUtils.isNotBlank(festiveByDate)) {
-            featureMap.put("today_is_fes", 1);
+            featureMap.put("today_is_fes", 1d);
             if (StringUtils.equals(festiveByDate, festiveLabel2)) {
-                featureMap.put("video_today_fes_eq", 1);
+                featureMap.put("video_today_fes_eq", 1d);
             }
         }
 
     }
 
-    public static void handleC1(Map<String, Object> c1Feature, Map<String, Object> featureMap) {
+    /**
+     * Flattens the raw "c1" statistics into {@code featureMap}.
+     *
+     * <p>For every window in 12h/24h/72h/168h and every metric in {@code indexList}, the raw entry
+     * {@code <metric>_<window>} is parsed as a double (a missing key counts as "0") and stored as
+     * {@code c1_<metric>_<window>}. One derived feature per window, {@code c1_rovn*log(r)_<window>},
+     * is stored as {@code rovn * ExtractorUtils.calLog(return_1_uv)}.
+     *
+     * @param c1Feature  raw values keyed by {@code <metric>_<window>}; values are parsed with
+     *                   {@link Double#parseDouble(String)}
+     * @param featureMap output map that receives the numeric features; existing keys are overwritten
+     * @throws NumberFormatException if a present value is not a parseable double
+     */
+    public static void handleC1(Map<String, String> c1Feature, Map<String, Double> featureMap) {
         List<String> times = Arrays.asList("12h", "24h", "72h", "168h");
         List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_1_uv", "click", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
         for (String time : times) {
             for (String index : indexList) {
-                double value = Double.parseDouble(c1Feature.getOrDefault(index + "_" + time, "0").toString());
+                double value = Double.parseDouble(c1Feature.getOrDefault(index + "_" + time, "0"));
                 featureMap.put("c1_" + index + "_" + time, value);
             }
-            double rovn = Double.parseDouble(c1Feature.getOrDefault("rovn_" + time, "0").toString());
-            double returnNUv = Double.parseDouble(c1Feature.getOrDefault("return_1_uv_" + time, "0").toString());
+            // Note: the local is named returnNUv but it reads the "return_1_uv_<window>" key.
+            double rovn = Double.parseDouble(c1Feature.getOrDefault("rovn_" + time, "0"));
+            double returnNUv = Double.parseDouble(c1Feature.getOrDefault("return_1_uv_" + time, "0"));
             featureMap.put("c1_rovn*log(r)_" + time, rovn * ExtractorUtils.calLog(returnNUv));
         }
     }
 
-    public static void handleC2ToC3(Map<String, Object> c2Feature, Map<String, Object> c3Feature, Map<String, Object> featureMap) {
-        Map<String, Map<String, Object>> featureMaps = new HashMap<>();
+    /**
+     * Flattens the raw "c2" and "c3" statistics into {@code featureMap}.
+     *
+     * <p>For each of the two sources, every window in 12h/24h/72h/168h and every metric in
+     * {@code indexList}, the raw entry {@code <metric>_<window>} is parsed as a double (a missing
+     * key counts as "0") and stored as {@code <c2|c3>_<metric>_<window>}.
+     *
+     * @param c2Feature  raw values for the "c2" prefix; dereferenced without a null check
+     * @param c3Feature  raw values for the "c3" prefix; dereferenced without a null check
+     * @param featureMap output map that receives the numeric features; existing keys are overwritten
+     * @throws NumberFormatException if a present value is not a parseable double
+     */
+    public static void handleC2ToC3(Map<String, String> c2Feature, Map<String, String> c3Feature, Map<String, Double> featureMap) {
+        // Pair each source map with the prefix used for its output keys.
+        Map<String, Map<String, String>> featureMaps = new HashMap<>();
         featureMaps.put("c2", c2Feature);
         featureMaps.put("c3", c3Feature);
 
         List<String> times = Arrays.asList("12h", "24h", "72h", "168h");
         List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "click");
 
-        for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
+        for (Map.Entry<String, Map<String, String>> entry : featureMaps.entrySet()) {
             String key = entry.getKey();
-            Map<String, Object> feature = entry.getValue();
+            Map<String, String> feature = entry.getValue();
             for (String time : times) {
                 for (String index : indexList) {
-                    double value = Double.parseDouble(feature.getOrDefault(index + "_" + time, "0").toString());
+                    double value = Double.parseDouble(feature.getOrDefault(index + "_" + time, "0"));
                     featureMap.put(key + "_" + index + "_" + time, value);
                 }
             }
         }
     }
 
-    public static void handleC4(Map<String, Object> c4Feature, Map<String, Object> featureMap) {
+    public static void handleC4(Map<String, String> c4Feature, Map<String, Double> featureMap) {
         List<String> times = Arrays.asList("24h", "72h", "168h");
         List<String> indexList = Arrays.asList("str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
 
         for (String time : times) {
             for (String index : indexList) {
-                double value = Double.parseDouble(c4Feature.getOrDefault("avg_" + index + "_" + time, "0").toString());
+                double value = Double.parseDouble(c4Feature.getOrDefault("avg_" + index + "_" + time, "0"));
                 featureMap.put("c4_avg_" + index + "_" + time, value);
 
-                double max = Double.parseDouble(c4Feature.getOrDefault("max_" + index + "_" + time, "0").toString());
-                double min = Double.parseDouble(c4Feature.getOrDefault("min_" + index + "_" + time, "0").toString());
+                double max = Double.parseDouble(c4Feature.getOrDefault("max_" + index + "_" + time, "0"));
+                double min = Double.parseDouble(c4Feature.getOrDefault("min_" + index + "_" + time, "0"));
 
                 featureMap.put("c4_diff_" + index + "_" + time, max - min);
             }
@@ -164,19 +164,19 @@ public class ExtractFeature20250218 {
 
     }
 
-    public static void handleC5ToC6(Map<String, Object> c5Feature, Map<String, Object> c6Feature, Map<String, Object> videoMap, Map<String, Object> featureMap) {
-        Map<String, Map<String, Object>> featureMaps = new HashMap<>();
+    public static void handleC5ToC6(Map<String, String> c5Feature, Map<String, String> c6Feature, Map<String, String> videoMap, Map<String, Double> featureMap) {
+        Map<String, Map<String, String>> featureMaps = new HashMap<>();
         featureMaps.put("c5", c5Feature);
         featureMaps.put("c6", c6Feature);
         List<String> times = Arrays.asList("tags_1d", "tags_3d", "tags_7d");
 
-        String title = videoMap.getOrDefault("title", "").toString();
+        String title = videoMap.getOrDefault("title", "");
 
-        for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
+        for (Map.Entry<String, Map<String, String>> entry : featureMaps.entrySet()) {
             String key = entry.getKey();
-            Map<String, Object> feature = entry.getValue();
+            Map<String, String> feature = entry.getValue();
             for (String time : times) {
-                String tags = feature.getOrDefault(time, "").toString();
+                String tags = feature.getOrDefault(time, "");
                 Double[] scores = ExtractorUtils.funcC34567ForTagsNew(tags, title);
                 featureMap.put(key + "_matchnum" + "_" + time, scores[0]);
                 featureMap.put(key + "_maxscore" + "_" + time, scores[1]);
@@ -186,20 +186,20 @@ public class ExtractFeature20250218 {
 
     }
 
-    public static Map<String, Map<String, String[]>> handleC7ToC8(Map<String, Object> c7Feature, Map<String, Object> c8Feature) {
+    public static Map<String, Map<String, String[]>> handleC7ToC8(Map<String, String> c7Feature, Map<String, String> c8Feature) {
         Map<String, Map<String, String[]>> resultMap = new HashMap<>();
 
-        Map<String, Map<String, Object>> featureMaps = new HashMap<>();
+        Map<String, Map<String, String>> featureMaps = new HashMap<>();
         featureMaps.put("c7", c7Feature);
         featureMaps.put("c8", c8Feature);
         List<String> indexList = Arrays.asList("share", "return");
-        for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
+        for (Map.Entry<String, Map<String, String>> entry : featureMaps.entrySet()) {
             String key = entry.getKey();
-            Map<String, Object> feature = entry.getValue();
+            Map<String, String> feature = entry.getValue();
             for (String index : indexList) {
                 if (feature.containsKey(index)) {
                     Map<String, String[]> cfMap = new HashMap<>();
-                    String[] entries = feature.get(index).toString().split(",");
+                    String[] entries = feature.get(index).split(",");
                     for (String e : entries) {
                         String[] rList = e.split(":");
                         if (rList.length >= 4) {
@@ -219,7 +219,7 @@ public class ExtractFeature20250218 {
         return resultMap;
     }
 
-    public static void useC7ToC8(Map<String, Map<String, String[]>> map, String vid, Map<String, Object> featureMap) {
+    public static void useC7ToC8(Map<String, Map<String, String[]>> map, String vid, Map<String, Double> featureMap) {
         if (StringUtils.isBlank(vid)) {
             return;
         }
@@ -237,44 +237,44 @@ public class ExtractFeature20250218 {
         }
     }
 
-    public static void handleD3(Map<String, Object> d3Feature, Map<String, Object> featureMap) {
+    /**
+     * Copies the "exp", "return_n" and "rovn" entries of {@code d3Feature} into {@code featureMap}
+     * as doubles under the keys {@code d3_exp}, {@code d3_return_n} and {@code d3_rovn}.
+     * A missing key counts as "0".
+     *
+     * @param d3Feature  raw values; parsed with {@link Double#parseDouble(String)}
+     * @param featureMap output map that receives the numeric features; existing keys are overwritten
+     * @throws NumberFormatException if a present value is not a parseable double
+     */
+    public static void handleD3(Map<String, String> d3Feature, Map<String, Double> featureMap) {
         for (String index : Arrays.asList("exp", "return_n", "rovn")) {
-            double value = Double.parseDouble(d3Feature.getOrDefault(index, "0").toString());
+            double value = Double.parseDouble(d3Feature.getOrDefault(index, "0"));
             featureMap.put("d3_" + index, value);
         }
     }
 
-    public static void handleD1(Map<String, Object> d4Feature, Map<String, Object> featureMap) {
-        double rosCfScores = Double.parseDouble(d4Feature.getOrDefault("ros_cf_score", "0").toString());
+    /**
+     * Extracts the "d1" features into {@code featureMap}.
+     *
+     * <p>{@code ros_cf_score} and {@code rov_cf_score} are stored unchanged as
+     * {@code d1_ros_cf_score} and {@code d1_rov_cf_score}. {@code ros_cf_rank} and
+     * {@code rov_cf_rank} are passed through {@code ExtractorUtils.reciprocal} before being stored
+     * as {@code d1_ros_cf_rank} and {@code d1_rov_cf_rank}. A missing key counts as "0".
+     *
+     * @param d1Feature  raw values; parsed with {@link Double#parseDouble(String)}
+     * @param featureMap output map that receives the numeric features; existing keys are overwritten
+     * @throws NumberFormatException if a present value is not a parseable double
+     */
+    public static void handleD1(Map<String, String> d1Feature, Map<String, Double> featureMap) {
+        double rosCfScores = Double.parseDouble(d1Feature.getOrDefault("ros_cf_score", "0"));
         featureMap.put("d1_ros_cf_score", rosCfScores);
-        double rovCfScores = Double.parseDouble(d4Feature.getOrDefault("rov_cf_score", "0").toString());
+        double rovCfScores = Double.parseDouble(d1Feature.getOrDefault("rov_cf_score", "0"));
         featureMap.put("d1_rov_cf_score", rovCfScores);
 
-        double rosCfRank = Double.parseDouble(d4Feature.getOrDefault("ros_cf_rank", "0").toString());
+        // Ranks are transformed by ExtractorUtils.reciprocal; its handling of the default 0 is defined there (not visible here).
+        double rosCfRank = Double.parseDouble(d1Feature.getOrDefault("ros_cf_rank", "0"));
         featureMap.put("d1_ros_cf_rank", ExtractorUtils.reciprocal(rosCfRank));
-        double rovCfRank = Double.parseDouble(d4Feature.getOrDefault("rov_cf_rank", "0").toString());
+        double rovCfRank = Double.parseDouble(d1Feature.getOrDefault("rov_cf_rank", "0"));
         featureMap.put("d1_rov_cf_rank", ExtractorUtils.reciprocal(rovCfRank));
     }
 
-    public static void handleD2(Map<String, Object> d5Feature, Map<String, Object> featureMap) {
-        double score = Double.parseDouble(d5Feature.getOrDefault("score", "0").toString());
+    /**
+     * Extracts the "d2" features into {@code featureMap}.
+     *
+     * <p>{@code score} is stored unchanged as {@code d2_score}; {@code rank} is passed through
+     * {@code ExtractorUtils.reciprocal} before being stored as {@code d2_rank}. A missing key
+     * counts as "0".
+     *
+     * @param d2Feature  raw values; parsed with {@link Double#parseDouble(String)}. The parameter
+     *                   was previously named {@code d5Feature}; it is renamed to match the
+     *                   {@code d2_} output prefix, as was done for {@code handleD1}.
+     * @param featureMap output map that receives the numeric features; existing keys are overwritten
+     * @throws NumberFormatException if a present value is not a parseable double
+     */
+    public static void handleD2(Map<String, String> d2Feature, Map<String, Double> featureMap) {
+        double score = Double.parseDouble(d2Feature.getOrDefault("score", "0"));
         featureMap.put("d2_score", score);
 
-        double rank = Double.parseDouble(d5Feature.getOrDefault("rank", "0").toString());
+        double rank = Double.parseDouble(d2Feature.getOrDefault("rank", "0"));
         featureMap.put("d2_rank", ExtractorUtils.reciprocal(rank));
     }
 
-    public static void handleVideoSimilarity(Map<String, Object> videoFeature, Map<String, Object> headVideoFeature, Map<String, Object> featureMap) {
-        String headVideoTitle = headVideoFeature.getOrDefault("title", "").toString();
-        String headVideoMergeCate2 = headVideoFeature.getOrDefault("merge_second_level_cate", "").toString();
-        String headVideoMergeCate1 = headVideoFeature.getOrDefault("merge_first_level_cate", "").toString();
-        String headVideoFestiveLabel2 = headVideoFeature.getOrDefault("festive_label2", "").toString();
+    public static void handleVideoSimilarity(Map<String, String> videoFeature, Map<String, String> headVideoFeature, Map<String, Double> featureMap) {
+        String headVideoTitle = headVideoFeature.getOrDefault("title", "");
+        String headVideoMergeCate2 = headVideoFeature.getOrDefault("merge_second_level_cate", "");
+        String headVideoMergeCate1 = headVideoFeature.getOrDefault("merge_first_level_cate", "");
+        String headVideoFestiveLabel2 = headVideoFeature.getOrDefault("festive_label2", "");
 
 
-        String videoTitle = videoFeature.getOrDefault("title", "").toString();
-        String videoMergeCate2 = videoFeature.getOrDefault("merge_second_level_cate", "").toString();
-        String videoMergeCate1 = videoFeature.getOrDefault("merge_first_level_cate", "").toString();
-        String videoFestiveLabel2 = videoFeature.getOrDefault("festive_label2", "").toString();
+        String videoTitle = videoFeature.getOrDefault("title", "");
+        String videoMergeCate2 = videoFeature.getOrDefault("merge_second_level_cate", "");
+        String videoMergeCate1 = videoFeature.getOrDefault("merge_first_level_cate", "");
+        String videoFestiveLabel2 = videoFeature.getOrDefault("festive_label2", "");
 
         double titleSimilarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoTitle);
         double headTitleAndMerge1Similarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoMergeCate1);

+ 28 - 0
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/util/FeatureBucketUtils.java

@@ -73,6 +73,34 @@ public class FeatureBucketUtils {
         return featureMap;
     }
 
+    /**
+     * Converts raw numeric features into their string form, replacing each value by a bucket
+     * position when the bucket definition registered under {@code bucketFile} covers that feature.
+     *
+     * <p>A feature is bucketed only when its value is greater than {@code 1E-8} and both a bucket
+     * count and a boundary array exist for its name. The bucketed value is
+     * {@code (ExtractorUtils.findInsertPosition(buckets, score) + 1) / bucketNum}. Every other
+     * feature (zero or negative values, names absent from the bucket definition) is kept with its
+     * raw value. Entries whose value is {@code null} are skipped.
+     *
+     * @param bucketFile key used to look up the bucket definition in {@code featureBucketMap}
+     * @param srcFeature raw feature values keyed by feature name; may be null or empty
+     * @return a new map from feature name to stringified value; empty when {@code srcFeature} is
+     *         empty or when no usable bucket definition exists for {@code bucketFile} (an error is
+     *         logged in the latter case)
+     */
+    public static Map<String, String> bucketFeatureV2(String bucketFile, Map<String, Double> srcFeature) {
+        if (MapUtils.isEmpty(srcFeature)) {
+            return new HashMap<>();
+        }
+        Map<String, String> featureMap = new HashMap<>(srcFeature.size());
+        Pair<Map<String, Double>, Map<String, double[]>> pair = featureBucketMap.get(bucketFile);
+        if (null == pair || MapUtils.isEmpty(pair.getLeft()) || MapUtils.isEmpty(pair.getRight())) {
+            log.error("failed to get {} bucket feature", bucketFile);
+            return featureMap;
+        }
+        Map<String, Double> bucketsLen = pair.getLeft();
+        Map<String, double[]> bucketsMap = pair.getRight();
+        for (Map.Entry<String, Double> entry : srcFeature.entrySet()) {
+            String name = entry.getKey();
+            Double score = entry.getValue();
+            if (score == null) {
+                // A null value cannot be compared or bucketed; skip it instead of failing on unboxing.
+                continue;
+            }
+            Double bucketNum = bucketsLen.get(name);
+            double[] buckets = bucketsMap.get(name);
+            // Note: zero values and features missing from the bucket file are NOT dropped;
+            // they fall through to the else branch and keep their raw value.
+            if (score > 1E-8 && bucketNum != null && buckets != null) {
+                Double scoreNew = 1.0 / bucketNum * (ExtractorUtils.findInsertPosition(buckets, score) + 1.0);
+                featureMap.put(name, String.valueOf(scoreNew));
+            } else {
+                featureMap.put(name, String.valueOf(score));
+            }
+        }
+        return featureMap;
+    }
+
     private static Pair<Map<String, Double>, Map<String, double[]>> loadBucket(String bucketFile) {
         Map<String, Double> bucketsLen = new HashMap<>();
         Map<String, double[]> bucketsMap = new HashMap<>();

Файловите разлики са ограничени, защото са твърде много
+ 0 - 6
recommend-server-service/src/main/resources/20250218_bucket_322.txt


+ 315 - 0
recommend-server-service/src/main/resources/feeds_score_config_xgb_rov_20250228.conf

@@ -0,0 +1,315 @@
+scorer-config = {
+  rov-score-config = {
+    scorer-name = "com.tzld.piaoquan.recommend.server.service.score.XGBoostScorer"
+    scorer-priority = 99
+    model-path = "zhangbo/model_xgb_for_str_v2.tar.gz"
+    param = {
+      localDir = "xgboost/model_xgb_for_str_v2"
+      features = [
+            "b1_is_return_1_1h"
+            "b1_is_return_1_3h"
+            "b1_is_return_1_24h"
+            "b1_is_return_1_168h"
+            "b1_is_share_1h"
+            "b1_is_share_3h"
+            "b1_is_share_24h"
+            "b1_is_share_168h"
+            "b1_rovn*log(r)_1h"
+            "b1_rovn*log(r)_3h"
+            "b1_rovn*log(r)_24h"
+            "b1_rovn*log(r)_168h"
+            "b1_str_1h"
+            "b1_str_3h"
+            "b1_str_24h"
+            "b1_str_168h"
+            "b1_str_one_1h"
+            "b1_str_one_3h"
+            "b1_str_one_24h"
+            "b1_str_one_168h"
+            "b1_str_plus_1h"
+            "b1_str_plus_3h"
+            "b1_str_plus_24h"
+            "b1_str_plus_168h"
+            "b2_is_return_1_1h"
+            "b2_is_return_1_3h"
+            "b2_is_return_1_24h"
+            "b2_is_return_1_168h"
+            "b2_is_share_1h"
+            "b2_is_share_3h"
+            "b2_is_share_24h"
+            "b2_is_share_168h"
+            "b2_rovn*log(r)_1h"
+            "b2_rovn*log(r)_3h"
+            "b2_rovn*log(r)_24h"
+            "b2_rovn*log(r)_168h"
+            "b2_str_1h"
+            "b2_str_3h"
+            "b2_str_24h"
+            "b2_str_168h"
+            "b2_str_one_1h"
+            "b2_str_one_3h"
+            "b2_str_one_24h"
+            "b2_str_one_168h"
+            "b2_str_plus_1h"
+            "b2_str_plus_3h"
+            "b2_str_plus_24h"
+            "b2_str_plus_168h"
+            "b3_is_return_1_24h"
+            "b3_is_return_1_168h"
+            "b3_is_share_24h"
+            "b3_is_share_168h"
+            "b3_rovn*log(r)_24h"
+            "b3_rovn*log(r)_168h"
+            "b3_str_24h"
+            "b3_str_168h"
+            "b3_str_one_24h"
+            "b3_str_one_168h"
+            "b3_str_plus_24h"
+            "b3_str_plus_168h"
+            "b4_is_return_1_6h"
+            "b4_is_return_1_24h"
+            "b4_is_share_6h"
+            "b4_is_share_24h"
+            "b4_rovn*log(r)_6h"
+            "b4_rovn*log(r)_24h"
+            "b4_str_6h"
+            "b4_str_24h"
+            "b4_str_one_6h"
+            "b4_str_one_24h"
+            "b4_str_plus_6h"
+            "b4_str_plus_24h"
+            "b5_is_return_1_6h"
+            "b5_is_return_1_24h"
+            "b5_is_share_6h"
+            "b5_is_share_24h"
+            "b5_rovn*log(r)_6h"
+            "b5_rovn*log(r)_24h"
+            "b5_str_6h"
+            "b5_str_24h"
+            "b5_str_one_6h"
+            "b5_str_one_24h"
+            "b5_str_plus_6h"
+            "b5_str_plus_24h"
+            "b6_is_return_1_6h"
+            "b6_is_return_1_24h"
+            "b6_is_share_6h"
+            "b6_is_share_24h"
+            "b6_rovn*log(r)_6h"
+            "b6_rovn*log(r)_24h"
+            "b6_str_6h"
+            "b6_str_24h"
+            "b6_str_one_6h"
+            "b6_str_one_24h"
+            "b6_str_plus_6h"
+            "b6_str_plus_24h"
+            "b7_is_return_1_6h"
+            "b7_is_return_1_24h"
+            "b7_is_share_6h"
+            "b7_is_share_24h"
+            "b7_rovn*log(r)_6h"
+            "b7_rovn*log(r)_24h"
+            "b7_str_6h"
+            "b7_str_24h"
+            "b7_str_one_6h"
+            "b7_str_one_24h"
+            "b7_str_plus_6h"
+            "b7_str_plus_24h"
+            "b8_is_return_1_1h"
+            "b8_is_return_1_12h"
+            "b8_is_share_1h"
+            "b8_is_share_12h"
+            "b8_rovn*log(r)_1h"
+            "b8_rovn*log(r)_12h"
+            "b8_str_1h"
+            "b8_str_12h"
+            "b8_str_one_1h"
+            "b8_str_one_12h"
+            "b8_str_plus_1h"
+            "b8_str_plus_12h"
+            "b9_is_return_1_1h"
+            "b9_is_return_1_12h"
+            "b9_is_share_1h"
+            "b9_is_share_12h"
+            "b9_rovn*log(r)_1h"
+            "b9_rovn*log(r)_12h"
+            "b9_str_1h"
+            "b9_str_12h"
+            "b9_str_one_1h"
+            "b9_str_one_12h"
+            "b9_str_plus_1h"
+            "b9_str_plus_12h"
+            "b10_is_return_1_6h"
+            "b10_is_share_6h"
+            "b10_rovn*log(r)_6h"
+            "b10_str_6h"
+            "b10_str_one_6h"
+            "b10_str_plus_6h"
+            "b11_is_return_1_1h"
+            "b11_is_share_1h"
+            "b11_rovn*log(r)_1h"
+            "b11_str_1h"
+            "b11_str_one_1h"
+            "b11_str_plus_1h"
+            "b13_is_return_1_1h"
+            "b13_is_return_1_3h"
+            "b13_is_return_1_24h"
+            "b13_is_return_1_168h"
+            "b13_is_share_1h"
+            "b13_is_share_3h"
+            "b13_is_share_24h"
+            "b13_is_share_168h"
+            "b13_rovn*log(r)_1h"
+            "b13_rovn*log(r)_3h"
+            "b13_rovn*log(r)_24h"
+            "b13_rovn*log(r)_168h"
+            "b13_str_1h"
+            "b13_str_3h"
+            "b13_str_24h"
+            "b13_str_168h"
+            "b13_str_one_1h"
+            "b13_str_one_3h"
+            "b13_str_one_24h"
+            "b13_str_one_168h"
+            "b13_str_plus_1h"
+            "b13_str_plus_3h"
+            "b13_str_plus_24h"
+            "b13_str_plus_168h"
+            "c1_click_12h"
+            "c1_click_24h"
+            "c1_click_72h"
+            "c1_click_168h"
+            "c1_is_return_1_12h"
+            "c1_is_return_1_24h"
+            "c1_is_return_1_72h"
+            "c1_is_return_1_168h"
+            "c1_is_share_12h"
+            "c1_is_share_24h"
+            "c1_is_share_72h"
+            "c1_is_share_168h"
+            "c1_rovn*log(r)_12h"
+            "c1_rovn*log(r)_24h"
+            "c1_rovn*log(r)_72h"
+            "c1_rovn*log(r)_168h"
+            "c1_str_12h"
+            "c1_str_24h"
+            "c1_str_72h"
+            "c1_str_168h"
+            "c1_str_one_12h"
+            "c1_str_one_24h"
+            "c1_str_one_72h"
+            "c1_str_one_168h"
+            "c1_str_plus_12h"
+            "c1_str_plus_24h"
+            "c1_str_plus_72h"
+            "c1_str_plus_168h"
+            "c2_click_12h"
+            "c2_click_168h"
+            "c2_click_24h"
+            "c2_click_72h"
+            "c2_is_return_1_12h"
+            "c2_is_return_1_168h"
+            "c2_is_return_1_24h"
+            "c2_is_return_1_72h"
+            "c2_is_share_12h"
+            "c2_is_share_168h"
+            "c2_is_share_24h"
+            "c2_is_share_72h"
+            "c2_return_n_uv_12h"
+            "c2_return_n_uv_168h"
+            "c2_return_n_uv_24h"
+            "c2_return_n_uv_72h"
+            "c2_share_cnt_12h"
+            "c2_share_cnt_168h"
+            "c2_share_cnt_24h"
+            "c2_share_cnt_72h"
+            "c3_click_12h"
+            "c3_click_168h"
+            "c3_click_24h"
+            "c3_click_72h"
+            "c3_is_return_1_12h"
+            "c3_is_return_1_168h"
+            "c3_is_return_1_24h"
+            "c3_is_return_1_72h"
+            "c3_is_share_12h"
+            "c3_is_share_168h"
+            "c3_is_share_24h"
+            "c3_is_share_72h"
+            "c3_return_n_uv_12h"
+            "c3_return_n_uv_168h"
+            "c3_return_n_uv_24h"
+            "c3_return_n_uv_72h"
+            "c3_share_cnt_12h"
+            "c3_share_cnt_168h"
+            "c3_share_cnt_24h"
+            "c3_share_cnt_72h"
+            "c4_avg_rovn_24h"
+            "c4_avg_rovn_72h"
+            "c4_avg_rovn_168h"
+            "c4_avg_str_24h"
+            "c4_avg_str_72h"
+            "c4_avg_str_168h"
+            "c4_avg_str_one_24h"
+            "c4_avg_str_one_72h"
+            "c4_avg_str_one_168h"
+            "c4_avg_str_plus_24h"
+            "c4_avg_str_plus_72h"
+            "c4_avg_str_plus_168h"
+            "c4_diff_rovn_24h"
+            "c4_diff_rovn_72h"
+            "c4_diff_rovn_168h"
+            "c4_diff_str_24h"
+            "c4_diff_str_72h"
+            "c4_diff_str_168h"
+            "c4_diff_str_one_24h"
+            "c4_diff_str_one_72h"
+            "c4_diff_str_one_168h"
+            "c4_diff_str_plus_24h"
+            "c4_diff_str_plus_72h"
+            "c4_diff_str_plus_168h"
+            "c5_avgscore_tags_1d"
+            "c5_avgscore_tags_3d"
+            "c5_avgscore_tags_7d"
+            "c5_matchnum_tags_1d"
+            "c5_matchnum_tags_3d"
+            "c5_matchnum_tags_7d"
+            "c5_maxscore_tags_1d"
+            "c5_maxscore_tags_3d"
+            "c5_maxscore_tags_7d"
+            "c6_avgscore_tags_1d"
+            "c6_avgscore_tags_3d"
+            "c6_avgscore_tags_7d"
+            "c6_matchnum_tags_1d"
+            "c6_matchnum_tags_3d"
+            "c6_matchnum_tags_7d"
+            "c6_maxscore_tags_1d"
+            "c6_maxscore_tags_3d"
+            "c6_maxscore_tags_7d"
+            "d1_ros_cf_rank"
+            "d1_ros_cf_score"
+            "d1_rov_cf_rank"
+            "d1_rov_cf_score"
+            "d2_rank"
+            "d2_score"
+            "d3_exp"
+            "d3_return_n"
+            "d3_rovn"
+            "total_time"
+            "width"
+            "height"
+            "width/height"
+            "size"
+            "bit_rate"
+            "is_greeting"
+            "festive_sim"
+            "head_title_festive_sim"
+            "head_title_merge1_sim"
+            "head_title_merge2_sim"
+            "merge1_sim"
+            "merge2_sim"
+            "title_sim"
+            "hour"
+            "create_ts_diff"
+      ]
+    }
+  }
+}

Някои файлове не бяха показани, защото твърде много файлове са промени