Browse Source

feat:新推荐特征处理_临时提交

zhaohaipeng 2 months ago
parent
commit
c25bd64e70
1 changed file with 95 additions and 1 deletion
  1. 95 1
      src/main/java/examples/extractor/v20250218/ExtractItemFeature.java

+ 95 - 1
src/main/java/examples/extractor/v20250218/ExtractItemFeature.java

@@ -2,6 +2,7 @@ package examples.extractor.v20250218;
 
 import examples.extractor.ExtractorUtils;
 import examples.extractor.RankExtractorFeature_20240530;
+import examples.utils.SimilarityUtils;
 import org.apache.commons.lang3.StringUtils;
 
 import java.util.Arrays;
@@ -133,7 +134,100 @@ public class ExtractItemFeature {
 
     }
 
-    public static void handleC6ToC7(Map<String, Object> c6Feature, Map<String, Object> c7Feature, Map<String, Object> featureMap) {
+    public static Map<String, Map<String, String[]>> handleC6ToC7(Map<String, Object> c6Feature, Map<String, Object> c7Feature) {
+        Map<String, Map<String, String[]>> resultMap = new HashMap<>();
 
+        Map<String, Map<String, Object>> featureMaps = new HashMap<>();
+        featureMaps.put("c6", c6Feature);
+        featureMaps.put("c7", c7Feature);
+        List<String> indexList = Arrays.asList("share", "return");
+        for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
+            String key = entry.getKey();
+            Map<String, Object> feature = entry.getValue();
+            for (String index : indexList) {
+                if (feature.containsKey(index)) {
+                    Map<String, String[]> cfMap = new HashMap<>();
+                    String[] entries = feature.get(index).toString().split(",");
+                    for (String e : entries) {
+                        String[] rList = e.split(":");
+                        if (rList.length >= 4) {
+                            String vid = rList[0];
+                            String value1 = rList[1];
+                            String value2 = rList[2];
+                            String value3 = rList[3];
+                            String[] strs = {value1, value2, value3};
+                            cfMap.put(vid, strs);
+                        }
+                    }
+                    resultMap.put(key, cfMap);
+                }
+            }
+        }
+
+        return resultMap;
+    }
+
+    public static void handleD3(Map<String, Object> d3Feature, Map<String, Object> featureMap) {
+        for (String index : Arrays.asList("exp", "return_n", "rovn")) {
+            double value = Double.parseDouble(d3Feature.getOrDefault(index, "0").toString());
+            featureMap.put("d3_" + index, value);
+        }
+    }
+
+    public static void handleD1(Map<String, Object> d4Feature, Map<String, Object> featureMap) {
+        double rosCfScores = Double.parseDouble(d4Feature.getOrDefault("ros_cf_score", "0").toString());
+        featureMap.put("d1_ros_cf_score", rosCfScores);
+        double rovCfScores = Double.parseDouble(d4Feature.getOrDefault("rov_cf_score", "0").toString());
+        featureMap.put("d1_rov_cf_score", rovCfScores);
+
+        double rosCfRank = Double.parseDouble(d4Feature.getOrDefault("ros_cf_rank", "0").toString());
+        featureMap.put("d1_ros_cf_rank", 1 / rosCfRank);
+        double rovCfRank = Double.parseDouble(d4Feature.getOrDefault("rov_cf_rank", "0").toString());
+        featureMap.put("d1_rov_cf_rank", 1 / rovCfRank);
+    }
+
+    public static void handleD2(Map<String, Object> d5Feature, Map<String, Object> featureMap) {
+        double score = Double.parseDouble(d5Feature.getOrDefault("score", "0").toString());
+        featureMap.put("d2_score", score);
+
+        double rank = Double.parseDouble(d5Feature.getOrDefault("rank", "0").toString());
+        featureMap.put("d2_rank", 1 / rank);
+    }
+
+    public static void handleVideoSimilarity(Map<String, Object> videoFeature, Map<String, Object> headVideoFeature, Map<String, Object> featureMap) {
+        String headVideoTitle = headVideoFeature.getOrDefault("title", "").toString();
+        String headVideoMergeCate2 = headVideoFeature.getOrDefault("merge_second_level_cate", "").toString();
+        String headVideoMergeCate1 = headVideoFeature.getOrDefault("merge_first_level_cate", "").toString();
+        String headVideoFestiveLabel2 = headVideoFeature.getOrDefault("festive_label2", "").toString();
+
+
+        String videoTitle = videoFeature.getOrDefault("title", "").toString();
+        String videoMergeCate2 = videoFeature.getOrDefault("merge_second_level_cate", "").toString();
+        String videoMergeCate1 = videoFeature.getOrDefault("merge_first_level_cate", "").toString();
+        String videoFestiveLabel2 = videoFeature.getOrDefault("festive_label2", "").toString();
+
+        double titleSimilarity = ExtractItemFeature.calcTxtSimilarity(headVideoTitle, videoTitle);
+        double headTitleAndMerge1Similarity = ExtractItemFeature.calcTxtSimilarity(headVideoTitle, videoMergeCate1);
+        double headTitleAndMerge2Similarity = ExtractItemFeature.calcTxtSimilarity(headVideoTitle, videoMergeCate2);
+        double headTitleAndFestiveSimilarity = ExtractItemFeature.calcTxtSimilarity(headVideoTitle, videoFestiveLabel2);
+        double merge1Similarity = ExtractItemFeature.calcTxtSimilarity(headVideoMergeCate1, videoMergeCate1);
+        double merge2Similarity = ExtractItemFeature.calcTxtSimilarity(headVideoMergeCate2, videoMergeCate2);
+        double festiveSimilarity = ExtractItemFeature.calcTxtSimilarity(headVideoFestiveLabel2, videoFestiveLabel2);
+
+        featureMap.put("title_sim", titleSimilarity);
+        featureMap.put("head_title_merge1_sim", headTitleAndMerge1Similarity);
+        featureMap.put("head_title_merge2_sim", headTitleAndMerge2Similarity);
+        featureMap.put("head_title_festive_sim", headTitleAndFestiveSimilarity);
+        featureMap.put("merge1_sim", merge1Similarity);
+        featureMap.put("merge2_sim", merge2Similarity);
+        featureMap.put("festive_sim", festiveSimilarity);
+
+    }
+
+    private static double calcTxtSimilarity(String txt1, String txt2) {
+        if (StringUtils.isBlank(txt1) || StringUtils.isBlank(txt2)) {
+            return 0d;
+        }
+        return SimilarityUtils.word2VecSimilarity(txt1, txt2);
     }
 }