Browse Source

feat:新推荐特征处理_临时提交

zhaohaipeng 2 months ago
parent
commit
2cc27ff457

+ 28 - 0
src/main/java/examples/extractor/ExtractorUtils.java

@@ -1,5 +1,7 @@
 package examples.extractor;
 
+import examples.utils.SimilarityUtils;
+
 import java.util.Map;
 import java.time.LocalDateTime;
 import java.time.format.DateTimeFormatter;
@@ -142,6 +144,32 @@ public class ExtractorUtils {
         return low; // 返回low作为插入点
     }
 
+    /**
+     * Computes tag-versus-title match features for a comma-separated tag string.
+     *
+     * <p>For every non-empty tag the method checks whether the title literally contains
+     * the tag and obtains a score from {@code SimilarityUtils.word2VecSimilarity(tag, title)}.
+     *
+     * @param tags  comma-separated tags; {@code null} or empty yields all-zero features
+     * @param title title the tags are compared against; {@code null} yields all-zero features
+     * @return a three-element array: {@code [0]} number of tags contained in the title,
+     *         {@code [1]} the highest score seen (starts at 0.0, so never lower than that),
+     *         {@code [2]} the mean score over the non-empty tags (0.0 when there are none)
+     */
+    public static Double[] funcC34567ForTagsNew(String tags, String title) {
+        int matchCount = 0;
+        int scoredTags = 0;
+        double maxScore = 0.0;
+        double scoreSum = 0.0;
+
+        if (tags != null && title != null) {
+            for (String tag : tags.split(",")) {
+                // "".split(",") yields [""], and every string contains "", so an empty tag
+                // would otherwise be counted as a match and scored.
+                if (tag.isEmpty()) {
+                    continue;
+                }
+                scoredTags++;
+                if (title.contains(tag)) {
+                    matchCount++;
+                }
+                double score = SimilarityUtils.word2VecSimilarity(tag, title);
+                if (score > maxScore) {
+                    maxScore = score;
+                }
+                scoreSum += score;
+            }
+        }
+
+        double avgScore = (scoredTags > 0) ? scoreSum / scoredTags : 0.0;
+
+        // Multiple values are returned through a single array.
+        Double[] result = {(double) matchCount, maxScore, avgScore};
+        return result;
+    }
+
     public static void main(String[] args) {
         double[] sortedArray = {1.0, 2.0, 4.0, 4.0, 6.0};
         double target = 0.0;

+ 139 - 0
src/main/java/examples/extractor/v20250218/ExtractItemFeature.java

@@ -0,0 +1,139 @@
+package examples.extractor.v20250218;
+
+import examples.extractor.ExtractorUtils;
+import examples.extractor.RankExtractorFeature_20240530;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Static helpers that copy raw item/video statistics into a flat feature map.
+ *
+ * <p>Each {@code handleXxx} method reads values from one or more source maps and writes
+ * them into the supplied {@code featureMap} under prefixed keys. Values that are parsed
+ * as numbers default to {@code 0} when the source key is absent.
+ */
+public class ExtractItemFeature {
+
+    /** Utility class; not meant to be instantiated. */
+    private ExtractItemFeature() {
+    }
+
+    /**
+     * Reads {@code key} from {@code source} and parses its string form as a double.
+     *
+     * @param source map to read from
+     * @param key    key to look up; an absent key is treated as {@code "0"}
+     * @return the parsed value
+     * @throws NumberFormatException if the stored value is not a parsable number
+     */
+    private static double parseDouble(Map<String, Object> source, String key) {
+        return Double.parseDouble(source.getOrDefault(key, "0").toString());
+    }
+
+    /**
+     * Copies per-window statistics of every entry in {@code videoFeature} into {@code featureMap}.
+     *
+     * <p>For each entry {@code (key, feature)}, each time window and each index name, the value
+     * {@code feature[<index>_<time>]} is written to {@code featureMap[<key>_<index>_<time>]}.
+     * In addition {@code <key>_rovn*log(r)_<time>} is written as
+     * {@code rovn_<time> * calLog(return_n_uv)}.
+     *
+     * @param videoFeature source maps keyed by the prefix used for the output keys
+     * @param featureMap   output map that receives the features
+     */
+    public static void handleB1ToB13(Map<String, Map<String, Object>> videoFeature, Map<String, Object> featureMap) {
+        List<String> times = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
+        List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
+        for (Map.Entry<String, Map<String, Object>> entry : videoFeature.entrySet()) {
+            String key = entry.getKey();
+            Map<String, Object> feature = entry.getValue();
+            for (String time : times) {
+                for (String index : indexList) {
+                    featureMap.put(key + "_" + index + "_" + time, parseDouble(feature, index + "_" + time));
+                }
+
+                double rovn = parseDouble(feature, "rovn_" + time);
+                // NOTE(review): this key has no time suffix, unlike every other lookup here;
+                // possibly "return_n_uv_" + time was intended. Confirm against the input schema.
+                double returnNUv = parseDouble(feature, "return_n_uv");
+
+                featureMap.put(key + "_rovn*log(r)_" + time, rovn * RankExtractorFeature_20240530.calLog(returnNUv));
+            }
+
+        }
+    }
+
+    /**
+     * Writes basic video attributes into {@code featureMap}.
+     *
+     * <p>{@code total_time}, {@code size} and {@code bit_rate} are copied as stored (or as the
+     * default strings {@code "0"}, {@code "0d"}, {@code "0d"} when absent); {@code width} and
+     * {@code height} are parsed as doubles; {@code width/height} is computed through
+     * {@code ExtractorUtils.divisionDouble}. {@code is_festive} and {@code is_greeting} are
+     * integer flags set from the value of {@code festive_label1}.
+     *
+     * @param videoFeature source attributes of one video
+     * @param featureMap   output map that receives the features
+     */
+    public static void handleVideoBasicFeature(Map<String, Object> videoFeature, Map<String, Object> featureMap) {
+        Object totalTime = videoFeature.getOrDefault("total_time", "0");
+        Double width = parseDouble(videoFeature, "width");
+        Double height = parseDouble(videoFeature, "height");
+        Object size = videoFeature.getOrDefault("size", "0d");
+        Object bit_rate = videoFeature.getOrDefault("bit_rate", "0d");
+        String festiveLabel1 = videoFeature.getOrDefault("festive_label1", "").toString();
+
+        featureMap.put("total_time", totalTime);
+        featureMap.put("width", width);
+        featureMap.put("height", height);
+        featureMap.put("size", size);
+        featureMap.put("bit_rate", bit_rate);
+        featureMap.put("width/height", ExtractorUtils.divisionDouble(width, height));
+        // Both flags default to 0; at most one of them is switched to 1 below.
+        featureMap.put("is_festive", 0);
+        featureMap.put("is_greeting", 0);
+        if (StringUtils.equals(festiveLabel1, "节假日")) {
+            featureMap.put("is_festive", 1);
+        } else if (StringUtils.equals(festiveLabel1, "问候语")) {
+            featureMap.put("is_greeting", 1);
+        }
+
+    }
+
+    /**
+     * Copies the per-window statistics of {@code c1Feature} into {@code featureMap} under the
+     * {@code c1_} prefix.
+     *
+     * <p>For each time window and index name, {@code c1Feature[<index>_<time>]} is written to
+     * {@code featureMap[c1_<index>_<time>]}. In addition {@code c1_rovn*log(r)_<time>} is
+     * written as {@code rovn_<time> * calLog(return_n_uv)}.
+     *
+     * @param c1Feature  source statistics
+     * @param featureMap output map that receives the features
+     */
+    public static void handleC1(Map<String, Object> c1Feature, Map<String, Object> featureMap) {
+        List<String> times = Arrays.asList("12h", "24h", "72h", "168h");
+        List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "click", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
+        for (String time : times) {
+            for (String index : indexList) {
+                // Read from the source map; reading from featureMap would always hit the default.
+                featureMap.put("c1_" + index + "_" + time, parseDouble(c1Feature, index + "_" + time));
+            }
+            double rovn = parseDouble(c1Feature, "rovn_" + time);
+            // NOTE(review): this key has no time suffix, unlike every other lookup here;
+            // possibly "return_n_uv_" + time was intended. Confirm against the input schema.
+            double returnNUv = parseDouble(c1Feature, "return_n_uv");
+            // The key is suffixed with the time window so each window gets its own entry.
+            featureMap.put("c1_rovn*log(r)_" + time, rovn * RankExtractorFeature_20240530.calLog(returnNUv));
+        }
+    }
+
+    /**
+     * Copies the per-window count statistics of {@code c2Feature} and {@code c3Feature} into
+     * {@code featureMap} under the {@code c2_} and {@code c3_} prefixes.
+     *
+     * @param c2Feature  source statistics written with prefix {@code c2}
+     * @param c3Feature  source statistics written with prefix {@code c3}
+     * @param featureMap output map that receives the features
+     */
+    public static void handleC2ToC3(Map<String, Object> c2Feature, Map<String, Object> c3Feature, Map<String, Object> featureMap) {
+        Map<String, Map<String, Object>> featureMaps = new HashMap<>();
+        featureMaps.put("c2", c2Feature);
+        featureMaps.put("c3", c3Feature);
+
+        List<String> times = Arrays.asList("12h", "24h", "72h", "168h");
+        List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "click");
+
+        for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
+            String key = entry.getKey();
+            Map<String, Object> feature = entry.getValue();
+            for (String time : times) {
+                for (String index : indexList) {
+                    featureMap.put(key + "_" + index + "_" + time, parseDouble(feature, index + "_" + time));
+                }
+            }
+        }
+    }
+
+    /**
+     * Writes average and spread features derived from {@code c4Feature}.
+     *
+     * <p>For each time window and index name, {@code c4_avg_<index>_<time>} receives
+     * {@code avg_<index>_<time>} and {@code c4_diff_<index>_<time>} receives
+     * {@code max_<index>_<time> - min_<index>_<time>}.
+     *
+     * @param c4Feature  source statistics
+     * @param featureMap output map that receives the features
+     */
+    public static void handleC4(Map<String, Object> c4Feature, Map<String, Object> featureMap) {
+        List<String> times = Arrays.asList("24h", "72h", "168h");
+        List<String> indexList = Arrays.asList("str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
+
+        for (String time : times) {
+            for (String index : indexList) {
+                String suffix = index + "_" + time;
+                featureMap.put("c4_avg_" + suffix, parseDouble(c4Feature, "avg_" + suffix));
+
+                double max = parseDouble(c4Feature, "max_" + suffix);
+                double min = parseDouble(c4Feature, "min_" + suffix);
+
+                featureMap.put("c4_diff_" + suffix, max - min);
+            }
+        }
+
+    }
+
+    /**
+     * Writes tag-versus-title features for {@code c5Feature} and {@code c6Feature}.
+     *
+     * <p>For each source map and each of the keys {@code tags_1d}, {@code tags_3d},
+     * {@code tags_7d}, the stored tag string and the video title are passed to
+     * {@code ExtractorUtils.funcC34567ForTagsNew}; the three returned values are written as
+     * {@code <prefix>_<tagKey>_matchnum}, {@code _maxscore} and {@code _avgscore}.
+     *
+     * @param c5Feature  source map written with prefix {@code c5}
+     * @param c6Feature  source map written with prefix {@code c6}
+     * @param videoMap   video attributes; only {@code title} is read (default empty string)
+     * @param featureMap output map that receives the features
+     */
+    public static void handleC5ToC6(Map<String, Object> c5Feature, Map<String, Object> c6Feature, Map<String, Object> videoMap, Map<String, Object> featureMap) {
+        Map<String, Map<String, Object>> featureMaps = new HashMap<>();
+        featureMaps.put("c5", c5Feature);
+        featureMaps.put("c6", c6Feature);
+        List<String> tagKeys = Arrays.asList("tags_1d", "tags_3d", "tags_7d");
+
+        String title = videoMap.getOrDefault("title", "").toString();
+
+        for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
+            String key = entry.getKey();
+            Map<String, Object> feature = entry.getValue();
+            for (String tagKey : tagKeys) {
+                String tags = feature.getOrDefault(tagKey, "").toString();
+                Double[] scores = ExtractorUtils.funcC34567ForTagsNew(tags, title);
+                featureMap.put(key + "_" + tagKey + "_matchnum", scores[0]);
+                featureMap.put(key + "_" + tagKey + "_maxscore", scores[1]);
+                featureMap.put(key + "_" + tagKey + "_avgscore", scores[2]);
+            }
+        }
+
+    }
+
+    /**
+     * Placeholder with no implementation yet; calling it has no effect.
+     *
+     * @param c6Feature  currently unused
+     * @param c7Feature  currently unused
+     * @param featureMap currently unused
+     */
+    public static void handleC6ToC7(Map<String, Object> c6Feature, Map<String, Object> c7Feature, Map<String, Object> featureMap) {
+
+    }
+}