Browse Source

样本重新制作: 小时级别特征。

zhangbo 1 year ago
parent
commit
8e8d2370ee

+ 14 - 9
src/main/java/examples/extractor/ExtractorUtils.java

@@ -65,7 +65,10 @@ public class ExtractorUtils {
         List<Double> differences = new ArrayList<>();
 
         for (int i = 0; i < numbers.size() - 1; i++) {
-            Double diff = numbers.get(i + 1) - numbers.get(i);
+            Double diff = 0.0;
+            if (!isDoubleEqualToZero(numbers.get(i))){
+                diff = (numbers.get(i + 1) - numbers.get(i)) / numbers.get(i);
+            }
             differences.add(diff);
         }
 
@@ -116,14 +119,16 @@ public class ExtractorUtils {
     }
 
     public static void main(String[] args) {
-        System.out.println(ceilLogRate(0.0002));
-        System.out.println(ceilLogRate(0.01));
-        System.out.println(ceilLogRate(0.2));
-        System.out.println(ceilLogRate(4.));
-        System.out.println(bucketCnt(1.));
-        System.out.println(bucketCnt(20.));
-        System.out.println(bucketCnt(500.));
-        System.out.println(bucketCnt(50000.));
+//        System.out.println(ceilLogRate(0.0002));
+//        System.out.println(ceilLogRate(0.01));
+//        System.out.println(ceilLogRate(0.2));
+//        System.out.println(ceilLogRate(4.));
+//        System.out.println(bucketCnt(1.));
+//        System.out.println(bucketCnt(20.));
+//        System.out.println(bucketCnt(500.));
+//        System.out.println(bucketCnt(50000.));
+
+        System.out.println(generateHourStrings("2024011603", 5));
 
     }
 

+ 33 - 21
src/main/java/examples/extractor/RankExtractorItemFeature.java

@@ -81,9 +81,10 @@ public class RankExtractorItemFeature {
     }
 
     public static Map<String, String> getItemRealtimeTrend(Map<String, Map<String, Double>> maps, String date, String hour){
-        Map<String, Double> result = new HashMap<>();
+        Map<String, Double> result1 = new HashMap<>();
+        Map<String, Double> result2 = new HashMap<>();
         if (date.isEmpty() || hour.isEmpty()){
-            return rateFeatureChange(result);
+            return rateFeatureChange(result1);
         }
         int N = 6;
 
@@ -95,51 +96,54 @@ public class RankExtractorItemFeature {
         if (maps.containsKey(key)){
             Map<String, Double> fList = maps.get(key);
             List<Double> arrs = hourStrs.stream().map(r -> fList.getOrDefault(r, 0.0D)).collect(Collectors.toList());
-            result.put(key+"_"+N+"_avg", ExtractorUtils.calculateAverage(arrs));
-            result.put(key+"_"+N+"_var", ExtractorUtils.calculateVariance(arrs));
+            result1.put(key+"_"+N+"_avg", ExtractorUtils.calculateAverage(arrs));
+            result1.put(key+"_"+N+"_var", ExtractorUtils.calculateVariance(arrs));
 
             List<Double> arrsDiff = ExtractorUtils.calculateDifferences(arrs);
-            result.put(key+"_diff_"+N+"_avg", ExtractorUtils.calculateAverage(arrsDiff));
-            result.put(key+"_diff_"+N+"_var", ExtractorUtils.calculateVariance(arrsDiff));
+            result2.put(key+"_diff_"+N+"_avg", ExtractorUtils.calculateAverage(arrsDiff));
+            result2.put(key+"_diff_"+N+"_var", ExtractorUtils.calculateVariance(arrsDiff));
         }
 
         key = "return_uv_list_1day";
         if (maps.containsKey(key)){
             Map<String, Double> fList = maps.get(key);
             List<Double> arrs = hourStrs.stream().map(r -> fList.getOrDefault(r, 0.0D)).collect(Collectors.toList());
-            result.put(key+"_"+N+"_avg", ExtractorUtils.calculateAverage(arrs));
-            result.put(key+"_"+N+"_var", ExtractorUtils.calculateVariance(arrs));
+            result1.put(key+"_"+N+"_avg", ExtractorUtils.calculateAverage(arrs));
+            result1.put(key+"_"+N+"_var", ExtractorUtils.calculateVariance(arrs));
 
             List<Double> arrsDiff = ExtractorUtils.calculateDifferences(arrs);
-            result.put(key+"_diff_"+N+"_avg", ExtractorUtils.calculateAverage(arrsDiff));
-            result.put(key+"_diff_"+N+"_var", ExtractorUtils.calculateVariance(arrsDiff));
+            result2.put(key+"_diff_"+N+"_avg", ExtractorUtils.calculateAverage(arrsDiff));
+            result2.put(key+"_diff_"+N+"_var", ExtractorUtils.calculateVariance(arrsDiff));
         }
 
         key = "share_uv_list_1h";
         if (maps.containsKey(key)){
             Map<String, Double> fList = maps.get(key);
             List<Double> arrs = hourStrs.stream().map(r -> fList.getOrDefault(r, 0.0D)).collect(Collectors.toList());
-            result.put(key+"_"+N+"_avg", ExtractorUtils.calculateAverage(arrs));
-            result.put(key+"_"+N+"_var", ExtractorUtils.calculateVariance(arrs));
+            result1.put(key+"_"+N+"_avg", ExtractorUtils.calculateAverage(arrs));
+            result1.put(key+"_"+N+"_var", ExtractorUtils.calculateVariance(arrs));
 
             List<Double> arrsDiff = ExtractorUtils.calculateDifferences(arrs);
-            result.put(key+"_diff_"+N+"_avg", ExtractorUtils.calculateAverage(arrsDiff));
-            result.put(key+"_diff_"+N+"_var", ExtractorUtils.calculateVariance(arrsDiff));
+            result2.put(key+"_diff_"+N+"_avg", ExtractorUtils.calculateAverage(arrsDiff));
+            result2.put(key+"_diff_"+N+"_var", ExtractorUtils.calculateVariance(arrsDiff));
         }
 
         key = "return_uv_list_1h";
         if (maps.containsKey(key)){
             Map<String, Double> fList = maps.get(key);
             List<Double> arrs = hourStrs.stream().map(r -> fList.getOrDefault(r, 0.0D)).collect(Collectors.toList());
-            result.put(key+"_"+N+"_avg", ExtractorUtils.calculateAverage(arrs));
-            result.put(key+"_"+N+"_var", ExtractorUtils.calculateVariance(arrs));
+            result1.put(key+"_"+N+"_avg", ExtractorUtils.calculateAverage(arrs));
+            result1.put(key+"_"+N+"_var", ExtractorUtils.calculateVariance(arrs));
 
             List<Double> arrsDiff = ExtractorUtils.calculateDifferences(arrs);
-            result.put(key+"_diff_"+N+"_avg", ExtractorUtils.calculateAverage(arrsDiff));
-            result.put(key+"_diff_"+N+"_var", ExtractorUtils.calculateVariance(arrsDiff));
+            result2.put(key+"_diff_"+N+"_avg", ExtractorUtils.calculateAverage(arrsDiff));
+            result2.put(key+"_diff_"+N+"_var", ExtractorUtils.calculateVariance(arrsDiff));
         }
+        Map<String, String> r1 = cntFeatureChange4Double(result1);
+        Map<String, String> r2 = rateFeatureChange(result2);
+        r1.putAll(r2);
 
-        return rateFeatureChange(result);
+        return r1;
     }
 
 
@@ -151,6 +155,14 @@ public class RankExtractorItemFeature {
         }
         return result;
     }
+    /**
+     * Converts numeric feature values into bucket ids rendered as strings.
+     *
+     * @param maps feature name to numeric value
+     * @return a new map with the same keys, each value replaced by the string
+     *         form of the int returned by ExtractorUtils.bucketCnt for it
+     */
+    public static Map<String, String> cntFeatureChange4Double(Map<String, Double> maps){
+        Map<String, String> bucketed = new HashMap<>();
+        maps.forEach((name, raw) -> {
+            // Double.valueOf(raw) unboxes first, so a null map value throws NullPointerException.
+            int bucket = ExtractorUtils.bucketCnt(Double.valueOf(raw));
+            bucketed.put(name, String.valueOf(bucket));
+        });
+        return bucketed;
+    }
 
     public static Map<String, String> cntFeatureChange(Map<String, String> maps,
                                                        Set<String> names){
@@ -172,7 +184,7 @@ public class RankExtractorItemFeature {
         if (date.isEmpty() || hour.isEmpty()){
             return result;
         }
-        String dateHour = ExtractorUtils.subtractHours(date + hour, 1);
+        String dateHour = ExtractorUtils.subtractHours(date + hour, 0);
         for (Map.Entry<String, Map<String, Double>> entry : maps.entrySet()){
             if (!names.contains(entry.getKey())){
                 continue;
@@ -191,7 +203,7 @@ public class RankExtractorItemFeature {
         if (date.isEmpty() || hour.isEmpty()){
             return rateFeatureChange(result);
         }
-        String dateHour = ExtractorUtils.subtractHours(date + hour, 1);
+        String dateHour = ExtractorUtils.subtractHours(date + hour, 0);
 
         double d, d1, d2;
         String k1, k2;