Explorar el Código

feat:合并代码

zhaohaipeng hace 2 semanas
padre
commit
f6ea25134e

+ 0 - 1
.gitignore

@@ -40,6 +40,5 @@ xxl-job
 .DS_Store
 logs
 
-model
 predict
 .idea

+ 113 - 0
src/main/java/examples/model/FMModel.java

@@ -0,0 +1,113 @@
+package examples.model;
+
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class FMModel implements Serializable { // Scorer holding a bias plus per-feature [w1, v_1..v_factor] weights; predict() returns a sigmoid-squashed score.
+    private int factor; // number of interaction-vector components stored per feature (after the linear weight)
+    private double bias; // global offset w0, added to the score only when features are present
+    private Map<String, double[]> weight; // feature key -> [w1, v_1, ..., v_factor]
+
+    public static FMModel builder(List<String> lines, int factor) { // Parses tab-separated model lines into an FMModel; returns null when the input is rejected.
+        if (factor < 0 || factor > 32) { // accepted range is 0..32 inclusive
+            System.out.printf("factor=%d is wrong\n", factor);
+            return null;
+        }
+
+        double bias = 0;
+        Map<String, double[]> weight = new HashMap<>();
+
+        int number = 0; // 1-based index of the line being parsed
+        final int filedNum = 1 + 1 + factor; // expected cells per feature line: key + w1 + factor vector values
+        for (String line : lines) {
+            number += 1;
+            String[] cells = line.split("\t");
+            if (1 == number) {
+                // w0: the first line must be exactly "bias<TAB><value>", otherwise loading is aborted with null
+                if (cells.length != 2) {
+                    return null;
+                }
+                if ("bias".equals(cells[0])) {
+                    bias = Double.parseDouble(cells[1]); // NumberFormatException propagates if the value is not numeric
+                } else {
+                    return null;
+                }
+            } else {
+                // w1 & vector: one feature per line; lines with an unexpected cell count are skipped silently
+                if (cells.length != filedNum) {
+                    continue;
+                }
+                String key = cells[0];
+                double[] coefficient = new double[factor + 1]; // index 0 = w1, indices 1..factor = vector
+                for (int i = 1; i <= factor + 1; i++) { // copy cells[1..factor+1] into coefficient[0..factor]
+                    coefficient[i - 1] = Double.parseDouble(cells[i]);
+                }
+                weight.put(key, coefficient); // a repeated key overwrites the earlier entry
+            }
+        }
+        System.out.printf("load %d features\n", weight.size());
+        if (weight.size() < 100) { // NOTE(review): presumably guards against a truncated/empty model file — confirm the 100-feature minimum
+            return null;
+        }
+
+        FMModel model = new FMModel();
+        model.setFactor(factor);
+        model.setBias(bias);
+        model.setWeight(weight);
+        return model;
+    }
+
+    private void setBias(double bias) {
+        this.bias = bias;
+    }
+
+    private void setFactor(int factor) {
+        this.factor = factor;
+    }
+
+    private void setWeight(Map<String, double[]> weight) {
+        this.weight = weight;
+    }
+
+    private double sigmod(double score) { // logistic function 1 / (1 + e^-score); name is presumably a typo for "sigmoid"
+        return 1.0 / (1.0 + Math.exp(-score));
+    }
+
+    public double predict(Map<String, Double> features) { // Returns sigmod(score) for the given feature values; 0.5 when nothing is scored.
+        double score = 0;
+        if (null != features && !features.isEmpty() && null != this.weight) { // NOTE(review): otherwise the bias is skipped too, so the result is sigmod(0) = 0.5 — confirm intended
+            // bias: applied once, only inside this guarded branch
+            score += this.bias;
+
+            double[] sumSquare = new double[this.factor]; // per component: running sum of v*x (squared later)
+            double[] squareSum = new double[this.factor]; // per component: running sum of (v*x)^2
+            for (Map.Entry<String, Double> entry : features.entrySet()) {
+                String key = entry.getKey();
+                double val = entry.getValue(); // auto-unboxing: a null value here throws NullPointerException
+                double[] vector = this.weight.get(key);
+                if (vector == null) { // feature not present in the model: contributes nothing
+                    continue;
+                }
+
+                // w1: linear term for this feature
+                score += val * vector[0];
+
+                // sumSquare, squareSum: accumulate both sums for every vector component
+                for (int i = 0; i < this.factor; i++) {
+                    double mul = val * vector[i + 1]; // Vni*X
+                    sumSquare[i] += mul;
+                    squareSum[i] += mul * mul; // (Vni*X)^2
+
+                }
+            }
+
+            // vector: pairwise-interaction term, 0.5 * ((sum of v*x)^2 - sum of (v*x)^2) per component
+            for (int i = 0; i < this.factor; i++) {
+                score += 0.5 * (sumSquare[i] * sumSquare[i] - squareSum[i]);
+            }
+        }
+        return sigmod(score);
+    }
+}

+ 1 - 0
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/check_case.scala

@@ -3,6 +3,7 @@ package com.aliyun.odps.spark.examples.makedata_recsys_r_rate
 import com.aliyun.odps.data.Record
 import com.aliyun.odps.spark.examples.myUtils.DataUtils.getStringValue
 import com.aliyun.odps.spark.examples.myUtils._
+import examples.myUtils.{ConvertUtils, ConvertV2}
 import examples.utils.SimilarityUtils
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.SparkSession

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/make_title.scala

@@ -3,7 +3,7 @@ package com.aliyun.odps.spark.examples.makedata_recsys_r_rate
 import com.aliyun.odps.data.Record
 import com.aliyun.odps.spark.examples.myUtils.DataUtils.getStringValue
 import com.aliyun.odps.spark.examples.myUtils._
-import examples.myUtils.TitleConvert
+import examples.myUtils.{ConvertUtils, FeatureUtils, OnlineLogUtils, TitleConvert}
 import examples.utils.SimilarityUtils
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.SparkSession

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_fission_20260106.scala

@@ -1,7 +1,7 @@
 package com.aliyun.odps.spark.examples.makedata_recsys_r_rate
 
 import com.aliyun.odps.spark.examples.myUtils._
-import examples.myUtils.FissionConvert
+import examples.myUtils.{FissionConvert, OnlineLogUtils}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.SparkSession
 

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_fission_20260205.scala

@@ -2,7 +2,7 @@ package com.aliyun.odps.spark.examples.makedata_recsys_r_rate
 
 import com.alibaba.fastjson.JSONObject
 import com.aliyun.odps.spark.examples.myUtils._
-import examples.myUtils.FissionConvertV2
+import examples.myUtils.{FissionConvertV2, OnlineLogUtils}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.SparkSession
 

+ 1 - 0
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_profile_20251209.scala

@@ -1,6 +1,7 @@
 package com.aliyun.odps.spark.examples.makedata_recsys_r_rate
 
 import com.aliyun.odps.spark.examples.myUtils._
+import examples.myUtils.{ConvertV1, OnlineLogUtils}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.SparkSession
 

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_profile_20251211.scala

@@ -1,7 +1,7 @@
 package com.aliyun.odps.spark.examples.makedata_recsys_r_rate
 
 import com.aliyun.odps.spark.examples.myUtils._
-import examples.myUtils.ConvertV5
+import examples.myUtils.{ConvertV5, OnlineLogUtils}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.SparkSession
 

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_70_origin_data_20250718.scala

@@ -4,7 +4,7 @@ import com.alibaba.fastjson.JSONObject
 import com.aliyun.odps.data.Record
 import com.aliyun.odps.spark.examples.myUtils.DataUtils.getStringValue
 import com.aliyun.odps.spark.examples.myUtils._
-import examples.myUtils.ConvertV4
+import examples.myUtils.{ConvertUtils, ConvertV4, OnlineLogUtils}
 import examples.utils.SimilarityUtils
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.SparkSession

+ 1 - 0
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_83_bucket_originData.scala

@@ -3,6 +3,7 @@ package com.aliyun.odps.spark.examples.makedata_recsys_r_rate
 import com.aliyun.odps.data.Record
 import com.aliyun.odps.spark.examples.myUtils.DataUtils.getStringValue
 import com.aliyun.odps.spark.examples.myUtils._
+import examples.myUtils.{ConvertUtils, ConvertV2}
 import examples.utils.SimilarityUtils
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.SparkSession

+ 1 - 0
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_84_originData_20250812.scala

@@ -3,6 +3,7 @@ package com.aliyun.odps.spark.examples.makedata_recsys_r_rate
 import com.aliyun.odps.data.Record
 import com.aliyun.odps.spark.examples.myUtils.DataUtils.getStringValue
 import com.aliyun.odps.spark.examples.myUtils._
+import examples.myUtils.{ConvertUtils, ConvertV2, FeatureUtils, OnlineLogUtils}
 import examples.utils.SimilarityUtils
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.SparkSession

+ 1 - 0
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/parse_org_log.scala

@@ -3,6 +3,7 @@ package com.aliyun.odps.spark.examples.makedata_recsys_r_rate
 import com.aliyun.odps.data.Record
 import com.aliyun.odps.spark.examples.myUtils.DataUtils.getStringValue
 import com.aliyun.odps.spark.examples.myUtils._
+import examples.myUtils.{ConvertUtils, ConvertV2, OnlineLogUtils}
 import examples.utils.SimilarityUtils
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.SparkSession

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/predict_fission_20260205.scala

@@ -1,7 +1,7 @@
 package com.aliyun.odps.spark.examples.makedata_recsys_r_rate
 
-import com.aliyun.odps.spark.examples.model.FMModel
 import com.aliyun.odps.spark.examples.myUtils.{MyDateUtils, MyHdfsUtils, ParamUtils}
+import examples.model.FMModel
 import org.apache.commons.lang.math.NumberUtils
 import org.apache.commons.lang3.StringUtils
 import org.apache.hadoop.io.compress.GzipCodec