
rov xgb model

jch committed 4 months ago
Commit 47fb4b4315

+ 4 - 7
recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/pred_recsys_61_xgb_rov_hdfsfile_20241209.scala

@@ -47,13 +47,10 @@ object pred_recsys_61_xgb_rov_hdfsfile_20241209 {
       .filter(r => r.nonEmpty || !featureFilter.contains(r))
     println("features.size=" + features.length)
 
-    var fields = Array(
+    val fields = Array(
       DataTypes.createStructField("label", DataTypes.IntegerType, true)
     ) ++ features.map(f => DataTypes.createStructField(f, DataTypes.DoubleType, true))
 
-    fields = fields ++ Array(
-      DataTypes.createStructField("logKey", DataTypes.StringType, true)
-    )
     val schema = DataTypes.createStructType(fields)
     val vectorAssembler = new VectorAssembler().setInputCols(features).setOutputCol("features")
 
@@ -66,12 +63,12 @@ object pred_recsys_61_xgb_rov_hdfsfile_20241209 {
     )
 
     val testDataSet = spark.createDataFrame(testData, schema)
-    val testDataSetTrans = vectorAssembler.transform(testDataSet).select("features", "label", "logKey")
+    val testDataSetTrans = vectorAssembler.transform(testDataSet).select("features", "label")
     val predictions = model.transform(testDataSetTrans)
 
-    val saveData = predictions.select("label", "rawPrediction", "probability", "logKey").rdd
+    val saveData = predictions.select("label", "rawPrediction", "probability").rdd
       .map(r => {
-        (r.get(0), r.get(1), r.get(2), r.get(3)).productIterator.mkString("\t")
+        (r.get(0), r.get(1), r.get(2)).productIterator.mkString("\t")
       })
     val hdfsPath = savePath
     if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
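
The change in this file is twofold: the `fields` array becomes a single immutable `val` (the separate append of a `logKey` column is gone), and `logKey` is dropped from every downstream `select`. A minimal sketch of the resulting schema construction, with a hypothetical `features` list standing in for the one the file actually loads:

```scala
import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.sql.types.{DataTypes, StructField, StructType}

object SchemaSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical feature names; the real list is read and filtered upstream.
    val features: Array[String] = Array("f0", "f1", "f2")

    // Post-commit shape: one immutable val, no trailing logKey StructField.
    val fields: Array[StructField] = Array(
      DataTypes.createStructField("label", DataTypes.IntegerType, true)
    ) ++ features.map(f => DataTypes.createStructField(f, DataTypes.DoubleType, true))
    val schema: StructType = DataTypes.createStructType(fields)

    // The assembler packs only the feature columns into the "features" vector,
    // so the schema and the assembler inputs now cover the same column set.
    val vectorAssembler = new VectorAssembler()
      .setInputCols(features)
      .setOutputCol("features")

    println(schema.fieldNames.mkString(","))  // label,f0,f1,f2
    println(vectorAssembler.getOutputCol)     // features
  }
}
```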

+ 4 - 7
recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/train_recsys_61_xgb_rov_20241209.scala

@@ -61,13 +61,10 @@ object train_recsys_61_xgb_rov_20241209 {
     )
     println("recsys rov:train data size:" + trainData.count())
 
-    var fields = Array(
+    val fields = Array(
       DataTypes.createStructField("label", DataTypes.IntegerType, true)
     ) ++ features.map(f => DataTypes.createStructField(f, DataTypes.DoubleType, true))
 
-    fields = fields ++ Array(
-      DataTypes.createStructField("logKey", DataTypes.StringType, true)
-    )
     val schema = DataTypes.createStructType(fields)
     val trainDataSet: Dataset[Row] = spark.createDataFrame(trainData, schema)
     val vectorAssembler = new VectorAssembler().setInputCols(features).setOutputCol("features")
@@ -107,13 +104,13 @@ object train_recsys_61_xgb_rov_20241209 {
         features
       )
       val testDataSet = spark.createDataFrame(testData, schema)
-      val testDataSetTrans = vectorAssembler.transform(testDataSet).select("features", "label", "logKey")
+      val testDataSetTrans = vectorAssembler.transform(testDataSet).select("features", "label")
       val predictions = model.transform(testDataSetTrans)
 
       println("recsys rov:columns:" + predictions.columns.mkString(",")) //[label, features, probability, prediction, rawPrediction]
-      val saveData = predictions.select("label", "rawPrediction", "probability", "logKey").rdd
+      val saveData = predictions.select("label", "rawPrediction", "probability").rdd
         .map(r => {
-          (r.get(0), r.get(1), r.get(2), r.get(3)).productIterator.mkString("\t")
+          (r.get(0), r.get(1), r.get(2)).productIterator.mkString("\t")
         })
       val hdfsPath = savePath
       if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
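
Both files end with the same save step, which after this commit emits three tab-separated columns instead of four. A minimal sketch of that step, assuming a `predictions` DataFrame from `model.transform(...)` and a `savePath` as in the surrounding code; note the hunks cut off inside the path guard, so the write call below is an assumption, not something the diff shows:

```scala
import org.apache.spark.sql.DataFrame

// Sketch of the post-commit save step: with logKey gone, each output line is
// label \t rawPrediction \t probability.
def savePredictions(predictions: DataFrame, savePath: String): Unit = {
  val saveData = predictions.select("label", "rawPrediction", "probability").rdd
    .map(r => (r.get(0), r.get(1), r.get(2)).productIterator.mkString("\t"))

  // Same guard as the source: only write under the model directory.
  if (savePath.nonEmpty && savePath.startsWith("/dw/recommend/model/")) {
    // Assumption: the diff ends before the write; a plain-text save like this
    // is consistent with the RDD[String] built above.
    saveData.saveAsTextFile(savePath)
  }
}
```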