瀏覽代碼

feat:添加分桶脚本

zhaohaipeng 2 月之前
父節點
當前提交
433661bab0

+ 2 - 5
recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/train_01_xgb_ad_20240808.scala

@@ -68,9 +68,6 @@ object train_01_xgb_ad_20240808 {
 
     ) ++ features.map(f => DataTypes.createStructField(f, DataTypes.DoubleType, true))
 
-    fields = fields ++ Array(
-      DataTypes.createStructField("logKey", DataTypes.StringType, true)
-    )
     val schema = DataTypes.createStructType(fields)
     val trainDataSet: Dataset[Row] = spark.createDataFrame(trainData, schema)
     val vectorAssembler = new VectorAssembler().setInputCols(features).setOutputCol("features")
@@ -104,11 +101,11 @@ object train_01_xgb_ad_20240808 {
       features
     )
     val testDataSet = spark.createDataFrame(testData, schema)
-    val testDataSetTrans = vectorAssembler.transform(testDataSet).select("features", "label", "logKey")
+    val testDataSetTrans = vectorAssembler.transform(testDataSet).select("features", "label")
     val predictions = model.transform(testDataSetTrans)
     //     [label, features, probability, prediction, rawPrediction]
     println("zhangbo:columns:" + predictions.columns.mkString(","))
-    val saveData = predictions.select("label", "rawPrediction", "probability", "logKey").rdd
+    val saveData = predictions.select("label", "rawPrediction", "probability").rdd
       .map(r => { // each Row holds exactly the three columns selected above
         (r.get(0), r.get(1), r.get(2)).productIterator.mkString("\t") // no index 3: "logKey" is no longer selected
       })