|
@@ -68,10 +68,14 @@ object recsys_01_ros_reg_xgb_train {
|
|
)
|
|
)
|
|
println("recsys ros:train data size:" + trainData.count())
|
|
println("recsys ros:train data size:" + trainData.count())
|
|
|
|
|
|
- val fields = Array(
|
|
|
|
|
|
+ var fields = Array(
|
|
DataTypes.createStructField("label", DataTypes.DoubleType, true)
|
|
DataTypes.createStructField("label", DataTypes.DoubleType, true)
|
|
) ++ features.map(f => DataTypes.createStructField(f, DataTypes.DoubleType, true))
|
|
) ++ features.map(f => DataTypes.createStructField(f, DataTypes.DoubleType, true))
|
|
|
|
|
|
|
|
+ fields = fields ++ Array(
|
|
|
|
+ DataTypes.createStructField("logKey", DataTypes.StringType, true)
|
|
|
|
+ )
|
|
|
|
+
|
|
val schema = DataTypes.createStructType(fields)
|
|
val schema = DataTypes.createStructType(fields)
|
|
val trainDataSet: Dataset[Row] = spark.createDataFrame(trainData, schema)
|
|
val trainDataSet: Dataset[Row] = spark.createDataFrame(trainData, schema)
|
|
val vectorAssembler = new VectorAssembler().setInputCols(features).setOutputCol("features")
|
|
val vectorAssembler = new VectorAssembler().setInputCols(features).setOutputCol("features")
|
|
@@ -104,7 +108,7 @@ object recsys_01_ros_reg_xgb_train {
|
|
features
|
|
features
|
|
)
|
|
)
|
|
val testDataSet = spark.createDataFrame(testData, schema)
|
|
val testDataSet = spark.createDataFrame(testData, schema)
|
|
- val testDataSetTrans = vectorAssembler.transform(testDataSet).select("features", "label")
|
|
|
|
|
|
+ val testDataSetTrans = vectorAssembler.transform(testDataSet).select("features", "label", "logKey")
|
|
val predictions = model.transform(testDataSetTrans)
|
|
val predictions = model.transform(testDataSetTrans)
|
|
|
|
|
|
// 保存评估结果
|
|
// 保存评估结果
|