|
@@ -61,13 +61,10 @@ object train_recsys_61_xgb_rov_20241209 {
|
|
|
)
|
|
|
println("recsys rov:train data size:" + trainData.count())
|
|
|
|
|
|
- var fields = Array(
|
|
|
+ val fields = Array(
|
|
|
DataTypes.createStructField("label", DataTypes.IntegerType, true)
|
|
|
) ++ features.map(f => DataTypes.createStructField(f, DataTypes.DoubleType, true))
|
|
|
|
|
|
- fields = fields ++ Array(
|
|
|
- DataTypes.createStructField("logKey", DataTypes.StringType, true)
|
|
|
- )
|
|
|
val schema = DataTypes.createStructType(fields)
|
|
|
val trainDataSet: Dataset[Row] = spark.createDataFrame(trainData, schema)
|
|
|
val vectorAssembler = new VectorAssembler().setInputCols(features).setOutputCol("features")
|
|
@@ -107,13 +104,13 @@ object train_recsys_61_xgb_rov_20241209 {
|
|
|
features
|
|
|
)
|
|
|
val testDataSet = spark.createDataFrame(testData, schema)
|
|
|
- val testDataSetTrans = vectorAssembler.transform(testDataSet).select("features", "label", "logKey")
|
|
|
+ val testDataSetTrans = vectorAssembler.transform(testDataSet).select("features", "label")
|
|
|
val predictions = model.transform(testDataSetTrans)
|
|
|
|
|
|
println("recsys rov:columns:" + predictions.columns.mkString(",")) //[label, features, probability, prediction, rawPrediction]
|
|
|
- val saveData = predictions.select("label", "rawPrediction", "probability", "logKey").rdd
|
|
|
+ val saveData = predictions.select("label", "rawPrediction", "probability").rdd
|
|
|
.map(r => {
|
|
|
- (r.get(0), r.get(1), r.get(2), r.get(3)).productIterator.mkString("\t")
|
|
|
+ (r.get(0), r.get(1), r.get(2)).productIterator.mkString("\t")
|
|
|
})
|
|
|
val hdfsPath = savePath
|
|
|
if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
|