|
@@ -27,14 +27,13 @@ object train_recsys_61_xgb_nor_20241209 {
|
|
val testPath = param.getOrElse("testPath", "")
|
|
val testPath = param.getOrElse("testPath", "")
|
|
val savePath = param.getOrElse("savePath", "/dw/recommend/model/61_recsys_nor_predict_data/")
|
|
val savePath = param.getOrElse("savePath", "/dw/recommend/model/61_recsys_nor_predict_data/")
|
|
val featureFilter = param.getOrElse("featureFilter", "XXXXXX").split(",")
|
|
val featureFilter = param.getOrElse("featureFilter", "XXXXXX").split(",")
|
|
- val maxLabel = param.getOrElse("maxLabel", "30").toDouble
|
|
|
|
val eta = param.getOrElse("eta", "0.01").toDouble
|
|
val eta = param.getOrElse("eta", "0.01").toDouble
|
|
val gamma = param.getOrElse("gamma", "0.0").toDouble
|
|
val gamma = param.getOrElse("gamma", "0.0").toDouble
|
|
val max_depth = param.getOrElse("max_depth", "5").toInt
|
|
val max_depth = param.getOrElse("max_depth", "5").toInt
|
|
val num_round = param.getOrElse("num_round", "100").toInt
|
|
val num_round = param.getOrElse("num_round", "100").toInt
|
|
val num_worker = param.getOrElse("num_worker", "20").toInt
|
|
val num_worker = param.getOrElse("num_worker", "20").toInt
|
|
- val func_object = param.getOrElse("func_object", "reg:squarederror")
|
|
|
|
- val func_metric = param.getOrElse("func_metric", "rmse")
|
|
|
|
|
|
+ val func_object = param.getOrElse("func_object", "reg:squaredlogerror")
|
|
|
|
+ val func_metric = param.getOrElse("func_metric", "rmsle")
|
|
val repartition = param.getOrElse("repartition", "20").toInt
|
|
val repartition = param.getOrElse("repartition", "20").toInt
|
|
val modelPath = param.getOrElse("modelPath", "/dw/recommend/model/61_recsys_nor_model/model_xgb")
|
|
val modelPath = param.getOrElse("modelPath", "/dw/recommend/model/61_recsys_nor_model/model_xgb")
|
|
val modelFile = param.getOrElse("modelFile", "model_xgb_for_recsys_nor.tar.gz")
|
|
val modelFile = param.getOrElse("modelFile", "model_xgb_for_recsys_nor.tar.gz")
|
|
@@ -58,8 +57,7 @@ object train_recsys_61_xgb_nor_20241209 {
|
|
|
|
|
|
val trainData = createData(
|
|
val trainData = createData(
|
|
sc.textFile(trainPath),
|
|
sc.textFile(trainPath),
|
|
- features,
|
|
|
|
- maxLabel
|
|
|
|
|
|
+ features
|
|
)
|
|
)
|
|
println("recsys nor:train data size:" + trainData.count())
|
|
println("recsys nor:train data size:" + trainData.count())
|
|
|
|
|
|
@@ -130,7 +128,7 @@ object train_recsys_61_xgb_nor_20241209 {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- def createData(data: RDD[String], features: Array[String], maxLabel: Double = 30): RDD[Row] = {
|
|
|
|
|
|
+ def createData(data: RDD[String], features: Array[String]): RDD[Row] = {
|
|
data
|
|
data
|
|
.filter(r => {
|
|
.filter(r => {
|
|
val line: Array[String] = StringUtils.split(r, '\t')
|
|
val line: Array[String] = StringUtils.split(r, '\t')
|
|
@@ -148,7 +146,7 @@ object train_recsys_61_xgb_nor_20241209 {
|
|
}
|
|
}
|
|
|
|
|
|
val v: Array[Any] = new Array[Any](features.length + 1)
|
|
val v: Array[Any] = new Array[Any](features.length + 1)
|
|
- v(0) = clipLabel(label, maxLabel)
|
|
|
|
|
|
+ v(0) = label
|
|
for (i <- 0 until features.length) {
|
|
for (i <- 0 until features.length) {
|
|
v(i + 1) = map.getOrDefault(features(i), 0.0d)
|
|
v(i + 1) = map.getOrDefault(features(i), 0.0d)
|
|
}
|
|
}
|