@@ -24,6 +24,8 @@ object pred_recsys_61_xgb_nor_hdfsfile_20241209 {
     val param = ParamUtils.parseArgs(args)
     val featureFile = param.getOrElse("featureFile", "20241209_recsys_nor_name.txt")
     val testPath = param.getOrElse("testPath", "")
+    val labelLogType = param.getOrElse("labelLogType", "0").toInt
+    val labelLogBase = param.getOrElse("labelLogBase", "2").toDouble
     val savePath = param.getOrElse("savePath", "/dw/recommend/model/61_recsys_nor_predict_data/")
     val featureFilter = param.getOrElse("featureFilter", "XXXXXX").split(",")
@@ -69,7 +71,7 @@ object pred_recsys_61_xgb_nor_hdfsfile_20241209 {
     val testDataSet = spark.createDataFrame(testData, schema)
     val testDataSetTrans = vectorAssembler.transform(testDataSet).select("features", "label", "logKey", "scoresMap")
     val predictions = model.transform(testDataSetTrans)
-    val clipPrediction = getClipData(spark, predictions).persist()
+    val clipPrediction = getClipData(spark, predictions, labelLogType, labelLogBase).persist()
     val saveData = clipPrediction.select("label", "prediction", "clipPrediction", "logKey", "scoresMap").rdd
       .map(r => {
@@ -94,8 +96,8 @@ object pred_recsys_61_xgb_nor_hdfsfile_20241209 {
       .setMetricName("mae")
     val rmse = rmseEvaluator.evaluate(clipPrediction.select("label", "clipPrediction"))
     val mae = maeEvaluator.evaluate(clipPrediction.select("label", "clipPrediction"))
-    val mape = calMAPE(clipPrediction.select("label", "clipPrediction").rdd)
-    val rmsle = calRMSLE(clipPrediction.select("label", "clipPrediction").rdd)
+    val mape = MetricUtils.calMAPE(clipPrediction.select("label", "clipPrediction").rdd)
+    val rmsle = MetricUtils.calRMSLE(clipPrediction.select("label", "clipPrediction").rdd)
     printf("recsys nor:rmse: %.6f\n", rmse)
     printf("recsys nor:mae: %.6f\n", mae)
     printf("recsys nor:mape: %.6f\n", mape)
@@ -128,12 +130,12 @@ object pred_recsys_61_xgb_nor_hdfsfile_20241209 {
       })
   }

-  def getClipData(spark: SparkSession, df: DataFrame): DataFrame = {
+  def getClipData(spark: SparkSession, df: DataFrame, logType: Int, logBase: Double): DataFrame = {
     import spark.implicits._
     df.select("label", "prediction", "logKey", "scoresMap").rdd
       .map(row => {
         val label = row.getAs[Double]("label")
-        val prediction = row.getAs[Double]("prediction")
+        val prediction = MetricUtils.restoreLog(row.getAs[Double]("prediction"), logType, logBase)
         val logKey = row.getAs[String]("logKey")
         val scoresMap = row.getAs[String]("scoresMap")
         if (prediction < 1E-8) {
@@ -144,22 +146,4 @@ object pred_recsys_61_xgb_nor_hdfsfile_20241209 {
         }
       ).toDF("label", "prediction", "clipPrediction", "logKey", "scoresMap")
   }
-
-  def calMAPE(evalRdd: RDD[Row]): Double = {
-    val apeRdd = evalRdd.map(raw => {
-      val label = raw.get(0).toString.toDouble
-      val pred = raw.get(1).toString.toDouble
-      math.abs(label - pred) / label
-    })
-    apeRdd.sum() / apeRdd.count()
-  }
-
-  def calRMSLE(evalRdd: RDD[Row]): Double = {
-    val sleRdd = evalRdd.map(raw => {
-      val label = raw.get(0).toString.toDouble
-      val pred = raw.get(1).toString.toDouble
-      math.pow(math.log(pred + 1) - math.log(label + 1), 2)
-    })
-    math.sqrt(sleRdd.sum() / sleRdd.count())
-  }
 }
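
Note on the helpers this patch starts calling: MetricUtils.calMAPE and MetricUtils.calRMSLE presumably take over the calMAPE/calRMSLE implementations deleted above, while MetricUtils.restoreLog is assumed to undo a log transform of the label controlled by labelLogType and labelLogBase. The training-side transform is not part of this diff, so the restoreLog formula below is an assumption rather than the actual MetricUtils code. A minimal sketch under those assumptions:

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Row

// Hypothetical sketch of MetricUtils. Method names match the calls in this patch;
// calMAPE/calRMSLE mirror the helpers removed above, restoreLog is a guessed
// inverse of a log(y + 1) / log(base) label transform.
object MetricUtils {

  // logType == 0: label was not transformed, return the prediction unchanged.
  // logType != 0 (assumed): label was stored as log(y + 1) / log(logBase),
  // so invert with logBase^p - 1.
  def restoreLog(prediction: Double, logType: Int, logBase: Double): Double = {
    if (logType == 0) prediction
    else math.pow(logBase, prediction) - 1.0
  }

  // Mean absolute percentage error over (label, prediction) rows.
  def calMAPE(evalRdd: RDD[Row]): Double = {
    val apeRdd = evalRdd.map { row =>
      val label = row.get(0).toString.toDouble
      val pred = row.get(1).toString.toDouble
      math.abs(label - pred) / label
    }
    apeRdd.sum() / apeRdd.count()
  }

  // Root mean squared logarithmic error over (label, prediction) rows.
  def calRMSLE(evalRdd: RDD[Row]): Double = {
    val sleRdd = evalRdd.map { row =>
      val label = row.get(0).toString.toDouble
      val pred = row.get(1).toString.toDouble
      math.pow(math.log(pred + 1) - math.log(label + 1), 2)
    }
    math.sqrt(sleRdd.sum() / sleRdd.count())
  }
}

Under this reading, labelLogType=0 (the default) leaves predictions untouched and reproduces the old behavior, while a non-zero labelLogType with, say, labelLogBase=2 maps a raw model output p back to 2^p - 1 before clipping in getClipData and before RMSE/MAE/MAPE/RMSLE are computed.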