root 1 месяц назад
Родитель
Commit
43226d949a

+ 3 - 3
recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/pred_recsys_61_xgb_nor_hdfsfile_20241209.scala

@@ -26,11 +26,11 @@ object pred_recsys_61_xgb_nor_hdfsfile_20241209 {
     val testPath = param.getOrElse("testPath", "")
     val testPath = param.getOrElse("testPath", "")
     val labelLogType = param.getOrElse("labelLogType", "0").toInt
     val labelLogType = param.getOrElse("labelLogType", "0").toInt
     val labelLogBase = param.getOrElse("labelLogBase", "2").toDouble
     val labelLogBase = param.getOrElse("labelLogBase", "2").toDouble
-    val savePath = param.getOrElse("savePath", "/dw/recommend/model/61_recsys_nor_predict_data/")
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model_yxh/61_recsys_nor_predict_data/")
     val featureFilter = param.getOrElse("featureFilter", "XXXXXX").split(",")
     val featureFilter = param.getOrElse("featureFilter", "XXXXXX").split(",")
 
 
     val repartition = param.getOrElse("repartition", "20").toInt
     val repartition = param.getOrElse("repartition", "20").toInt
-    val modelPath = param.getOrElse("modelPath", "/dw/recommend/model/61_recsys_nor_model/model_xgb")
+    val modelPath = param.getOrElse("modelPath", "/dw/recommend/model_yxh/61_recsys_nor_model/model_xgb")
 
 
     val loader = getClass.getClassLoader
     val loader = getClass.getClassLoader
     val resourceUrl = loader.getResource(featureFile)
     val resourceUrl = loader.getResource(featureFile)
@@ -79,7 +79,7 @@ object pred_recsys_61_xgb_nor_hdfsfile_20241209 {
         (r.get(0), r.get(1), r.get(2), r.get(3), r.get(4)).productIterator.mkString("\t")
         (r.get(0), r.get(1), r.get(2), r.get(3), r.get(4)).productIterator.mkString("\t")
       })
       })
     val hdfsPath = savePath
     val hdfsPath = savePath
-    if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
+    if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model_yxh/")) {
       println("删除路径并开始数据写入:" + hdfsPath)
       println("删除路径并开始数据写入:" + hdfsPath)
       MyHdfsUtils.delete_hdfs_path(hdfsPath)
       MyHdfsUtils.delete_hdfs_path(hdfsPath)
       saveData.repartition(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
       saveData.repartition(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])

+ 24 - 21
recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/train_recsys_61_xgb_nor_20241209.scala

@@ -25,7 +25,7 @@ object train_recsys_61_xgb_nor_20241209 {
     val featureFile = param.getOrElse("featureFile", "20241209_recsys_nor_name.txt")
     val featureFile = param.getOrElse("featureFile", "20241209_recsys_nor_name.txt")
     val trainPath = param.getOrElse("trainPath", "/dw/recommend/model/61_recsys_nor_train_data/20241210")
     val trainPath = param.getOrElse("trainPath", "/dw/recommend/model/61_recsys_nor_train_data/20241210")
     val testPath = param.getOrElse("testPath", "")
     val testPath = param.getOrElse("testPath", "")
-    val savePath = param.getOrElse("savePath", "/dw/recommend/model/61_recsys_nor_predict_data/")
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model_yxh/61_recsys_nor_predict_data/")
     val featureFilter = param.getOrElse("featureFilter", "XXXXXX").split(",")
     val featureFilter = param.getOrElse("featureFilter", "XXXXXX").split(",")
     val labelLogType = param.getOrElse("labelLogType", "0").toInt
     val labelLogType = param.getOrElse("labelLogType", "0").toInt
     val labelLogBase = param.getOrElse("labelLogBase", "2").toDouble
     val labelLogBase = param.getOrElse("labelLogBase", "2").toDouble
@@ -37,7 +37,7 @@ object train_recsys_61_xgb_nor_20241209 {
     val func_object = param.getOrElse("func_object", "reg:squaredlogerror")
     val func_object = param.getOrElse("func_object", "reg:squaredlogerror")
     val func_metric = param.getOrElse("func_metric", "rmsle")
     val func_metric = param.getOrElse("func_metric", "rmsle")
     val repartition = param.getOrElse("repartition", "20").toInt
     val repartition = param.getOrElse("repartition", "20").toInt
-    val modelPath = param.getOrElse("modelPath", "/dw/recommend/model/61_recsys_nor_model/model_xgb")
+    val modelPath = param.getOrElse("modelPath", "/dw/recommend/model_yxh/61_recsys_nor_model/model_xgb")
     val modelFile = param.getOrElse("modelFile", "model_xgb_for_recsys_nor.tar.gz")
     val modelFile = param.getOrElse("modelFile", "model_xgb_for_recsys_nor.tar.gz")
 
 
     val loader = getClass.getClassLoader
     val loader = getClass.getClassLoader
@@ -77,21 +77,22 @@ object train_recsys_61_xgb_nor_20241209 {
     //      "max_depth" -> 5,
     //      "max_depth" -> 5,
     //      "objective" -> "reg:squaredlogerror")
     //      "objective" -> "reg:squaredlogerror")
     val xgbRegressor = new XGBoostRegressor()
     val xgbRegressor = new XGBoostRegressor()
-      .setEta(eta)
-      .setGamma(gamma)
-      .setMissing(0.0f)
-      .setMaxDepth(max_depth)
-      .setNumRound(num_round)
-      .setSubsample(0.8)
-      .setColsampleBytree(0.8)
-      .setObjective(func_object)
-      .setEvalMetric(func_metric)
-      .setFeaturesCol("features")
-      .setLabelCol("label")
-      .setNthread(1)
-      .setNumWorkers(num_worker)
-      .setSeed(2024)
-      .setMinChildWeight(1)
+    .setObjective("count:poisson")
+    .setEvalMetric("poisson-nloglik")
+    .setEta(0.05)              // Poisson 通常比 squaredlog 要大一点
+    .setMaxDepth(5)
+    .setMinChildWeight(1)
+    .setGamma(0.0)
+    .setSubsample(0.8)
+    .setColsampleBytree(0.8)
+    .setNumRound(num_round)
+    .setMissing(0.0f)
+    .setNumWorkers(num_worker)
+    .setNthread(1)
+    .setSeed(2024)
+    .setFeaturesCol("features")
+    .setLabelCol("label")
+
     val model = xgbRegressor.fit(xgbInput)
     val model = xgbRegressor.fit(xgbInput)
 
 
     if (modelPath.nonEmpty && modelFile.nonEmpty) {
     if (modelPath.nonEmpty && modelFile.nonEmpty) {
@@ -117,7 +118,7 @@ object train_recsys_61_xgb_nor_20241209 {
           (r.get(0), r.get(1)).productIterator.mkString("\t")
           (r.get(0), r.get(1)).productIterator.mkString("\t")
         })
         })
       val hdfsPath = savePath
       val hdfsPath = savePath
-      if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
+      if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model_yxh/")) {
         println("删除路径并开始数据写入:" + hdfsPath)
         println("删除路径并开始数据写入:" + hdfsPath)
         MyHdfsUtils.delete_hdfs_path(hdfsPath)
         MyHdfsUtils.delete_hdfs_path(hdfsPath)
         saveData.repartition(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
         saveData.repartition(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
@@ -127,9 +128,9 @@ object train_recsys_61_xgb_nor_20241209 {
       val evaluator = new RegressionEvaluator()
       val evaluator = new RegressionEvaluator()
         .setLabelCol("label")
         .setLabelCol("label")
         .setPredictionCol("prediction")
         .setPredictionCol("prediction")
-        .setMetricName("rmse")
+        .setMetricName("poisson-nloglik")
       val rmse = evaluator.evaluate(predictions.select("label", "prediction"))
       val rmse = evaluator.evaluate(predictions.select("label", "prediction"))
-      println("recsys nor: rmse:" + rmse)
+      println("recsys nor: poisson-nloglik:" + rmse)
     }
     }
   }
   }
 
 
@@ -151,7 +152,9 @@ object train_recsys_61_xgb_nor_20241209 {
         }
         }
 
 
         val v: Array[Any] = new Array[Any](features.length + 1)
         val v: Array[Any] = new Array[Any](features.length + 1)
-        v(0) = MetricUtils.logScale(label, logType, logBase)
+        // v(0) = MetricUtils.logScale(label, logType, logBase)
+        v(0) = Math.max(label, 0.0)
+
         for (i <- 0 until features.length) {
         for (i <- 0 until features.length) {
           v(i + 1) = map.getOrDefault(features(i), 0.0d)
           v(i + 1) = map.getOrDefault(features(i), 0.0d)
         }
         }