Jelajahi Sumber

scala train

zhangbo 8 bulan lalu
induk
melakukan
85de2296cb

+ 19 - 1
recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/train_01_xgb_ad_20240808.scala

@@ -86,13 +86,15 @@ object train_01_xgb_ad_20240808{
       .setNumRound(num_round)
       .setSubsample(0.8)
       .setColsampleBytree(0.8)
-//      .setScalePosWeight(1)
+      .setScalePosWeight(1)
       .setObjective(func_object)
       .setEvalMetric(func_metric)
       .setFeaturesCol("features")
       .setLabelCol("label")
       .setNthread(1)
       .setNumWorkers(num_worker)
+  .setSeed(2024)
+  .setMinChildWeight(1)
     val model = xgbClassifier.fit(xgbInput)
 
 
@@ -126,6 +128,22 @@ object train_01_xgb_ad_20240808{
       .setMetricName("areaUnderROC")
     val auc = evaluator.evaluate(predictions.select("label", "probability"))
     println("zhangbo:auc:" + auc)
+
+    // Per-cid score statistics: row count, label sum, score sum, and the per-row averages of label and score
+    sc.textFile(hdfsPath).map(r=>{
+      val rList = r.split("\t")
+      val cid = rList(3)
+      val score = rList(2).replace("[", "").replace("]", "")
+        .split(",")(2).toDouble
+      val label = rList(0).toDouble
+      (cid, (1, label, score))
+    }).reduceByKey{
+      case (a, b) => (a._1 + b._1, a._2 + b._2, a._3 + b._3)
+    }.map{
+      case (cid, (all, zheng, scores)) =>
+        (cid, all, zheng, scores, zheng / all, scores / all)
+    }.collect().sortBy(_._1).map(_.productIterator.mkString("\t")).foreach(println)
+
   }