xueyiming vor 1 Monat
Ursprung
Commit
d5233b92e0

+ 11 - 10
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/diff_data_20250319.scala

@@ -251,24 +251,25 @@ object diff_data_20250319 {
     println("featureCount: " + featureCount)
 
 
-
+    val count2 = joinedRDD.count()
     // 输出每个特征的平均差异
     println("每个特征的平均差异:")
     println(featureDiffSum.size)
     featureDiffSum.foreach { case (feature, sum) =>
       val count = featureCount(feature)
       val averageDiff = sum / count
-      println(s"  Feature: $feature, Average Diff: $averageDiff")
+      val rateDiff = count / count2
+      println(s"  Feature: $feature, Average Diff: $averageDiff  Rate Diff: $rateDiff")
     }
 
-    val count2 = joinedRDD.count()
-    println(s"对比总数: $count2")
-    println("每个特征的差异率:")
-    println(featureCount.size)
-    featureCount.foreach { case (feature, sum) =>
-      val rateDiff = sum / count2
-      println(s"  Feature: $feature, Rate Diff: $rateDiff")
-    }
+
+//    println(s"对比总数: $count2")
+//    println("每个特征的差异率:")
+//    println(featureCount.size)
+//    featureCount.foreach { case (feature, sum) =>
+//      val rateDiff = sum / count2
+//      println(s"  Feature: $feature, Rate Diff: $rateDiff")
+//    }
   }
 
   def tryToNumber(value: Any): Option[java.lang.Number] = {