xueyiming 1 month ago
parent
commit
8748b3ecad

+ 3 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/diff_data_20250319.scala

@@ -103,7 +103,7 @@ object diff_data_20250319 {
           (value1, value2) match {
             case (num1: java.lang.Number, num2: java.lang.Number) =>
               val diff = math.abs(num1.doubleValue() - num2.doubleValue())
-              if (diff != 0) {
+              if (diff > 0) {
                 featureDiffSum(key) = featureDiffSum.getOrElse(key, 0.0) + diff
                 featureCount(key) = featureCount.getOrElse(key, 0) + 1
               }
@@ -131,6 +131,7 @@ object diff_data_20250319 {
 
     // 输出每个特征的平均差异
     println("每个特征的平均差异:")
+    println(featureDiffSum.size)
     featureDiffSum.foreach { case (feature, sum) =>
       val count = featureCount(feature)
       val averageDiff = sum / count
@@ -140,6 +141,7 @@ object diff_data_20250319 {
     val count = joinedRDD.count()
     println(s"对比总数: $count")
     println("每个特征的差异率:")
+    println(featureCount.size)
     featureCount.foreach { case (feature, sum) =>
       val rateDiff = sum / count
       println(s"  Feature: $feature, Rate Diff: $rateDiff")