xueyiming 1 月之前
父節點
當前提交
4548c4e10d

+ 87 - 75
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/diff_data_20250319.scala

@@ -100,21 +100,33 @@ object diff_data_20250319 {
         if (map1.containsKey(key) && map2.containsKey(key)) {
           val value1 = map1.get(key)
           val value2 = map2.get(key)
-          (value1, value2) match {
-            case (num1: java.lang.Number, num2: java.lang.Number) =>
+
+          def tryToNumber(value: Any): Option[java.lang.Number] = { // Coerces a raw value to a Number when possible; yields None otherwise.
+            value match {
+              case num: java.lang.Number => Some(num) // already numeric: passed through unchanged
+              case str: String => // string values are tentatively parsed as doubles
+                try {
+                  Some(str.toDouble) // NOTE(review): toDouble also accepts "NaN"/"Infinity"; a NaN diff fails the caller's `diff > 0` check and an infinite diff makes the running sum infinite - confirm inputs
+                } catch {
+                  case _: NumberFormatException => None // unparseable string: the caller's fallback branch compares toString values instead
+                }
+              case _ => None // any other type (a null reference also lands here) is non-numeric; NOTE(review): the caller's fallback calls toString, which would throw on null - confirm nulls cannot occur
+            }
+          }
+
+          (tryToNumber(value1), tryToNumber(value2)) match {
+            case (Some(num1), Some(num2)) =>
               val diff = math.abs(num1.doubleValue() - num2.doubleValue())
               if (diff > 0) {
                 featureDiffSum(key) = featureDiffSum.getOrElse(key, 0.0) + diff
                 featureCount(key) = featureCount.getOrElse(key, 0) + 1
               }
-            //              val diffRate = if (num1.doubleValue() != 0) diff / num1.doubleValue() else diff
-            //              currentDiffRates(key) = diffRate
-
-            case (num1: java.lang.String, num2: java.lang.String) =>
-              if (num1 != num2) {
+            case _ =>
+              val str1 = value1.toString
+              val str2 = value2.toString
+              if (str1 != str2) {
                 featureCount(key) = featureCount.getOrElse(key, 0) + 1
               }
-            case _ =>
           }
         }
       }
@@ -201,73 +213,73 @@ object diff_data_20250319 {
   //    val rdd = sc.parallelize(result)
 
 
-//  def calculateFeatureDiff(map1: JSONObject, map2: JSONObject): (Map[String, Double], Double) = {
-//    val keys = map1.keySet().asScala.toSet ++ map2.keySet().asScala.toSet
-//    var totalDiff = 0.0
-//    var validCount = 0
-//    val diffRates = collection.mutable.Map[String, Double]()
-//
-//    keys.foreach { key =>
-//      if (map1.containsKey(key) && map2.containsKey(key)) {
-//        val value1 = map1.get(key)
-//        val value2 = map2.get(key)
-//        (value1, value2) match {
-//          case (num1: java.lang.Number, num2: java.lang.Number) =>
-//            val diff = math.abs(num1.doubleValue() - num2.doubleValue())
-//            val diffRate = if (num1.doubleValue() != 0) diff / num1.doubleValue() else diff
-//            diffRates(key) = diffRate
-//            totalDiff += diff
-//            validCount += 1
-//          case _ =>
-//        }
-//      }
-//    }
-//
-//    val averageDiff = if (validCount > 0) totalDiff / validCount else 0.0
-//    (diffRates.toMap, averageDiff)
-//  }
-
-//  def compareJSONObjects(json1: JSONObject, json2: JSONObject, logkey: String): String = {
-//    // 存储不同值的键和对应不同的值
-//    val differentValues = collection.mutable.Map[String, (Any, Any)]()
-//    // 存储 json1 中缺少的键
-//    val missingInJson1 = collection.mutable.Set[String]()
-//    // 存储 json2 中缺少的键
-//    val missingInJson2 = collection.mutable.Set[String]()
-//
-//    // 获取 json1 和 json2 的所有键
-//    val keys1 = json1.keySet().asScala
-//    val keys2 = json2.keySet().asScala
-//
-//    // 找出不同值的键和对应不同的值
-//    keys1.foreach { key =>
-//      if (keys2.contains(key)) {
-//        val value1 = json1.get(key)
-//        val value2 = json2.get(key)
-//        if (value1 != value2) {
-//          differentValues(key) = (value1, value2)
-//        }
-//      } else {
-//        missingInJson2.add(key)
-//      }
-//    }
-//
-//    // 找出 json1 中缺少的键
-//    keys2.foreach { key =>
-//      if (!keys1.contains(key)) {
-//        missingInJson1.add(key)
-//      }
-//    }
-//    val differentValuesStr = differentValues.map { case (key, (value1, value2)) =>
-//      s"$key: $value1 vs $value2"
-//    }.mkString(", ")
-//
-//    val missingInJson1Str = missingInJson1.mkString(", ")
-//    val missingInJson2Str = missingInJson2.mkString(", ")
-//
-//    val res = s"logkey: $logkey\t不同值: $differentValuesStr\tjson1 缺少的键: $missingInJson1Str\tjson2 缺少的键: $missingInJson2Str"
-//    res
-//  }
+  //  def calculateFeatureDiff(map1: JSONObject, map2: JSONObject): (Map[String, Double], Double) = {
+  //    val keys = map1.keySet().asScala.toSet ++ map2.keySet().asScala.toSet
+  //    var totalDiff = 0.0
+  //    var validCount = 0
+  //    val diffRates = collection.mutable.Map[String, Double]()
+  //
+  //    keys.foreach { key =>
+  //      if (map1.containsKey(key) && map2.containsKey(key)) {
+  //        val value1 = map1.get(key)
+  //        val value2 = map2.get(key)
+  //        (value1, value2) match {
+  //          case (num1: java.lang.Number, num2: java.lang.Number) =>
+  //            val diff = math.abs(num1.doubleValue() - num2.doubleValue())
+  //            val diffRate = if (num1.doubleValue() != 0) diff / num1.doubleValue() else diff
+  //            diffRates(key) = diffRate
+  //            totalDiff += diff
+  //            validCount += 1
+  //          case _ =>
+  //        }
+  //      }
+  //    }
+  //
+  //    val averageDiff = if (validCount > 0) totalDiff / validCount else 0.0
+  //    (diffRates.toMap, averageDiff)
+  //  }
+
+  //  def compareJSONObjects(json1: JSONObject, json2: JSONObject, logkey: String): String = {
+  //    // 存储不同值的键和对应不同的值
+  //    val differentValues = collection.mutable.Map[String, (Any, Any)]()
+  //    // 存储 json1 中缺少的键
+  //    val missingInJson1 = collection.mutable.Set[String]()
+  //    // 存储 json2 中缺少的键
+  //    val missingInJson2 = collection.mutable.Set[String]()
+  //
+  //    // 获取 json1 和 json2 的所有键
+  //    val keys1 = json1.keySet().asScala
+  //    val keys2 = json2.keySet().asScala
+  //
+  //    // 找出不同值的键和对应不同的值
+  //    keys1.foreach { key =>
+  //      if (keys2.contains(key)) {
+  //        val value1 = json1.get(key)
+  //        val value2 = json2.get(key)
+  //        if (value1 != value2) {
+  //          differentValues(key) = (value1, value2)
+  //        }
+  //      } else {
+  //        missingInJson2.add(key)
+  //      }
+  //    }
+  //
+  //    // 找出 json1 中缺少的键
+  //    keys2.foreach { key =>
+  //      if (!keys1.contains(key)) {
+  //        missingInJson1.add(key)
+  //      }
+  //    }
+  //    val differentValuesStr = differentValues.map { case (key, (value1, value2)) =>
+  //      s"$key: $value1 vs $value2"
+  //    }.mkString(", ")
+  //
+  //    val missingInJson1Str = missingInJson1.mkString(", ")
+  //    val missingInJson2Str = missingInJson2.mkString(", ")
+  //
+  //    val res = s"logkey: $logkey\t不同值: $differentValuesStr\tjson1 缺少的键: $missingInJson1Str\tjson2 缺少的键: $missingInJson2Str"
+  //    res
+  //  }
 
 
   //  def func(record: Record, schema: TableSchema): Map[String, String] = {