|
@@ -100,21 +100,33 @@ object diff_data_20250319 {
|
|
|
if (map1.containsKey(key) && map2.containsKey(key)) {
|
|
|
val value1 = map1.get(key)
|
|
|
val value2 = map2.get(key)
|
|
|
- (value1, value2) match {
|
|
|
- case (num1: java.lang.Number, num2: java.lang.Number) =>
|
|
|
+
|
|
|
/**
 * Best-effort conversion of a raw feature value to a finite number.
 *
 * Accepted inputs:
 *   - any `java.lang.Number`, returned as-is;
 *   - a `String` that `java.lang.Double.valueOf` can parse. Its grammar
 *     tolerates surrounding whitespace and a trailing `d`/`f` type suffix.
 *
 * Non-finite results (NaN, +/-Infinity) are rejected on purpose: a NaN
 * operand makes `math.abs(a - b)` NaN, so a `diff > 0` check would silently
 * treat a real mismatch as "no difference", and an infinite operand would
 * poison any running sum of differences. Returning `None` lets the caller
 * fall back to its non-numeric comparison instead.
 *
 * @param value raw value taken from the JSON map; may be `null`
 * @return `Some(number)` when `value` is, or parses to, a finite number;
 *         `None` otherwise (including `null` and unparseable strings)
 */
def tryToNumber(value: Any): Option[java.lang.Number] = {
  val candidate: Option[java.lang.Number] = value match {
    case num: java.lang.Number => Some(num)
    case str: String =>
      // A type pattern never matches null, so only NumberFormatException
      // can be raised here; Try turns it into None.
      scala.util.Try(java.lang.Double.valueOf(str)).toOption
    case _ => None
  }
  candidate.filter { num =>
    val asDouble = num.doubleValue()
    !java.lang.Double.isNaN(asDouble) && !java.lang.Double.isInfinite(asDouble)
  }
}
|
|
|
+
|
|
|
+ (tryToNumber(value1), tryToNumber(value2)) match {
|
|
|
+ case (Some(num1), Some(num2)) =>
|
|
|
val diff = math.abs(num1.doubleValue() - num2.doubleValue())
|
|
|
if (diff > 0) {
|
|
|
featureDiffSum(key) = featureDiffSum.getOrElse(key, 0.0) + diff
|
|
|
featureCount(key) = featureCount.getOrElse(key, 0) + 1
|
|
|
}
|
|
|
- // val diffRate = if (num1.doubleValue() != 0) diff / num1.doubleValue() else diff
|
|
|
- // currentDiffRates(key) = diffRate
|
|
|
-
|
|
|
- case (num1: java.lang.String, num2: java.lang.String) =>
|
|
|
- if (num1 != num2) {
|
|
|
+ case _ =>
|
|
|
+ val str1 = value1.toString
|
|
|
+ val str2 = value2.toString
|
|
|
+ if (str1 != str2) {
|
|
|
featureCount(key) = featureCount.getOrElse(key, 0) + 1
|
|
|
}
|
|
|
- case _ =>
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -201,73 +213,73 @@ object diff_data_20250319 {
|
|
|
// val rdd = sc.parallelize(result)
|
|
|
|
|
|
|
|
|
-// def calculateFeatureDiff(map1: JSONObject, map2: JSONObject): (Map[String, Double], Double) = {
|
|
|
-// val keys = map1.keySet().asScala.toSet ++ map2.keySet().asScala.toSet
|
|
|
-// var totalDiff = 0.0
|
|
|
-// var validCount = 0
|
|
|
-// val diffRates = collection.mutable.Map[String, Double]()
|
|
|
-//
|
|
|
-// keys.foreach { key =>
|
|
|
-// if (map1.containsKey(key) && map2.containsKey(key)) {
|
|
|
-// val value1 = map1.get(key)
|
|
|
-// val value2 = map2.get(key)
|
|
|
-// (value1, value2) match {
|
|
|
-// case (num1: java.lang.Number, num2: java.lang.Number) =>
|
|
|
-// val diff = math.abs(num1.doubleValue() - num2.doubleValue())
|
|
|
-// val diffRate = if (num1.doubleValue() != 0) diff / num1.doubleValue() else diff
|
|
|
-// diffRates(key) = diffRate
|
|
|
-// totalDiff += diff
|
|
|
-// validCount += 1
|
|
|
-// case _ =>
|
|
|
-// }
|
|
|
-// }
|
|
|
-// }
|
|
|
-//
|
|
|
-// val averageDiff = if (validCount > 0) totalDiff / validCount else 0.0
|
|
|
-// (diffRates.toMap, averageDiff)
|
|
|
-// }
|
|
|
-
|
|
|
-// def compareJSONObjects(json1: JSONObject, json2: JSONObject, logkey: String): String = {
|
|
|
-// // 存储不同值的键和对应不同的值
|
|
|
-// val differentValues = collection.mutable.Map[String, (Any, Any)]()
|
|
|
-// // 存储 json1 中缺少的键
|
|
|
-// val missingInJson1 = collection.mutable.Set[String]()
|
|
|
-// // 存储 json2 中缺少的键
|
|
|
-// val missingInJson2 = collection.mutable.Set[String]()
|
|
|
-//
|
|
|
-// // 获取 json1 和 json2 的所有键
|
|
|
-// val keys1 = json1.keySet().asScala
|
|
|
-// val keys2 = json2.keySet().asScala
|
|
|
-//
|
|
|
-// // 找出不同值的键和对应不同的值
|
|
|
-// keys1.foreach { key =>
|
|
|
-// if (keys2.contains(key)) {
|
|
|
-// val value1 = json1.get(key)
|
|
|
-// val value2 = json2.get(key)
|
|
|
-// if (value1 != value2) {
|
|
|
-// differentValues(key) = (value1, value2)
|
|
|
-// }
|
|
|
-// } else {
|
|
|
-// missingInJson2.add(key)
|
|
|
-// }
|
|
|
-// }
|
|
|
-//
|
|
|
-// // 找出 json1 中缺少的键
|
|
|
-// keys2.foreach { key =>
|
|
|
-// if (!keys1.contains(key)) {
|
|
|
-// missingInJson1.add(key)
|
|
|
-// }
|
|
|
-// }
|
|
|
-// val differentValuesStr = differentValues.map { case (key, (value1, value2)) =>
|
|
|
-// s"$key: $value1 vs $value2"
|
|
|
-// }.mkString(", ")
|
|
|
-//
|
|
|
-// val missingInJson1Str = missingInJson1.mkString(", ")
|
|
|
-// val missingInJson2Str = missingInJson2.mkString(", ")
|
|
|
-//
|
|
|
-// val res = s"logkey: $logkey\t不同值: $differentValuesStr\tjson1 缺少的键: $missingInJson1Str\tjson2 缺少的键: $missingInJson2Str"
|
|
|
-// res
|
|
|
-// }
|
|
|
+ // def calculateFeatureDiff(map1: JSONObject, map2: JSONObject): (Map[String, Double], Double) = {
|
|
|
+ // val keys = map1.keySet().asScala.toSet ++ map2.keySet().asScala.toSet
|
|
|
+ // var totalDiff = 0.0
|
|
|
+ // var validCount = 0
|
|
|
+ // val diffRates = collection.mutable.Map[String, Double]()
|
|
|
+ //
|
|
|
+ // keys.foreach { key =>
|
|
|
+ // if (map1.containsKey(key) && map2.containsKey(key)) {
|
|
|
+ // val value1 = map1.get(key)
|
|
|
+ // val value2 = map2.get(key)
|
|
|
+ // (value1, value2) match {
|
|
|
+ // case (num1: java.lang.Number, num2: java.lang.Number) =>
|
|
|
+ // val diff = math.abs(num1.doubleValue() - num2.doubleValue())
|
|
|
+ // val diffRate = if (num1.doubleValue() != 0) diff / num1.doubleValue() else diff
|
|
|
+ // diffRates(key) = diffRate
|
|
|
+ // totalDiff += diff
|
|
|
+ // validCount += 1
|
|
|
+ // case _ =>
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ //
|
|
|
+ // val averageDiff = if (validCount > 0) totalDiff / validCount else 0.0
|
|
|
+ // (diffRates.toMap, averageDiff)
|
|
|
+ // }
|
|
|
+
|
|
|
+ // def compareJSONObjects(json1: JSONObject, json2: JSONObject, logkey: String): String = {
|
|
|
+ // // 存储不同值的键和对应不同的值
|
|
|
+ // val differentValues = collection.mutable.Map[String, (Any, Any)]()
|
|
|
+ // // 存储 json1 中缺少的键
|
|
|
+ // val missingInJson1 = collection.mutable.Set[String]()
|
|
|
+ // // 存储 json2 中缺少的键
|
|
|
+ // val missingInJson2 = collection.mutable.Set[String]()
|
|
|
+ //
|
|
|
+ // // 获取 json1 和 json2 的所有键
|
|
|
+ // val keys1 = json1.keySet().asScala
|
|
|
+ // val keys2 = json2.keySet().asScala
|
|
|
+ //
|
|
|
+ // // 找出不同值的键和对应不同的值
|
|
|
+ // keys1.foreach { key =>
|
|
|
+ // if (keys2.contains(key)) {
|
|
|
+ // val value1 = json1.get(key)
|
|
|
+ // val value2 = json2.get(key)
|
|
|
+ // if (value1 != value2) {
|
|
|
+ // differentValues(key) = (value1, value2)
|
|
|
+ // }
|
|
|
+ // } else {
|
|
|
+ // missingInJson2.add(key)
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ //
|
|
|
+ // // 找出 json1 中缺少的键
|
|
|
+ // keys2.foreach { key =>
|
|
|
+ // if (!keys1.contains(key)) {
|
|
|
+ // missingInJson1.add(key)
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // val differentValuesStr = differentValues.map { case (key, (value1, value2)) =>
|
|
|
+ // s"$key: $value1 vs $value2"
|
|
|
+ // }.mkString(", ")
|
|
|
+ //
|
|
|
+ // val missingInJson1Str = missingInJson1.mkString(", ")
|
|
|
+ // val missingInJson2Str = missingInJson2.mkString(", ")
|
|
|
+ //
|
|
|
+ // val res = s"logkey: $logkey\t不同值: $differentValuesStr\tjson1 缺少的键: $missingInJson1Str\tjson2 缺少的键: $missingInJson2Str"
|
|
|
+ // res
|
|
|
+ // }
|
|
|
|
|
|
|
|
|
// def func(record: Record, schema: TableSchema): Map[String, String] = {
|