Quellcode durchsuchen

增加diff校验

xueyiming vor 1 Monat
Ursprung
Commit
8e37a09a73

+ 14 - 14
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/diff_data_20250319.scala

@@ -66,20 +66,20 @@ object diff_data_20250319 {
     //    result = result :+ "alg_recsys_ad_sample_all size =" + odpsData2.count();
 
     // 以 logkey 为键进行转换
-    //    val rdd1Pairs: RDD[(String, JSONObject)] = odpsData1.map(map => (map.getString("logkey"), map))
-    //    val rdd2Pairs: RDD[(String, JSONObject)] = odpsData2.map(map => (map.getString("logkey"), map))
-    //
-    //
-    //    // 进行 join 操作
-    //    val joinedRDD: RDD[(String, (JSONObject, JSONObject))] = rdd1Pairs.join(rdd2Pairs)
-    //
-    //    val firstElement = joinedRDD.first()
-    //    firstElement match {
-    //      case (logkey, (map1, map2)) =>
-    //        println(logkey)
-    //        println(map1)
-    //        println(map2)
-    //    }
+    val rdd1Pairs: RDD[(String, JSONObject)] = odpsData1.map(map => (map.getString("logkey"), map))
+    val rdd2Pairs: RDD[(String, JSONObject)] = odpsData2.map(map => (map.getString("logkey"), map))
+
+
+    // 进行 join 操作
+    val joinedRDD: RDD[(String, (JSONObject, JSONObject))] = rdd1Pairs.join(rdd2Pairs)
+
+    val firstElement = joinedRDD.first()
+    firstElement match {
+      case (logkey, (map1, map2)) =>
+        println(logkey)
+        println(map1)
+        println(map2)
+    }
 
     // 比较相同 logkey 对应的 Map 中相同键的 value
     //    joinedRDD.foreach { case (logkey, (map1, map2)) =>