|
@@ -50,35 +50,36 @@ object diff_data_20250319 {
|
|
|
|
|
|
val rdd1: JSONObject = odpsData1.first()
|
|
val rdd1: JSONObject = odpsData1.first()
|
|
println("rdd1")
|
|
println("rdd1")
|
|
|
|
+ println(rdd1.getString("logkey"))
|
|
println(rdd1.toString)
|
|
println(rdd1.toString)
|
|
|
|
|
|
val rdd2: JSONObject = odpsData2.first()
|
|
val rdd2: JSONObject = odpsData2.first()
|
|
-
|
|
|
|
println("rdd2")
|
|
println("rdd2")
|
|
|
|
+ println(rdd2.getString("logkey"))
|
|
println(rdd2.toString)
|
|
println(rdd2.toString)
|
|
|
|
|
|
|
|
|
|
- var result: List[String] = List.empty
|
|
|
|
-
|
|
|
|
- result = result :+ "ad_easyrec_eval_data_v3_sampled size =" + odpsData1.count();
|
|
|
|
-
|
|
|
|
- result = result :+ "alg_recsys_ad_sample_all size =" + odpsData2.count();
|
|
|
|
|
|
+// var result: List[String] = List.empty
|
|
|
|
+//
|
|
|
|
+// result = result :+ "ad_easyrec_eval_data_v3_sampled size =" + odpsData1.count();
|
|
|
|
+//
|
|
|
|
+// result = result :+ "alg_recsys_ad_sample_all size =" + odpsData2.count();
|
|
|
|
|
|
// 以 logkey 为键进行转换
|
|
// 以 logkey 为键进行转换
|
|
- val rdd1Pairs: RDD[(String, JSONObject)] = odpsData1.map(map => (map.getString("logkey"), map))
|
|
|
|
- val rdd2Pairs: RDD[(String, JSONObject)] = odpsData2.map(map => (map.getString("logkey"), map))
|
|
|
|
-
|
|
|
|
-
|
|
|
|
- // 进行 join 操作
|
|
|
|
- val joinedRDD: RDD[(String, (JSONObject, JSONObject))] = rdd1Pairs.join(rdd2Pairs)
|
|
|
|
-
|
|
|
|
- val firstElement = joinedRDD.first()
|
|
|
|
- firstElement match {
|
|
|
|
- case (logkey, (map1, map2)) =>
|
|
|
|
- println(logkey)
|
|
|
|
- println(map1)
|
|
|
|
- println(map2)
|
|
|
|
- }
|
|
|
|
|
|
+// val rdd1Pairs: RDD[(String, JSONObject)] = odpsData1.map(map => (map.getString("logkey"), map))
|
|
|
|
+// val rdd2Pairs: RDD[(String, JSONObject)] = odpsData2.map(map => (map.getString("logkey"), map))
|
|
|
|
+//
|
|
|
|
+//
|
|
|
|
+// // 进行 join 操作
|
|
|
|
+// val joinedRDD: RDD[(String, (JSONObject, JSONObject))] = rdd1Pairs.join(rdd2Pairs)
|
|
|
|
+//
|
|
|
|
+// val firstElement = joinedRDD.first()
|
|
|
|
+// firstElement match {
|
|
|
|
+// case (logkey, (map1, map2)) =>
|
|
|
|
+// println(logkey)
|
|
|
|
+// println(map1)
|
|
|
|
+// println(map2)
|
|
|
|
+// }
|
|
|
|
|
|
// 比较相同 logkey 对应的 Map 中相同键的 value
|
|
// 比较相同 logkey 对应的 Map 中相同键的 value
|
|
// joinedRDD.foreach { case (logkey, (map1, map2)) =>
|
|
// joinedRDD.foreach { case (logkey, (map1, map2)) =>
|