|
@@ -34,6 +34,23 @@ object diff_data_20250319 {
|
|
|
transfer = func,
|
|
|
numPartition = 64)
|
|
|
|
|
|
+
|
|
|
+ val rdd1 = odpsData1.first()
|
|
|
+ val rdd2 = odpsData2.first()
|
|
|
+
|
|
|
+ println("rdd1")
|
|
|
+ rdd1.foreach {
|
|
|
+ case (key, value) =>
|
|
|
+ println(key + ":" + value)
|
|
|
+ }
|
|
|
+
|
|
|
+ println("rdd2")
|
|
|
+ rdd2.foreach {
|
|
|
+ case (key, value) =>
|
|
|
+ println(key + ":" + value)
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
var result: List[String] = List.empty
|
|
|
|
|
|
result = result :+ "ad_easyrec_eval_data_v3_sampled size =" + odpsData1.count();
|
|
@@ -45,7 +62,6 @@ object diff_data_20250319 {
|
|
|
val rdd2Pairs: RDD[(String, Map[String, String])] = odpsData2.map(map => ((map("apptype"), map("mid"), map("cid"), map("ts"), map("headvideoid")).productIterator.mkString(","), map))
|
|
|
|
|
|
|
|
|
-
|
|
|
|
|
|
val joinedRDD: RDD[(String, (Map[String, String], Map[String, String]))] = rdd1Pairs.join(rdd2Pairs)
|
|
|
|
|
@@ -58,47 +74,47 @@ object diff_data_20250319 {
|
|
|
}
|
|
|
|
|
|
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
|
|
|
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
|
|
|
|
|
|
}
|