Forráskód Böngészése

去掉重复数据

jch 4 napja
szülő
commit
ba55149e91

+ 7 - 3
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_33_bucketDataFromOriginToHive_20250522.scala

@@ -694,9 +694,13 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250522 {
             resultMap += ("logkey" -> logKey)
             resultMap
         }
-        .map(featMap=>{
-          val logkey = featMap.getOrElse("logkey", "")
-          (logkey, featMap)
+        .map(featMap => {
+          val apptype = featMap.getOrElse("apptype", "")
+          val mid = featMap.getOrElse("mid", "")
+          val cid = featMap.getOrElse("cid", "")
+          val pqtid = featMap.getOrElse("pqtid", "")
+          val uniqKey = (apptype, mid, cid, pqtid).productIterator.mkString(",")
+          (uniqKey, featMap)
         })
         .reduceByKey((a, b) => a)
         .map(_._2)