Prechádzať zdrojové kódy

要在采样前过滤重复数据

jch pred 1 dňom
rodič
commit
8b7023e297

+ 1 - 11
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_33_bucketDataFromOriginToHive_20250522.scala

@@ -693,17 +693,7 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250522 {
             resultMap += ("has_conversion" -> labelObject.getString("ad_is_conversion"))
             resultMap += ("logkey" -> logKey)
             resultMap
-        }
-//        .map(featMap => {
-//          val apptype = featMap.getOrElse("apptype", "")
-//          val mid = featMap.getOrElse("mid", "")
-//          val pqtid = featMap.getOrElse("pqtid", "")
-//          val uniqKey = (apptype, mid, pqtid).productIterator.mkString(",")
-//          (uniqKey, featMap)
-//        })
-//        .reduceByKey((a, b) => a)
-//        .map(_._2)
-        .coalesce(128)
+        }.coalesce(128)
 
       val partition = s"dt=$dt"
       if (outputTable2.isEmpty) {