Forráskód Böngészése

Update makedata_ad_33_bucketDataFromOriginToHive_20250228: add filterAdverIds

StrayWarrior 3 hete
szülő
commit
4a0444213f

+ 5 - 0
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_33_bucketDataFromOriginToHive_20250228.scala

@@ -39,6 +39,7 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250228 {
     val filterHours = param.getOrElse("filterHours", "00,01,02,03,04,05,06,07").split(",").toSet
     val idDefaultValue = param.getOrElse("idDefaultValue", "1.0").toDouble
     val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
+    val filterAdverIds = param.getOrElse("filterAdverIds", "").split(",").filter(_.nonEmpty).toSet
     val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")
     val negSampleRate = param.getOrElse("negSampleRate", "1").toDouble
 
@@ -100,6 +101,10 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250228 {
               val appType = record.getString("apptype")
               !Set("12", "13").contains(appType)
             })
+            .filter(record => {
+              val adverId = record.getString("adverid")
+              !filterAdverIds.contains(adverId)
+            })
             .filter(record => {
               val label = record.getString(whatLabel).toInt
               label > 0 || Random.nextDouble() < negSampleRate