|
@@ -39,6 +39,7 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250228 {
|
|
|
val filterHours = param.getOrElse("filterHours", "00,01,02,03,04,05,06,07").split(",").toSet // comma-separated hour strings, default "00".."07"; empty entries are NOT stripped here (unlike filterNames). Presumably hours to skip -- usage is outside this excerpt, confirm.
|
|
|
val idDefaultValue = param.getOrElse("idDefaultValue", "1.0").toDouble // numeric default parsed from the "idDefaultValue" parameter (1.0 when absent); where it is applied is not visible in this excerpt
|
|
|
val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet // comma-separated names with empty entries dropped, so the default is an empty set. Presumably names to exclude -- confirm against the usage site.
|
|
|
+ val filterAdverIds = param.getOrElse("filterAdverIds", "").split(",").filter(_.nonEmpty).toSet // comma-separated "adverid" values to exclude from the data; empty entries dropped, so the default is an empty set (nothing excluded)
|
|
|
val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion") // name of the record column that is read and parsed as the integer label
|
|
|
val negSampleRate = param.getOrElse("negSampleRate", "1").toDouble // threshold compared against Random.nextDouble() for records whose label is not > 0; records with label > 0 are always kept
|
|
|
|
|
@@ -100,6 +101,10 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250228 {
|
|
|
val appType = record.getString("apptype")
|
|
|
!Set("12", "13").contains(appType)
|
|
|
})
|
|
|
+ .filter(record => { // Drop every record whose "adverid" column value is listed in filterAdverIds.
|
|
|
+ val adverId = record.getString("adverid")
|
|
|
+ !filterAdverIds.contains(adverId) // keep the record only when its id is absent from the exclusion set; with an empty set every record passes
|
|
|
+ })
|
|
|
.filter(record => {
|
|
|
val label = record.getString(whatLabel).toInt
|
|
|
label > 0 || Random.nextDouble() < negSampleRate
|