|
@@ -33,6 +33,7 @@ object makedata_ad_33_bucketData_20240729 {
|
|
|
val repartition = param.getOrElse("repartition", "100").toInt
|
|
|
val filterNames = param.getOrElse("filterNames", "").split(",").toSet
|
|
|
val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")
|
|
|
+ val cidCountThreshold = param.getOrElse("cidCountThreshold", "20000").toInt
|
|
|
|
|
|
val loader = getClass.getClassLoader
|
|
|
|
|
@@ -86,7 +87,7 @@ object makedata_ad_33_bucketData_20240729 {
|
|
|
}
|
|
|
val count = cidCountMap.getOrElse(key, 0) + 1
|
|
|
cidCountMap.put(key, count)
|
|
|
- count > 20000
|
|
|
+ count < cidCountThreshold
|
|
|
}.map{
|
|
|
case (logKey, labelKey, features) =>
|
|
|
val label = JSON.parseObject(labelKey).getOrDefault(whatLabel, "0").toString
|