Explorar el Código

feat:修改20240729分桶脚本

zhaohaipeng hace 9 meses
padre
commit
1990f03007

+ 2 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/makedata_ad_33_bucketData_20240729.scala

@@ -33,6 +33,7 @@ object makedata_ad_33_bucketData_20240729 {
     val repartition = param.getOrElse("repartition", "100").toInt
     val filterNames = param.getOrElse("filterNames", "").split(",").toSet
     val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")
+    val cidCountThreshold = param.getOrElse("cidCountThreshold", "20000").toInt
 
     val loader = getClass.getClassLoader
 
@@ -86,7 +87,7 @@ object makedata_ad_33_bucketData_20240729 {
             }
             val count = cidCountMap.getOrElse(key, 0) + 1
             cidCountMap.put(key, count)
-            count > 20000
+            count < cidCountThreshold
         }.map{
           case (logKey, labelKey, features) =>
             val label = JSON.parseObject(labelKey).getOrDefault(whatLabel, "0").toString