Browse Source

mask部分稀疏特征

xueyiming 6 days ago
parent
commit
86cbc74be2

+ 2 - 3
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_33_bucketDataFromOriginToHive_20250522.scala

@@ -45,8 +45,7 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250522 {
     val negSampleRate = param.getOrElse("negSampleRate", "1").toDouble
     // 分割样本集的比例,splitRate部分输出至outputTable,补集输出至outputTable2(如果outputTable2不为空)
     val splitRate = param.getOrElse("splitRate", "0.9").toDouble
-    val maskFeature = param.getOrElse("maskFeature", "1").toInt
-    val maskFeatureRate = param.getOrElse("maskFeatureRate", "0.0005").toDouble
+    val maskFeatureRate = param.getOrElse("maskFeatureRate", "0.0").toDouble
 
 
     val loader = getClass.getClassLoader
@@ -224,7 +223,7 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250522 {
                 featureMap.put("abcode_" + extend.getString("abcode"), idDefaultValue)
               }
 
-              if (maskFeature > 0 && Random.nextDouble() < maskFeatureRate) {
+              if (Random.nextDouble() < maskFeatureRate) {
                 featureMap.put("cid", "")
                 featureMap.put("adid", "")
                 featureMap.put("adverid", "")