|
@@ -45,6 +45,9 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250522 {
|
|
|
val negSampleRate = param.getOrElse("negSampleRate", "1").toDouble
|
|
|
// Ratio for splitting the sample set: the splitRate portion is written to outputTable, and the complement is written to outputTable2 (if outputTable2 is not empty)
|
|
|
val splitRate = param.getOrElse("splitRate", "0.9").toDouble
|
|
|
+ val maskFeature = param.getOrElse("maskFeature", "1").toInt
|
|
|
+ val maskFeatureRate = param.getOrElse("maskFeatureRate", "0.0005").toDouble
|
|
|
+
|
|
|
|
|
|
val loader = getClass.getClassLoader
|
|
|
val resourceUrlBucket = loader.getResource("20250217_ad_bucket_688.txt")
|
|
@@ -221,16 +224,22 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250522 {
|
|
|
featureMap.put("abcode_" + extend.getString("abcode"), idDefaultValue)
|
|
|
}
|
|
|
|
|
|
- if (reqFeature.containsKey("cid") && reqFeature.getString("cid").nonEmpty) {
|
|
|
- featureMap.put("cid", reqFeature.getString("cid"))
|
|
|
- }
|
|
|
+ if (maskFeature > 0 && Random.nextDouble() < maskFeatureRate) {
|
|
|
+ featureMap.put("cid", "")
|
|
|
+ featureMap.put("adid", "")
|
|
|
+ featureMap.put("adverid", "")
|
|
|
+ } else {
|
|
|
+ if (reqFeature.containsKey("cid") && reqFeature.getString("cid").nonEmpty) {
|
|
|
+ featureMap.put("cid", reqFeature.getString("cid"))
|
|
|
+ }
|
|
|
|
|
|
- if (reqFeature.containsKey("adid") && reqFeature.getString("adid").nonEmpty) {
|
|
|
- featureMap.put("adid", reqFeature.getString("adid"))
|
|
|
- }
|
|
|
+ if (reqFeature.containsKey("adid") && reqFeature.getString("adid").nonEmpty) {
|
|
|
+ featureMap.put("adid", reqFeature.getString("adid"))
|
|
|
+ }
|
|
|
|
|
|
- if (reqFeature.containsKey("adverid") && reqFeature.getString("adverid").nonEmpty) {
|
|
|
- featureMap.put("adverid", reqFeature.getString("adverid"))
|
|
|
+ if (reqFeature.containsKey("adverid") && reqFeature.getString("adverid").nonEmpty) {
|
|
|
+ featureMap.put("adverid", reqFeature.getString("adverid"))
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
if (reqFeature.containsKey("profession") && reqFeature.getString("profession").nonEmpty) {
|