|
@@ -45,6 +45,7 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250522 {
|
|
val negSampleRate = param.getOrElse("negSampleRate", "1").toDouble
|
|
val negSampleRate = param.getOrElse("negSampleRate", "1").toDouble
|
|
// 分割样本集的比例,splitRate部分输出至outputTable,补集输出至outputTable2(如果outputTable2不为空)
|
|
// 分割样本集的比例,splitRate部分输出至outputTable,补集输出至outputTable2(如果outputTable2不为空)
|
|
val splitRate = param.getOrElse("splitRate", "0.9").toDouble
|
|
val splitRate = param.getOrElse("splitRate", "0.9").toDouble
|
|
|
|
+ val maskFeatureRate = param.getOrElse("maskFeatureRate", "0.0").toDouble
|
|
|
|
|
|
val loader = getClass.getClassLoader
|
|
val loader = getClass.getClassLoader
|
|
val resourceUrlBucket = loader.getResource("20250217_ad_bucket_688.txt")
|
|
val resourceUrlBucket = loader.getResource("20250217_ad_bucket_688.txt")
|
|
@@ -584,6 +585,13 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250522 {
|
|
}
|
|
}
|
|
featureMap.put("vid", reqFeature.getString("vid"))
|
|
featureMap.put("vid", reqFeature.getString("vid"))
|
|
|
|
|
|
|
|
+ // 随机mask部分特征供模型训练
|
|
|
|
+ if (Random.nextDouble() < maskFeatureRate) {
|
|
|
|
+ featureMap.put("cid", "")
|
|
|
|
+ featureMap.put("adid", "")
|
|
|
|
+ featureMap.put("adverid", "")
|
|
|
|
+ }
|
|
|
|
+
|
|
/*
|
|
/*
|
|
广告
|
|
广告
|
|
sparse:cid adid adverid targeting_conversion
|
|
sparse:cid adid adverid targeting_conversion
|