فهرست منبع

mask部分稀疏特征

xueyiming 13 ساعت پیش
والد
کامیت
5038ada731

+ 15 - 15
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_33_bucketDataFromOriginToHive_20250522.scala

@@ -47,7 +47,6 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250522 {
     val splitRate = param.getOrElse("splitRate", "0.9").toDouble
     val maskFeatureRate = param.getOrElse("maskFeatureRate", "0.0").toDouble
 
-
     val loader = getClass.getClassLoader
     val resourceUrlBucket = loader.getResource("20250217_ad_bucket_688.txt")
     val buckets =
@@ -223,22 +222,16 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250522 {
                 featureMap.put("abcode_" + extend.getString("abcode"), idDefaultValue)
               }
 
-              if (Random.nextDouble() < maskFeatureRate) {
-                featureMap.put("cid", "")
-                featureMap.put("adid", "")
-                featureMap.put("adverid", "")
-              } else {
-                if (reqFeature.containsKey("cid") && reqFeature.getString("cid").nonEmpty) {
-                  featureMap.put("cid", reqFeature.getString("cid"))
-                }
+              if (reqFeature.containsKey("cid") && reqFeature.getString("cid").nonEmpty) {
+                featureMap.put("cid", reqFeature.getString("cid"))
+              }
 
-                if (reqFeature.containsKey("adid") && reqFeature.getString("adid").nonEmpty) {
-                  featureMap.put("adid", reqFeature.getString("adid"))
-                }
+              if (reqFeature.containsKey("adid") && reqFeature.getString("adid").nonEmpty) {
+                featureMap.put("adid", reqFeature.getString("adid"))
+              }
 
-                if (reqFeature.containsKey("adverid") && reqFeature.getString("adverid").nonEmpty) {
-                  featureMap.put("adverid", reqFeature.getString("adverid"))
-                }
+              if (reqFeature.containsKey("adverid") && reqFeature.getString("adverid").nonEmpty) {
+                featureMap.put("adverid", reqFeature.getString("adverid"))
               }
 
               if (reqFeature.containsKey("profession") && reqFeature.getString("profession").nonEmpty) {
@@ -592,6 +585,13 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250522 {
               }
               featureMap.put("vid", reqFeature.getString("vid"))
 
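+              // Randomly mask sparse features for model training: with probability maskFeatureRate, overwrite cid/adid/adverid with ""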
+              if (Random.nextDouble() < maskFeatureRate) {
+                featureMap.put("cid", "")
+                featureMap.put("adid", "")
+                featureMap.put("adverid", "")
+              }
+
               /*
             广告
               sparse:cid adid adverid targeting_conversion