Browse Source

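Update makedata_ad_33_bucketDataFromOriginToHive_20250228: fix order of filters (exclude apptype 12/13 on the raw record before negative sampling, instead of after the feature tuple is built)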

StrayWarrior 1 month ago
parent
commit
762458037c

+ 4 - 5
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_33_bucketDataFromOriginToHive_20250228.scala

@@ -91,6 +91,10 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250228 {
             .filter(record => {
               AdUtil.isApi(record)
             })
+            .filter(record => {
+              val appType = record.getString("apptype")
+              !Set("12", "13").contains(appType)
+            })
             .filter(record => {
               val label = record.getString(whatLabel).toInt
               label > 0 || Random.nextDouble() < negSampleRate
@@ -498,11 +502,6 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250228 {
             }
           })
           (logKey, labelKey, denseFeatures, sparseFeatures)
-        }.filter {
-          case (logKey, labelKey, denseFeatures, sparseFeatures) =>
-            val logKeyList = logKey.split(",")
-            val apptype = logKeyList(0)
-            !Set("12", "13").contains(apptype)
         }
         .map {
           case (logKey, labelKey, denseFeatures, sparseFeatures) =>