Sfoglia il codice sorgente

Update makedata_ad_33_bucketDataFromOriginToHive_20250228

StrayWarrior 1 mese fa
parent
commit
01c97592c6

+ 6 - 3
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_33_bucketDataFromOriginToHive_20250228.scala

@@ -91,6 +91,10 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250228 {
             .filter(record => {
               AdUtil.isApi(record)
             })
+            .filter(record => {
+              val label = record.getString(whatLabel).toInt
+              label > 1 || Random.nextDouble() < negSampleRate
+            })
             .map(record => {
               val ts = record.getString("ts").toInt
               val cid = record.getString("cid")
@@ -523,9 +527,8 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250228 {
             })
             resultMap += ("has_conversion" -> label)
             resultMap += ("logkey" -> logKey)
-            (label.toInt, resultMap, Random.nextDouble())
-        }.filter(r => r._3 < negSampleRate || r._1 > 0)
-        .map(r => r._2).coalesce(128)
+            resultMap
+        }.coalesce(128)
 
       val partition = s"dt=$dt"
       odpsOps.saveToTable(project, outputTable, partition, recordRdd, write, defaultCreate = true, overwrite = true)