|  | @@ -9,6 +9,7 @@ import org.apache.spark.sql.SparkSession
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  import scala.collection.JavaConversions._
 | 
	
		
			
				|  |  |  import scala.io.Source
 | 
	
		
			
				|  |  | +import scala.util.Random
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  /*
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -56,8 +57,9 @@ object makedata_ad_33_bucketDataToHive_20250110 {
 | 
	
		
			
				|  |  |      val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
 | 
	
		
			
				|  |  |      val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")
 | 
	
		
			
				|  |  |      val project = param.getOrElse("project", "loghubods")
 | 
	
		
			
				|  |  | -    val table = param.getOrElse("table", "ad_easyrec_train_data_v2")
 | 
	
		
			
				|  |  | +    val table = param.getOrElse("table", "ad_easyrec_train_data_v2_sampled")
 | 
	
		
			
				|  |  |      val partition = param.getOrElse("partition", "dt=20250208")
 | 
	
		
			
				|  |  | +    val negSampleRate = param.getOrElse("negSampleRate", "1").toDouble
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
 | 
	
		
			
				|  |  |      for (date <- dateRange) {
 | 
	
	
		
			
				|  | @@ -100,8 +102,9 @@ object makedata_ad_33_bucketDataToHive_20250110 {
 | 
	
		
			
				|  |  |              }.toMap
 | 
	
		
			
				|  |  |              resultMap += ("has_conversion" -> label)
 | 
	
		
			
				|  |  |              resultMap += ("logkey" -> logKey)
 | 
	
		
			
				|  |  | -            resultMap
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | +            (label.toInt, resultMap, Random.nextDouble())
 | 
	
		
			
				|  |  | +        }.filter(r => r._3 < negSampleRate || r._1 > 0)
 | 
	
		
			
				|  |  | +        .map(r => r._2)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |        // 4 hive
 | 
	
		
			
				|  |  |        odpsOps.saveToTable(project, table, partition, list, write, defaultCreate = true, overwrite = true)
 |