| 
					
				 | 
			
			
				@@ -21,8 +21,18 @@ object makedata_ad_32_bucket_20240718 { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       .getOrCreate() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     val sc = spark.sparkContext 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // 1 读取参数 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    val param = ParamUtils.parseArgs(args) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    val readPath = param.getOrElse("readPath", "/dw/recommend/model/31_ad_sample_data/20240620*") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    val savePath = param.getOrElse("savePath", "/dw/recommend/model/32_bucket_data/") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    val fileName = param.getOrElse("fileName", "20240620_100") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    val sampleRate = param.getOrElse("sampleRate", "1.0").toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    val bucketNum = param.getOrElse("bucketNum", "100").toInt 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    val featureNameFile = param.getOrElse("featureNameFile", "20240718_ad_feature_name.txt"); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     val loader = getClass.getClassLoader 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    val resourceUrl = loader.getResource("20240703_ad_feature_name.txt") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    val resourceUrl = loader.getResource(featureNameFile) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     val content = 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       if (resourceUrl != null) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         val content = Source.fromURL(resourceUrl).getLines().mkString("\n") 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -38,14 +48,6 @@ object makedata_ad_32_bucket_20240718 { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    // 1 读取参数 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    val param = ParamUtils.parseArgs(args) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    val readPath = param.getOrElse("readPath", "/dw/recommend/model/31_ad_sample_data/20240620*") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    val savePath = param.getOrElse("savePath", "/dw/recommend/model/32_bucket_data/") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    val fileName = param.getOrElse("fileName", "20240620_100") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    val sampleRate = param.getOrElse("sampleRate", "1.0").toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    val bucketNum = param.getOrElse("bucketNum", "100").toInt 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     val data = sc.textFile(readPath) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     println("问题数据数量:" + data.filter(r=>r.split("\t").length != 3).count()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     val data1 = data.map(r => { 
			 |