| 
					
				 | 
			
			
				@@ -1,4 +1,4 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-package com.aliyun.odps.spark.zhp 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+package com.aliyun.odps.spark.zhp.makedata_ad 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import com.alibaba.fastjson.JSON 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import com.aliyun.odps.spark.examples.myUtils.{MyDateUtils, MyHdfsUtils, ParamUtils} 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -24,7 +24,7 @@ object makedata_ad_33_bucketData_20240622 { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     val loader = getClass.getClassLoader 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    val resourceUrlBucket = loader.getResource("20240622_ad_bucket_249.txt") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    val resourceUrlBucket = loader.getResource("20240624_ad_bucket_249.txt") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     val buckets = 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       if (resourceUrlBucket != null) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         val buckets = Source.fromURL(resourceUrlBucket).getLines().mkString("\n") 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -51,6 +51,7 @@ object makedata_ad_33_bucketData_20240622 { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     val beginStr = param.getOrElse("beginStr", "20240620") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     val endStr = param.getOrElse("endStr", "20240620") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     val repartition = param.getOrElse("repartition", "200").toInt 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // Feature-name prefixes to drop from the output. Empty entries are removed because
				+    // "".split(",") yields Array(""), and name.startsWith("") is true for every feature,
				+    // which would otherwise filter out all features when the parameter is unset.
				+    val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     val dateRange = MyDateUtils.getDateRange(beginStr, endStr) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     for (date <- dateRange) { 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -84,16 +85,24 @@ object makedata_ad_33_bucketData_20240622 { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             case (label, features) => 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				               val featuresBucket = features.map{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 case (name, score) => 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                  if (score > 1E-8) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    if (bucketsMap.contains(name)){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                      val (_, buckets) = bucketsMap(name) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                      val scoreNew = 1.0 / (buckets.length + 1) * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                      name + ":" + scoreNew.toString 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    }else{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                      name + ":" + score.toString 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                  } else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                  var ifFilter = false 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                  if (filterNames.nonEmpty){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    filterNames.foreach(r=> if (!ifFilter && name.startsWith(r)) {ifFilter = true} ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                  } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                  if (ifFilter){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     "" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                  }else{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    if (score > 1E-8) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                      if (bucketsMap.contains(name)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        val (_, buckets) = bucketsMap(name) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        val scoreNew = 1.0 / (buckets.length + 1) * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        name + ":" + scoreNew.toString 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                      } else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        name + ":" + score.toString 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                      } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    } else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                      "" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                   } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				               }.filter(_.nonEmpty) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				               result.add(label + "\t" + featuresBucket.mkString("\t")) 
			 |