|  | @@ -33,12 +33,12 @@ object makedata_ad_33_bucketData_20240726 {
 | 
	
		
			
				|  |  |      val repartition = param.getOrElse("repartition", "100").toInt
 | 
	
		
			
				|  |  |      val filterNames = param.getOrElse("filterNames", "").split(",").toSet
 | 
	
		
			
				|  |  |      val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")
 | 
	
		
			
				|  |  | -    val featureNameFile = param.getOrElse("featureNameFile", "20240718_ad_feature_name.txt");
 | 
	
		
			
				|  |  | +    val featureNameFile = param.getOrElse("featureNameFile", "20240718_ad_feature_name_517.txt");
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      val loader = getClass.getClassLoader
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    val resourceUrlBucket = loader.getResource("20240718_ad_bucket_688.txt")
 | 
	
		
			
				|  |  | +    val resourceUrlBucket = loader.getResource("20240718_ad_bucket_517.txt")
 | 
	
		
			
				|  |  |      val buckets =
 | 
	
		
			
				|  |  |        if (resourceUrlBucket != null) {
 | 
	
		
			
				|  |  |          val buckets = Source.fromURL(resourceUrlBucket).getLines().mkString("\n")
 | 
	
	
		
			
				|  | @@ -87,6 +87,13 @@ object makedata_ad_33_bucketData_20240726 {
 | 
	
		
			
				|  |  |            jsons.foreach(r => {
 | 
	
		
			
				|  |  |              features.put(r._1, jsons.getDoubleValue(r._1))
 | 
	
		
			
				|  |  |            })
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +          for (name <- contentList) {
 | 
	
		
			
				|  |  | +            if (!features.contains(name)) {
 | 
	
		
			
				|  |  | +              features.put(name, 0)
 | 
	
		
			
				|  |  | +            }
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |            (logKey, labelKey, features)
 | 
	
		
			
				|  |  |          })
 | 
	
		
			
				|  |  |          .filter {
 | 
	
	
		
			
				|  | @@ -106,39 +113,32 @@ object makedata_ad_33_bucketData_20240726 {
 | 
	
		
			
				|  |  |            val bucketsMap = bucketsMap_br.value
 | 
	
		
			
				|  |  |            row.foreach {
 | 
	
		
			
				|  |  |              case (label, features) =>
 | 
	
		
			
				|  |  | -              val featuresBucket = new ArrayBuffer[String]()
 | 
	
		
			
				|  |  | -              for (name <- contentList) {
 | 
	
		
			
				|  |  | -                var ifFilter = false
 | 
	
		
			
				|  |  | -                if (filterNames.nonEmpty) {
 | 
	
		
			
				|  |  | -                  filterNames.foreach(r => if (!ifFilter && name.contains(r)) {
 | 
	
		
			
				|  |  | -                    ifFilter = true
 | 
	
		
			
				|  |  | -                  })
 | 
	
		
			
				|  |  | -                }
 | 
	
		
			
				|  |  | -                if (!ifFilter) {
 | 
	
		
			
				|  |  | -                  if (features.contains(name)) {
 | 
	
		
			
				|  |  | -                    val score = features(name)
 | 
	
		
			
				|  |  | +              val featuresBucket = features.map {
 | 
	
		
			
				|  |  | +                case (name, score) =>
 | 
	
		
			
				|  |  | +                  var ifFilter = false
 | 
	
		
			
				|  |  | +                  if (filterNames.nonEmpty) {
 | 
	
		
			
				|  |  | +                    filterNames.foreach(r => if (!ifFilter && name.contains(r)) {
 | 
	
		
			
				|  |  | +                      ifFilter = true
 | 
	
		
			
				|  |  | +                    })
 | 
	
		
			
				|  |  | +                  }
 | 
	
		
			
				|  |  | +                  if (ifFilter) {
 | 
	
		
			
				|  |  | +                    ""
 | 
	
		
			
				|  |  | +                  } else {
 | 
	
		
			
				|  |  |                      if (score > 1E-8) {
 | 
	
		
			
				|  |  |                        if (bucketsMap.contains(name)) {
 | 
	
		
			
				|  |  |                          val (bucketsNum, buckets) = bucketsMap(name)
 | 
	
		
			
				|  |  | -                        val scoreNew = 0.01 + 1.0 / bucketsNum * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0)
 | 
	
		
			
				|  |  | -                        featuresBucket.add(name + ":" + scoreNew.toString)
 | 
	
		
			
				|  |  | +                        val scoreNew = 0.01 + (1.0 / bucketsNum * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0))
 | 
	
		
			
				|  |  | +                        name + ":" + scoreNew.toString
 | 
	
		
			
				|  |  |                        } else {
 | 
	
		
			
				|  |  | -                        featuresBucket.add(name + ":" + score.toString)
 | 
	
		
			
				|  |  | +                        name + ":" + score.toString
 | 
	
		
			
				|  |  |                        }
 | 
	
		
			
				|  |  |                      } else {
 | 
	
		
			
				|  |  | -                      featuresBucket.add(name + ":" + "0.01")
 | 
	
		
			
				|  |  | +                      name + ":" + "0.01"
 | 
	
		
			
				|  |  |                      }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -                  } else {
 | 
	
		
			
				|  |  | -                    featuresBucket.add(name + ":" + "0.01")
 | 
	
		
			
				|  |  |                    }
 | 
	
		
			
				|  |  | -                }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -              }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | +              }.filter(_.nonEmpty)
 | 
	
		
			
				|  |  |                result.add(label + "\t" + featuresBucket.mkString("\t"))
 | 
	
		
			
				|  |  |            }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |            result.iterator
 | 
	
		
			
				|  |  |          })
 | 
	
		
			
				|  |  |  
 |