|  | @@ -64,24 +64,28 @@ object makedata_32_bucket_20240622 {
 | 
	
		
			
				|  |  |        println("特征:" + contentList(i))
 | 
	
		
			
				|  |  |        val data2 = data1.map(r => r.getOrDefault(contentList(i), 0D)).filter(_ > 1E-8).collect().sorted
 | 
	
		
			
				|  |  |        val len = data2.length
 | 
	
		
			
				|  |  | -      val oneBucketNum = (len - 1) / (bucketNum - 1) + 1 // 确保每个桶至少有一个元素
 | 
	
		
			
				|  |  | -      val buffers = new ArrayBuffer[Double]()
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      var lastBucketValue = data2(0) // 记录上一个桶的切分点
 | 
	
		
			
				|  |  | -      for (j <- 0 until len by oneBucketNum) {
 | 
	
		
			
				|  |  | -        val d = data2(j)
 | 
	
		
			
				|  |  | -        if (j > 0 && d != lastBucketValue) {
 | 
	
		
			
				|  |  | -          // 如果当前切分点不同于上一个切分点,则保存当前切分点
 | 
	
		
			
				|  |  | -          buffers += d
 | 
	
		
			
				|  |  | +      if (len == 0){
 | 
	
		
			
				|  |  | +        result.add(contentList(i) + "\t" + bucketNum.toString + "\t" + "0")
 | 
	
		
			
				|  |  | +      }else{
 | 
	
		
			
				|  |  | +        val oneBucketNum = (len - 1) / (bucketNum - 1) + 1 // 确保每个桶至少有一个元素
 | 
	
		
			
				|  |  | +        val buffers = new ArrayBuffer[Double]()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        var lastBucketValue = data2(0) // 记录上一个桶的切分点
 | 
	
		
			
				|  |  | +        for (j <- 0 until len by oneBucketNum) {
 | 
	
		
			
				|  |  | +          val d = data2(j)
 | 
	
		
			
				|  |  | +          if (j > 0 && d != lastBucketValue) {
 | 
	
		
			
				|  |  | +            // 如果当前切分点不同于上一个切分点,则保存当前切分点
 | 
	
		
			
				|  |  | +            buffers += d
 | 
	
		
			
				|  |  | +          }
 | 
	
		
			
				|  |  | +          lastBucketValue = d // 更新上一个桶的切分点
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  | -        lastBucketValue = d // 更新上一个桶的切分点
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -      // 最后一个桶的结束点应该是数组的最后一个元素
 | 
	
		
			
				|  |  | -      if (!buffers.contains(data2.last)) {
 | 
	
		
			
				|  |  | -        buffers += data2.last
 | 
	
		
			
				|  |  | +        // 最后一个桶的结束点应该是数组的最后一个元素
 | 
	
		
			
				|  |  | +        if (!buffers.contains(data2.last)) {
 | 
	
		
			
				|  |  | +          buffers += data2.last
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        result.add(contentList(i) + "\t" + bucketNum.toString + "\t" + buffers.mkString(","))
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  | -      result.add(contentList(i) + "\t" + bucketNum.toString + "\t" + buffers.mkString(","))
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |      val data3 = sc.parallelize(result)
 | 
	
		
			
				|  |  |  
 |