|
@@ -64,24 +64,28 @@ object makedata_32_bucket_20240622 {
|
|
|
println("特征:" + contentList(i))
|
|
|
val data2 = data1.map(r => r.getOrDefault(contentList(i), 0D)).filter(_ > 1E-8).collect().sorted
|
|
|
val len = data2.length
|
|
|
- val oneBucketNum = (len - 1) / (bucketNum - 1) + 1 // 确保每个桶至少有一个元素
|
|
|
- val buffers = new ArrayBuffer[Double]()
|
|
|
-
|
|
|
- var lastBucketValue = data2(0) // 记录上一个桶的切分点
|
|
|
- for (j <- 0 until len by oneBucketNum) {
|
|
|
- val d = data2(j)
|
|
|
- if (j > 0 && d != lastBucketValue) {
|
|
|
- // 如果当前切分点不同于上一个切分点,则保存当前切分点
|
|
|
- buffers += d
|
|
|
+ if (len == 0){
|
|
|
+ result.add(contentList(i) + "\t" + bucketNum.toString + "\t" + "0")
|
|
|
+ }else{
|
|
|
+ val oneBucketNum = (len - 1) / (bucketNum - 1) + 1 // 确保每个桶至少有一个元素
|
|
|
+ val buffers = new ArrayBuffer[Double]()
|
|
|
+
|
|
|
+ var lastBucketValue = data2(0) // 记录上一个桶的切分点
|
|
|
+ for (j <- 0 until len by oneBucketNum) {
|
|
|
+ val d = data2(j)
|
|
|
+ if (j > 0 && d != lastBucketValue) {
|
|
|
+ // 如果当前切分点不同于上一个切分点,则保存当前切分点
|
|
|
+ buffers += d
|
|
|
+ }
|
|
|
+ lastBucketValue = d // 更新上一个桶的切分点
|
|
|
}
|
|
|
- lastBucketValue = d // 更新上一个桶的切分点
|
|
|
- }
|
|
|
|
|
|
- // 最后一个桶的结束点应该是数组的最后一个元素
|
|
|
- if (!buffers.contains(data2.last)) {
|
|
|
- buffers += data2.last
|
|
|
+ // 最后一个桶的结束点应该是数组的最后一个元素
|
|
|
+ if (!buffers.contains(data2.last)) {
|
|
|
+ buffers += data2.last
|
|
|
+ }
|
|
|
+ result.add(contentList(i) + "\t" + bucketNum.toString + "\t" + buffers.mkString(","))
|
|
|
}
|
|
|
- result.add(contentList(i) + "\t" + bucketNum.toString + "\t" + buffers.mkString(","))
|
|
|
}
|
|
|
val data3 = sc.parallelize(result)
|
|
|
|