|
@@ -56,9 +56,9 @@ object makedata_15_bucket_20240608 {
|
|
|
for (i <- contentList.indices){
|
|
|
println("特征:" + contentList(i))
|
|
|
val data2 = data1.map(r => r(i)).filter(_ > 1E-8).collect().sorted
|
|
|
- if (data2.map(_.toString).toSet.size < bucketNum*10){
|
|
|
- println("无法分桶:" + data2.map(_.toString).toSet.size.toString)
|
|
|
- }else{
|
|
|
+// if (data2.map(_.toString).toSet.size < bucketNum*10){
|
|
|
+// println("无法分桶:" + data2.map(_.toString).toSet.size.toString)
|
|
|
+// }else{
|
|
|
val len = data2.length
|
|
|
val oneBucketNum = (len - 1) / (bucketNum - 1) + 1 // 确保每个桶至少有一个元素
|
|
|
val buffers = new ArrayBuffer[Double]()
|
|
@@ -79,7 +79,7 @@ object makedata_15_bucket_20240608 {
|
|
|
}
|
|
|
result.add(contentList(i) + "\t" + buffers.mkString(","))
|
|
|
}
|
|
|
- }
|
|
|
+// }
|
|
|
val data3 = sc.parallelize(result)
|
|
|
|
|
|
|