jch 5 hours ago
Parent
Commit
d89451b0c8

+ 8 - 3
src/main/scala/com/aliyun/odps/spark/examples/myUtils/DataUtils.scala

@@ -119,11 +119,13 @@ object DataUtils {
 
   def loadUseFeatureNames(loader: ClassLoader, nameFile: String): Set[String] = {
     val names = loadFileData(loader, nameFile)
-    println(names)
-    names.split("\n")
+    val nameSet = names.split("\n")
       .map(r => r.replace(" ", "").replaceAll("\n", ""))
       .filter(r => r.nonEmpty)
       .toSet
+    println("nameSet.size=" + nameSet.size)
+    println(nameSet)
+    nameSet
   }
 
   def loadUseFeatureBuckets(loader: ClassLoader, notUseBucket: Int, nameFile: String): Map[String, (Double, Array[Double])] = {
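Not part of the commit, just an illustrative note: the chain in loadUseFeatureNames above expects the name file to contain one feature name per line, with blank lines and stray spaces dropped. A minimal standalone sketch of that parsing, with hypothetical file content, looks like this:

  // Standalone sketch of the parsing chain in loadUseFeatureNames above.
  // The sample string is hypothetical; the real content comes from loadFileData.
  object NameFileDemo {
    def main(args: Array[String]): Unit = {
      val names = "ctr_7d\n click_cnt \n\ncvr_30d\n"   // hypothetical name-file content
      val nameSet = names.split("\n")
        .map(r => r.replace(" ", "").replaceAll("\n", ""))
        .filter(r => r.nonEmpty)
        .toSet
      println("nameSet.size=" + nameSet.size)           // 3
      println(nameSet)                                  // e.g. Set(ctr_7d, click_cnt, cvr_30d)
    }
  }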
@@ -132,13 +134,16 @@ object DataUtils {
     }
     val buckets = loadFileData(loader, nameFile)
     println(buckets)
-    buckets.split("\n")
+    val bucketMap = buckets.split("\n")
       .map(r => r.replace(" ", "").replaceAll("\n", ""))
       .filter(r => r.nonEmpty)
       .map(r => {
         val rList = r.split("\t")
         (rList(0), (rList(1).toDouble, rList(2).split(",").map(_.toDouble)))
       }).toMap
+    println("bucketMap.size=" + bucketMap.size)
+    println(bucketMap)
+    bucketMap
   }
 
   def parseLabel(data: String, key: String, default: String = "0"): String = {
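Not part of the commit; a minimal sketch of the bucket-file line format implied by the parsing in loadUseFeatureBuckets: a tab-separated feature name, default value, and comma-separated bucket boundaries. The sample line below is hypothetical.

  // Standalone sketch of the per-line parsing in loadUseFeatureBuckets above.
  // Assumed line layout: <featureName>\t<defaultValue>\t<boundary1,boundary2,...>
  object BucketLineDemo {
    def main(args: Array[String]): Unit = {
      val line = "ctr_7d\t0.0\t0.01,0.05,0.1,0.3"       // hypothetical bucket-file line
      val rList = line.split("\t")
      val entry = (rList(0), (rList(1).toDouble, rList(2).split(",").map(_.toDouble)))
      println(entry._1)                                 // ctr_7d
      println(entry._2._1)                              // 0.0
      println(entry._2._2.mkString("[", ", ", "]"))     // [0.01, 0.05, 0.1, 0.3]
    }
  }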