浏览代码

扩展特征

jch 5 小时之前
父节点
当前提交
d89451b0c8
共有 1 个文件被更改，包括 8 次插入和 3 次删除
  1. 8 3
      src/main/scala/com/aliyun/odps/spark/examples/myUtils/DataUtils.scala

+ 8 - 3
src/main/scala/com/aliyun/odps/spark/examples/myUtils/DataUtils.scala

@@ -119,11 +119,13 @@ object DataUtils {
 
 
   def loadUseFeatureNames(loader: ClassLoader, nameFile: String): Set[String] = {
   def loadUseFeatureNames(loader: ClassLoader, nameFile: String): Set[String] = {
     val names = loadFileData(loader, nameFile)
     val names = loadFileData(loader, nameFile)
-    println(names)
-    names.split("\n")
+    val nameSet = names.split("\n")
       .map(r => r.replace(" ", "").replaceAll("\n", ""))
       .map(r => r.replace(" ", "").replaceAll("\n", ""))
       .filter(r => r.nonEmpty)
       .filter(r => r.nonEmpty)
       .toSet
       .toSet
+    println("nameSet.size=" + nameSet.size)
+    println(nameSet)
+    nameSet
   }
   }
 
 
   def loadUseFeatureBuckets(loader: ClassLoader, notUseBucket: Int, nameFile: String): Map[String, (Double, Array[Double])] = {
   def loadUseFeatureBuckets(loader: ClassLoader, notUseBucket: Int, nameFile: String): Map[String, (Double, Array[Double])] = {
@@ -132,13 +134,16 @@ object DataUtils {
     }
     }
     val buckets = loadFileData(loader, nameFile)
     val buckets = loadFileData(loader, nameFile)
     println(buckets)
     println(buckets)
-    buckets.split("\n")
+    val bucketMap = buckets.split("\n")
       .map(r => r.replace(" ", "").replaceAll("\n", ""))
       .map(r => r.replace(" ", "").replaceAll("\n", ""))
       .filter(r => r.nonEmpty)
       .filter(r => r.nonEmpty)
       .map(r => {
       .map(r => {
         val rList = r.split("\t")
         val rList = r.split("\t")
         (rList(0), (rList(1).toDouble, rList(2).split(",").map(_.toDouble)))
         (rList(0), (rList(1).toDouble, rList(2).split(",").map(_.toDouble)))
       }).toMap
       }).toMap
+    println("bucketMap.size=" + bucketMap.size)
+    println(bucketMap)
+    bucketMap
   }
   }
 
 
   def parseLabel(data: String, key: String, default: String = "0"): String = {
   def parseLabel(data: String, key: String, default: String = "0"): String = {