Browse Source

扩展特征

jch 8 giờ trước cách đây
mục cha
commit
118e3b7187

+ 33 - 0
src/main/scala/com/aliyun/odps/spark/examples/myUtils/DataUtils.scala

@@ -176,4 +176,37 @@ object DataUtils {
         }
     }.filter(_.nonEmpty)
   }
+
+  /** Loads the feature names listed in `nameFile`, one name per line: every space character
+    * is removed from each line and lines that end up empty are dropped. Prints the set. */
+  def loadFeatureNames(nameFile: String): Set[String] = {
+    val buffer = Source.fromFile(nameFile)
+    // Read inside try/finally so the file handle is released even if reading throws.
+    val featSet =
+      try buffer.getLines().map(_.replace(" ", "")).filter(_.nonEmpty).toSet
+      finally buffer.close()
+    println("featSet.size=" + featSet.size)
+    println(featSet)
+    featSet
+  }
+
+  /** Loads bucket definitions from `bucketFile`, or returns an empty map without opening the
+    * file when `notUseBucket > 0`. Each non-empty line (after all spaces are removed) must be
+    * `name<TAB>value<TAB>v1,v2,...` and becomes `name -> (value, Array(v1, v2, ...))`. */
+  def loadFeatureBuckets(notUseBucket: Int, bucketFile: String): Map[String, (Double, Array[Double])] =
+    if (notUseBucket > 0) Map.empty
+    else {
+      val buffer = Source.fromFile(bucketFile)
+      // Parse inside try/finally so the handle is closed even if a malformed line throws.
+      val bucketMap =
+        try buffer.getLines().map(_.replace(" ", "")).filter(_.nonEmpty).map { r =>
+          val rList = r.split("\t")
+          (rList(0), (rList(1).toDouble, rList(2).split(",").map(_.toDouble)))
+        }.toMap
+        finally buffer.close()
+      println("bucketMap.size=" + bucketMap.size)
+      println(bucketMap)
+      bucketMap
+    }
+
 }