瀏覽代碼

str和ros特征精度截断

jch 4 月之前
父節點
當前提交
ed795cd022

+ 17 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_61_str2ros_originData_20241209.scala

@@ -303,7 +303,8 @@ object makedata_recsys_61_str2ros_originData_20241209 {
           val level = if (record.isNull("level")) "0" else record.getString("level")
           val logKey = (apptype, pagesource, mid, vid, ts, abcode, level).productIterator.mkString(",")
           val labelKey = labels.toString()
-          val featureKey = featureMap.toString()
+          // val featureKey = featureMap.toString()
+          val featureKey = truncateDecimal(featureMap).toString()
           //6 拼接数据,保存。
           logKey + "\t" + labelKey + "\t" + featureKey
 
@@ -355,6 +356,21 @@ object makedata_recsys_61_str2ros_originData_20241209 {
     map
   }
 
+  /**
+   * Builds a copy of `obj` in which every value is read as a Double and limited to
+   * at most `scale` decimal places.
+   *
+   * Despite the name, fractional values are rounded HALF_UP rather than truncated.
+   * Whole-number values and non-finite values (NaN, +/-Infinity) are copied through
+   * unchanged. The input object is not modified.
+   *
+   * @param obj   source object; each value is read with `getDoubleValue`
+   *              (NOTE(review): assumes every value is numeric or numeric-convertible — confirm against callers)
+   * @param scale number of decimal places to keep for fractional values, default 6
+   * @return a new JSONObject with the same keys and Double values
+   */
+  def truncateDecimal(obj: JSONObject, scale: Int = 6): JSONObject = {
+    val data = new JSONObject()
+    // Iterate the Java key set explicitly so this method does not depend on an
+    // implicit Java-to-Scala collection conversion being in scope.
+    val keys = obj.keySet().iterator()
+    while (keys.hasNext) {
+      val key = keys.next()
+      val value = obj.getDoubleValue(key)
+      // BigDecimal cannot represent NaN or Infinity and throws NumberFormatException,
+      // so non-finite values must bypass the rounding branch.
+      val nonFinite = value.isNaN || value.isInfinity
+      val isWhole = value == value.floor
+      if (nonFinite || isWhole) {
+        data.put(key, value)
+      } else {
+        val rounded = BigDecimal(value).setScale(scale, BigDecimal.RoundingMode.HALF_UP).toDouble
+        data.put(key, rounded)
+      }
+    }
+    data
+  }
+
+
   def funcC34567ForTagsW2V(tags: String, title: String): Tuple4[Double, String, Double, Double] = {
     // 匹配数量 匹配词 语义最高相似度分 语义平均相似度分
     val tagsList = tags.split(",")