Browse Source

i2i样本制作,第4步。 连续值分桶

zhangbo 5 months ago
parent
commit
a789a020ff

+ 80 - 68
src/main/scala/com/aliyun/odps/spark/examples/makedata_dssm/makedata_i2i_04_bucketFile_20241128.scala

@@ -39,85 +39,97 @@ object makedata_i2i_04_bucketFile_20241128 {
       val f33 = rList(11)
       val result = new ArrayBuffer[(String, Double)]()
       Set(f1, f11).toSeq.foreach(f=> { // Set(...) collapses f1 and f11 into one entry when the two strings are equal
-        val json = JSON.parseObject(f)
-        if (json.nonEmpty){
-          json.foreach {
-            case (k, v) =>
-              val value = v.toString.toDouble
-              k match {
-                case "str_day1" => result += (("action:str_day1", value))
-                case "rov_day1" => result += (("action:rov_day1", value))
-                case "ros_day1" => result += (("action:ros_day1", value))
-                case "str_day7" => result += (("action:str_day7", value))
-                case "rov_day7" => result += (("action:rov_day7", value))
-                case "ros_day7" => result += (("action:ros_day7", value))
-                case "str_day21" => result += (("action:str_day21", value))
-                case "rov_day21" => result += (("action:rov_day21", value))
-                case "ros_day21" => result += (("action:ros_day21", value))
-                case "str_day336" => result += (("action:str_day336", value))
-                case "rov_day336" => result += (("action:rov_day336", value))
-                case "ros_day336" => result += (("action:ros_day336", value))
-                case "vovd1_day7" => result += (("action:vovd1_day7", value))
-                case "vovd1_day21" => result += (("action:vovd1_day21", value))
-                case "vovd1_day336" => result += (("action:vovd1_day336", value))
-                case _ =>
+        if (f != null && f.nonEmpty){ // skip null or empty input strings before parsing
+          val jsonOpt = Option(JSON.parseObject(f)) // Option(...) turns a null parse result into None so it is skipped
+          jsonOpt.foreach { json =>
+            if (json.nonEmpty) { // proceed only when the parsed JSON object is non-empty
+              json.foreach {
+                case (k, v) =>
+                  val value = v.toString.toDouble // NOTE(review): throws if a value is not numeric (or v is null) - confirm upstream data
+                  k match { // listed keys are appended to result with an "action:" prefix
+                    case "str_day1" => result += (("action:str_day1", value))
+                    case "rov_day1" => result += (("action:rov_day1", value))
+                    case "ros_day1" => result += (("action:ros_day1", value))
+                    case "str_day7" => result += (("action:str_day7", value))
+                    case "rov_day7" => result += (("action:rov_day7", value))
+                    case "ros_day7" => result += (("action:ros_day7", value))
+                    case "str_day21" => result += (("action:str_day21", value))
+                    case "rov_day21" => result += (("action:rov_day21", value))
+                    case "ros_day21" => result += (("action:ros_day21", value))
+                    case "str_day336" => result += (("action:str_day336", value))
+                    case "rov_day336" => result += (("action:rov_day336", value))
+                    case "ros_day336" => result += (("action:ros_day336", value))
+                    case "vovd1_day7" => result += (("action:vovd1_day7", value))
+                    case "vovd1_day21" => result += (("action:vovd1_day21", value))
+                    case "vovd1_day336" => result += (("action:vovd1_day336", value))
+                    case _ => // any other key is ignored
+                  }
               }
+            }
           }
         }
       })
       Set(f2, f22).toSeq.foreach(f => { // Set(...) collapses f2 and f22 into one entry when the two strings are equal
-        val json = JSON.parseObject(f)
-        if (json.nonEmpty) {
-          json.foreach {
-            case (k, v) =>
-              val value = v.toString.toDouble
-              k match {
-                case "str_day1" => result += (("cate1:str_day1", value))
-                case "rov_day1" => result += (("cate1:rov_day1", value))
-                case "ros_day1" => result += (("cate1:ros_day1", value))
-                case "str_day3" => result += (("cate1:str_day3", value))
-                case "rov_day3" => result += (("cate1:rov_day3", value))
-                case "ros_day3" => result += (("cate1:ros_day3", value))
-                case "str_day7" => result += (("cate1:str_day7", value))
-                case "rov_day7" => result += (("cate1:rov_day7", value))
-                case "ros_day7" => result += (("cate1:ros_day7", value))
-                case "str_day30" => result += (("cate1:str_day30", value))
-                case "rov_day30" => result += (("cate1:rov_day30", value))
-                case "ros_day30" => result += (("cate1:ros_day30", value))
-                case "vovd1_day1" => result += (("cate1:vovd1_day1", value))
-                case "vovd1_day3" => result += (("cate1:vovd1_day3", value))
-                case "vovd1_day7" => result += (("cate1:vovd1_day7", value))
-                case "vovd1_day30" => result += (("cate1:vovd1_day30", value))
-                case _ =>
+        if (f != null && f.nonEmpty) { // skip null or empty input strings before parsing
+          val jsonOpt = Option(JSON.parseObject(f)) // Option(...) turns a null parse result into None so it is skipped
+          jsonOpt.foreach { json =>
+            if (json.nonEmpty) { // proceed only when the parsed JSON object is non-empty
+              json.foreach {
+                case (k, v) =>
+                  val value = v.toString.toDouble // NOTE(review): throws if a value is not numeric (or v is null) - confirm upstream data
+                  k match { // listed keys are appended to result with a "cate1:" prefix
+                    case "str_day1" => result += (("cate1:str_day1", value))
+                    case "rov_day1" => result += (("cate1:rov_day1", value))
+                    case "ros_day1" => result += (("cate1:ros_day1", value))
+                    case "str_day3" => result += (("cate1:str_day3", value))
+                    case "rov_day3" => result += (("cate1:rov_day3", value))
+                    case "ros_day3" => result += (("cate1:ros_day3", value))
+                    case "str_day7" => result += (("cate1:str_day7", value))
+                    case "rov_day7" => result += (("cate1:rov_day7", value))
+                    case "ros_day7" => result += (("cate1:ros_day7", value))
+                    case "str_day30" => result += (("cate1:str_day30", value))
+                    case "rov_day30" => result += (("cate1:rov_day30", value))
+                    case "ros_day30" => result += (("cate1:ros_day30", value))
+                    case "vovd1_day1" => result += (("cate1:vovd1_day1", value))
+                    case "vovd1_day3" => result += (("cate1:vovd1_day3", value))
+                    case "vovd1_day7" => result += (("cate1:vovd1_day7", value))
+                    case "vovd1_day30" => result += (("cate1:vovd1_day30", value))
+                    case _ => // any other key is ignored
+                  }
               }
+            }
           }
         }
       })
       Set(f3, f33).toSeq.foreach(f => { // Set(...) collapses f3 and f33 into one entry when the two strings are equal
-        val json = JSON.parseObject(f)
-        if (json.nonEmpty) {
-          json.foreach {
-            case (k, v) =>
-              val value = v.toString.toDouble
-              k match {
-                case "str_day1" => result += (("cate2:str_day1", value))
-                case "rov_day1" => result += (("cate2:rov_day1", value))
-                case "ros_day1" => result += (("cate2:ros_day1", value))
-                case "str_day3" => result += (("cate2:str_day3", value))
-                case "rov_day3" => result += (("cate2:rov_day3", value))
-                case "ros_day3" => result += (("cate2:ros_day3", value))
-                case "str_day7" => result += (("cate2:str_day7", value))
-                case "rov_day7" => result += (("cate2:rov_day7", value))
-                case "ros_day7" => result += (("cate2:ros_day7", value))
-                case "str_day30" => result += (("cate2:str_day30", value))
-                case "rov_day30" => result += (("cate2:rov_day30", value))
-                case "ros_day30" => result += (("cate2:ros_day30", value))
-                case "vovd1_day1" => result += (("cate2:vovd1_day1", value))
-                case "vovd1_day3" => result += (("cate2:vovd1_day3", value))
-                case "vovd1_day7" => result += (("cate2:vovd1_day7", value))
-                case "vovd1_day30" => result += (("cate2:vovd1_day30", value))
-                case _ =>
+        if (f != null && f.nonEmpty) { // skip null or empty input strings before parsing
+          val jsonOpt = Option(JSON.parseObject(f)) // Option(...) turns a null parse result into None so it is skipped
+          jsonOpt.foreach { json =>
+            if (json.nonEmpty) { // proceed only when the parsed JSON object is non-empty
+              json.foreach {
+                case (k, v) =>
+                  val value = v.toString.toDouble // NOTE(review): throws if a value is not numeric (or v is null) - confirm upstream data
+                  k match { // listed keys are appended to result with a "cate2:" prefix
+                    case "str_day1" => result += (("cate2:str_day1", value))
+                    case "rov_day1" => result += (("cate2:rov_day1", value))
+                    case "ros_day1" => result += (("cate2:ros_day1", value))
+                    case "str_day3" => result += (("cate2:str_day3", value))
+                    case "rov_day3" => result += (("cate2:rov_day3", value))
+                    case "ros_day3" => result += (("cate2:ros_day3", value))
+                    case "str_day7" => result += (("cate2:str_day7", value))
+                    case "rov_day7" => result += (("cate2:rov_day7", value))
+                    case "ros_day7" => result += (("cate2:ros_day7", value))
+                    case "str_day30" => result += (("cate2:str_day30", value))
+                    case "rov_day30" => result += (("cate2:rov_day30", value))
+                    case "ros_day30" => result += (("cate2:ros_day30", value))
+                    case "vovd1_day1" => result += (("cate2:vovd1_day1", value))
+                    case "vovd1_day3" => result += (("cate2:vovd1_day3", value))
+                    case "vovd1_day7" => result += (("cate2:vovd1_day7", value))
+                    case "vovd1_day30" => result += (("cate2:vovd1_day30", value))
+                    case _ => // any other key is ignored
+                  }
               }
+            }
           }
         }
       })