Prechádzať zdrojové kódy

i2i样本制作,第4步:连续值分桶

zhangbo 5 mesiacov pred
rodič
commit
82b09de4c9

+ 74 - 65
src/main/scala/com/aliyun/odps/spark/examples/makedata_dssm/makedata_i2i_04_bucketFile_20241128.scala

@@ -39,77 +39,86 @@ object makedata_i2i_04_bucketFile_20241128 {
       val f33 = rList(11)
       val result = new ArrayBuffer[(String, Double)]()
       Set(f1, f11).toSeq.foreach(f=> {
-        JSON.parseObject(f).foreach {
-          case (k, v) =>
-            val value = v.toString.toDouble
-            k match {
-              case "str_day1" => result += (("action:str_day1", value))
-              case "rov_day1" => result += (("action:rov_day1", value))
-              case "ros_day1" => result += (("action:ros_day1", value))
-              case "str_day7" => result += (("action:str_day7", value))
-              case "rov_day7" => result += (("action:rov_day7", value))
-              case "ros_day7" => result += (("action:ros_day7", value))
-              case "str_day21" => result += (("action:str_day21", value))
-              case "rov_day21" => result += (("action:rov_day21", value))
-              case "ros_day21" => result += (("action:ros_day21", value))
-              case "str_day336" => result += (("action:str_day336", value))
-              case "rov_day336" => result += (("action:rov_day336", value))
-              case "ros_day336" => result += (("action:ros_day336", value))
-              case "vovd1_day7" => result += (("action:vovd1_day7", value))
-              case "vovd1_day21" => result += (("action:vovd1_day21", value))
-              case "vovd1_day336" => result += (("action:vovd1_day336", value))
-              case _ =>
-            }
+        val json = JSON.parseObject(f)
+        if (json.nonEmpty){
+          json.foreach {
+            case (k, v) =>
+              val value = v.toString.toDouble
+              k match {
+                case "str_day1" => result += (("action:str_day1", value))
+                case "rov_day1" => result += (("action:rov_day1", value))
+                case "ros_day1" => result += (("action:ros_day1", value))
+                case "str_day7" => result += (("action:str_day7", value))
+                case "rov_day7" => result += (("action:rov_day7", value))
+                case "ros_day7" => result += (("action:ros_day7", value))
+                case "str_day21" => result += (("action:str_day21", value))
+                case "rov_day21" => result += (("action:rov_day21", value))
+                case "ros_day21" => result += (("action:ros_day21", value))
+                case "str_day336" => result += (("action:str_day336", value))
+                case "rov_day336" => result += (("action:rov_day336", value))
+                case "ros_day336" => result += (("action:ros_day336", value))
+                case "vovd1_day7" => result += (("action:vovd1_day7", value))
+                case "vovd1_day21" => result += (("action:vovd1_day21", value))
+                case "vovd1_day336" => result += (("action:vovd1_day336", value))
+                case _ =>
+              }
+          }
         }
       })
       Set(f2, f22).toSeq.foreach(f => {
-        JSON.parseObject(f).foreach {
-          case (k, v) =>
-            val value = v.toString.toDouble
-            k match {
-              case "str_day1" => result += (("cate1:str_day1", value))
-              case "rov_day1" => result += (("cate1:rov_day1", value))
-              case "ros_day1" => result += (("cate1:ros_day1", value))
-              case "str_day3" => result += (("cate1:str_day3", value))
-              case "rov_day3" => result += (("cate1:rov_day3", value))
-              case "ros_day3" => result += (("cate1:ros_day3", value))
-              case "str_day7" => result += (("cate1:str_day7", value))
-              case "rov_day7" => result += (("cate1:rov_day7", value))
-              case "ros_day7" => result += (("cate1:ros_day7", value))
-              case "str_day30" => result += (("cate1:str_day30", value))
-              case "rov_day30" => result += (("cate1:rov_day30", value))
-              case "ros_day30" => result += (("cate1:ros_day30", value))
-              case "vovd1_day1" => result += (("cate1:vovd1_day1", value))
-              case "vovd1_day3" => result += (("cate1:vovd1_day3", value))
-              case "vovd1_day7" => result += (("cate1:vovd1_day7", value))
-              case "vovd1_day30" => result += (("cate1:vovd1_day30", value))
-              case _ =>
-            }
+        val json = JSON.parseObject(f)
+        if (json.nonEmpty) {
+          json.foreach {
+            case (k, v) =>
+              val value = v.toString.toDouble
+              k match {
+                case "str_day1" => result += (("cate1:str_day1", value))
+                case "rov_day1" => result += (("cate1:rov_day1", value))
+                case "ros_day1" => result += (("cate1:ros_day1", value))
+                case "str_day3" => result += (("cate1:str_day3", value))
+                case "rov_day3" => result += (("cate1:rov_day3", value))
+                case "ros_day3" => result += (("cate1:ros_day3", value))
+                case "str_day7" => result += (("cate1:str_day7", value))
+                case "rov_day7" => result += (("cate1:rov_day7", value))
+                case "ros_day7" => result += (("cate1:ros_day7", value))
+                case "str_day30" => result += (("cate1:str_day30", value))
+                case "rov_day30" => result += (("cate1:rov_day30", value))
+                case "ros_day30" => result += (("cate1:ros_day30", value))
+                case "vovd1_day1" => result += (("cate1:vovd1_day1", value))
+                case "vovd1_day3" => result += (("cate1:vovd1_day3", value))
+                case "vovd1_day7" => result += (("cate1:vovd1_day7", value))
+                case "vovd1_day30" => result += (("cate1:vovd1_day30", value))
+                case _ =>
+              }
+          }
         }
       })
       Set(f3, f33).toSeq.foreach(f => {
-        JSON.parseObject(f).foreach {
-          case (k, v) =>
-            val value = v.toString.toDouble
-            k match {
-              case "str_day1" => result += (("cate2:str_day1", value))
-              case "rov_day1" => result += (("cate2:rov_day1", value))
-              case "ros_day1" => result += (("cate2:ros_day1", value))
-              case "str_day3" => result += (("cate2:str_day3", value))
-              case "rov_day3" => result += (("cate2:rov_day3", value))
-              case "ros_day3" => result += (("cate2:ros_day3", value))
-              case "str_day7" => result += (("cate2:str_day7", value))
-              case "rov_day7" => result += (("cate2:rov_day7", value))
-              case "ros_day7" => result += (("cate2:ros_day7", value))
-              case "str_day30" => result += (("cate2:str_day30", value))
-              case "rov_day30" => result += (("cate2:rov_day30", value))
-              case "ros_day30" => result += (("cate2:ros_day30", value))
-              case "vovd1_day1" => result += (("cate2:vovd1_day1", value))
-              case "vovd1_day3" => result += (("cate2:vovd1_day3", value))
-              case "vovd1_day7" => result += (("cate2:vovd1_day7", value))
-              case "vovd1_day30" => result += (("cate2:vovd1_day30", value))
-              case _ =>
-            }
+        val json = JSON.parseObject(f)
+        if (json.nonEmpty) {
+          json.foreach {
+            case (k, v) =>
+              val value = v.toString.toDouble
+              k match {
+                case "str_day1" => result += (("cate2:str_day1", value))
+                case "rov_day1" => result += (("cate2:rov_day1", value))
+                case "ros_day1" => result += (("cate2:ros_day1", value))
+                case "str_day3" => result += (("cate2:str_day3", value))
+                case "rov_day3" => result += (("cate2:rov_day3", value))
+                case "ros_day3" => result += (("cate2:ros_day3", value))
+                case "str_day7" => result += (("cate2:str_day7", value))
+                case "rov_day7" => result += (("cate2:rov_day7", value))
+                case "ros_day7" => result += (("cate2:ros_day7", value))
+                case "str_day30" => result += (("cate2:str_day30", value))
+                case "rov_day30" => result += (("cate2:rov_day30", value))
+                case "ros_day30" => result += (("cate2:ros_day30", value))
+                case "vovd1_day1" => result += (("cate2:vovd1_day1", value))
+                case "vovd1_day3" => result += (("cate2:vovd1_day3", value))
+                case "vovd1_day7" => result += (("cate2:vovd1_day7", value))
+                case "vovd1_day30" => result += (("cate2:vovd1_day30", value))
+                case _ =>
+              }
+          }
         }
       })
       result