| 
					
				 | 
			
			
				@@ -1,4 +1,4 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-package com.aliyun.odps.spark.zhp.makedata_ad 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+package com.aliyun.odps.spark.examples.makedata_ad 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import com.alibaba.fastjson.{JSON, JSONObject} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import com.aliyun.odps.TableSchema 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -32,339 +32,346 @@ object makedata_ad_31_originData_20240620 { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     val project = param.getOrElse("project", "loghubods") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     val table = param.getOrElse("table", "alg_recsys_ad_sample_all") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     val repartition = param.getOrElse("repartition", "100").toInt 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    val filterHours = param.getOrElse("filterHours", "00,01,02,03,04,05,06,07").split(",").toSet 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    val idDefaultValue = param.getOrElse("idDefaultValue", "1.0").toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     // 2 读取odps+表信息 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     val odpsOps = env.getODPS(sc) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    val noDateSet = Set("00", "01", "02", "03", "04", "05", "06", "07") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    // 3 循环执行数据生产,8点之前的分区抛弃 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // 3 循环执行数据生产 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     val timeRange = MyDateUtils.getDateHourRange(beginStr, endStr) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      .filter(item => { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        val hh =item.substring(8, 10) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        !noDateSet.contains(hh) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      }) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     for (dt_hh <- timeRange) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       val dt = dt_hh.substring(0, 8) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       val hh = dt_hh.substring(8, 10) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       val partition = s"dt=$dt,hh=$hh" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      println("开始执行partiton:" + partition) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      val odpsData = odpsOps.readTable(project = project, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        table = table, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        partition = partition, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        transfer = func, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        numPartition = tablePart) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        .map(record => { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val ts = record.getString("ts").toInt 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val cid = record.getString("cid") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val featureMap = new JSONObject() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val b1: JSONObject = if (record.isNull("b1_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            JSON.parseObject(record.getString("b1_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val b2: JSONObject = if (record.isNull("b2_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            JSON.parseObject(record.getString("b2_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val b3: JSONObject = if (record.isNull("b3_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            JSON.parseObject(record.getString("b3_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val b4: JSONObject = if (record.isNull("b4_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            JSON.parseObject(record.getString("b4_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val b5: JSONObject = if (record.isNull("b5_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            JSON.parseObject(record.getString("b5_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val b6: JSONObject = if (record.isNull("b6_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            JSON.parseObject(record.getString("b6_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val b7: JSONObject = if (record.isNull("b7_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            JSON.parseObject(record.getString("b7_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val b8: JSONObject = if (record.isNull("b8_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            JSON.parseObject(record.getString("b8_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val b9: JSONObject = if (record.isNull("b9_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            JSON.parseObject(record.getString("b9_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          featureMap.put("cid_" + cid, 1.0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (b1.containsKey("adid") && b1.getString("adid").nonEmpty) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            featureMap.put("adid_" + b1.getString("adid"), 1.0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (b1.containsKey("adverid") && b1.getString("adverid").nonEmpty) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            featureMap.put("adverid_" + b1.getString("adverid"), 1.0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (b1.containsKey("targeting_conversion") && b1.getString("targeting_conversion").nonEmpty) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            featureMap.put("targeting_conversion_" + b1.getString("targeting_conversion"), 1.0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (b1.containsKey("cpa")) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            featureMap.put("cpa", b1.getString("cpa").toDouble) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          for ((bn, prefix1) <- List( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            (b2, "b2"), (b3, "b3"),(b4, "b4"),(b5, "b5"),(b8, "b8") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          )){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            for (prefix2 <- List( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              "3h", "6h", "12h", "1d", "3d", "7d" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            )){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val view = if (bn.isEmpty) 0D else bn.getIntValue("ad_view_" + prefix2).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val click = if (bn.isEmpty) 0D else bn.getIntValue("ad_click_" + prefix2).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val conver = if (bn.isEmpty) 0D else bn.getIntValue("ad_conversion_" + prefix2).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val income = if (bn.isEmpty) 0D else bn.getIntValue("ad_income_" + prefix2).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val f1 = RankExtractorFeature_20240530.calDiv(click, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val f2 = RankExtractorFeature_20240530.calDiv(conver, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val f3 = RankExtractorFeature_20240530.calDiv(conver, click) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val f4 = conver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val f5 = RankExtractorFeature_20240530.calDiv(income*1000, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctr", f1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctcvr", f2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              featureMap.put(prefix1 + "_" + prefix2 + "_" + "cvr", f3) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              featureMap.put(prefix1 + "_" + prefix2 + "_" + "conver", f4) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              featureMap.put(prefix1 + "_" + prefix2 + "_" + "ecpm", f5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      if (filterHours.nonEmpty && filterHours.contains(hh)){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        println("不执行partiton:" + partition) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      }else{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        println("开始执行partiton:" + partition) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        val odpsData = odpsOps.readTable(project = project, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            table = table, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            partition = partition, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            transfer = func, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            numPartition = tablePart) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          .map(record => { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val ts = record.getString("ts").toInt 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val cid = record.getString("cid") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val featureMap = new JSONObject() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val b1: JSONObject = if (record.isNull("b1_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              JSON.parseObject(record.getString("b1_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val b2: JSONObject = if (record.isNull("b2_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              JSON.parseObject(record.getString("b2_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val b3: JSONObject = if (record.isNull("b3_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              JSON.parseObject(record.getString("b3_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val b4: JSONObject = if (record.isNull("b4_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              JSON.parseObject(record.getString("b4_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val b5: JSONObject = if (record.isNull("b5_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              JSON.parseObject(record.getString("b5_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val b6: JSONObject = if (record.isNull("b6_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              JSON.parseObject(record.getString("b6_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val b7: JSONObject = if (record.isNull("b7_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              JSON.parseObject(record.getString("b7_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val b8: JSONObject = if (record.isNull("b8_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              JSON.parseObject(record.getString("b8_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val b9: JSONObject = if (record.isNull("b9_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              JSON.parseObject(record.getString("b9_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            featureMap.put("cid_" + cid, idDefaultValue) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (b1.containsKey("adid") && b1.getString("adid").nonEmpty) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              featureMap.put("adid_" + b1.getString("adid"), idDefaultValue) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (b1.containsKey("adverid") && b1.getString("adverid").nonEmpty) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              featureMap.put("adverid_" + b1.getString("adverid"), idDefaultValue) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (b1.containsKey("targeting_conversion") && b1.getString("targeting_conversion").nonEmpty) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              featureMap.put("targeting_conversion_" + b1.getString("targeting_conversion"), idDefaultValue) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          for ((bn, prefix1) <- List( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            (b6, "b6"), (b7, "b7") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          )) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            for (prefix2 <- List( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              "7d", "14d" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            )) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val view = if (bn.isEmpty) 0D else bn.getIntValue("ad_view_" + prefix2).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val click = if (bn.isEmpty) 0D else bn.getIntValue("ad_click_" + prefix2).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val conver = if (bn.isEmpty) 0D else bn.getIntValue("ad_conversion_" + prefix2).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val income = if (bn.isEmpty) 0D else bn.getIntValue("ad_income_" + prefix2).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val f1 = RankExtractorFeature_20240530.calDiv(click, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val f2 = RankExtractorFeature_20240530.calDiv(conver, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val f3 = RankExtractorFeature_20240530.calDiv(conver, click) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val f4 = conver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val f5 = RankExtractorFeature_20240530.calDiv(income * 1000, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctr", f1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctcvr", f2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              featureMap.put(prefix1 + "_" + prefix2 + "_" + "cvr", f3) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              featureMap.put(prefix1 + "_" + prefix2 + "_" + "conver", f4) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              featureMap.put(prefix1 + "_" + prefix2 + "_" + "ecpm", f5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (b1.containsKey("cpa")) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              featureMap.put("cpa", b1.getString("cpa").toDouble) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val c1: JSONObject = if (record.isNull("c1_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            JSON.parseObject(record.getString("c1_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val midActionList = if (c1.containsKey("action") && c1.getString("action").nonEmpty){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            c1.getString("action").split(",").map(r=>{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val rList = r.split(":") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              (rList(0), (rList(1).toInt, rList(2).toInt, rList(3).toInt, rList(4).toInt, rList(5))) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            }).sortBy(-_._2._1).toList 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          }else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            new ArrayBuffer[(String, (Int, Int, Int, Int, String))]().toList 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          // u特征 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val viewAll = midActionList.size.toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val clickAll = midActionList.map(_._2._2).sum.toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val converAll = midActionList.map(_._2._3).sum.toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val incomeAll = midActionList.map(_._2._4).sum.toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          featureMap.put("viewAll", viewAll) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          featureMap.put("clickAll", clickAll) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          featureMap.put("converAll", converAll) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          featureMap.put("incomeAll", incomeAll) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          featureMap.put("ctr_all", RankExtractorFeature_20240530.calDiv(clickAll, viewAll)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          featureMap.put("ctcvr_all", RankExtractorFeature_20240530.calDiv(converAll, viewAll)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          featureMap.put("cvr_all", RankExtractorFeature_20240530.calDiv(clickAll, converAll)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          featureMap.put("ecpm_all", RankExtractorFeature_20240530.calDiv(incomeAll * 1000, viewAll)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          // ui特征 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val midTimeDiff = scala.collection.mutable.Map[String, Double]() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          midActionList.foreach{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            case (cid, (ts_history, click, conver, income, title)) => 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              if (!midTimeDiff.contains("timediff_view_" + cid)){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                midTimeDiff.put("timediff_view_" + cid, 1.0 / ((ts - ts_history).toDouble/3600.0/24.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              if (!midTimeDiff.contains("timediff_click_" + cid) && click > 0) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                midTimeDiff.put("timediff_click_" + cid, 1.0 / ((ts - ts_history).toDouble / 3600.0 / 24.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            for ((bn, prefix1) <- List( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              (b2, "b2"), (b3, "b3"), (b4, "b4"), (b5, "b5"), (b8, "b8") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            )) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              for (prefix2 <- List( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "3h", "6h", "12h", "1d", "3d", "7d" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              )) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val view = if (bn.isEmpty) 0D else bn.getIntValue("ad_view_" + prefix2).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val click = if (bn.isEmpty) 0D else bn.getIntValue("ad_click_" + prefix2).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val conver = if (bn.isEmpty) 0D else bn.getIntValue("ad_conversion_" + prefix2).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val income = if (bn.isEmpty) 0D else bn.getIntValue("ad_income_" + prefix2).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val f1 = RankExtractorFeature_20240530.calDiv(click, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val f2 = RankExtractorFeature_20240530.calDiv(conver, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val f3 = RankExtractorFeature_20240530.calDiv(conver, click) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val f4 = conver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val f5 = RankExtractorFeature_20240530.calDiv(income * 1000, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctr", f1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctcvr", f2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put(prefix1 + "_" + prefix2 + "_" + "cvr", f3) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put(prefix1 + "_" + prefix2 + "_" + "conver", f4) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put(prefix1 + "_" + prefix2 + "_" + "ecpm", f5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put(prefix1 + "_" + prefix2 + "_" + "click", click) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put(prefix1 + "_" + prefix2 + "_" + "conver*log(view)", conver * RankExtractorFeature_20240530.calLog(view)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put(prefix1 + "_" + prefix2 + "_" + "conver*ctcvr", conver * f2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				               } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              if (!midTimeDiff.contains("timediff_conver_" + cid) && conver > 0) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                midTimeDiff.put("timediff_conver_" + cid, 1.0 / ((ts - ts_history).toDouble / 3600.0 / 24.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            for ((bn, prefix1) <- List( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              (b6, "b6"), (b7, "b7") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            )) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              for (prefix2 <- List( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "7d", "14d" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              )) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val view = if (bn.isEmpty) 0D else bn.getIntValue("ad_view_" + prefix2).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val click = if (bn.isEmpty) 0D else bn.getIntValue("ad_click_" + prefix2).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val conver = if (bn.isEmpty) 0D else bn.getIntValue("ad_conversion_" + prefix2).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val income = if (bn.isEmpty) 0D else bn.getIntValue("ad_income_" + prefix2).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val f1 = RankExtractorFeature_20240530.calDiv(click, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val f2 = RankExtractorFeature_20240530.calDiv(conver, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val f3 = RankExtractorFeature_20240530.calDiv(conver, click) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val f4 = conver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val f5 = RankExtractorFeature_20240530.calDiv(income * 1000, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctr", f1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctcvr", f2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put(prefix1 + "_" + prefix2 + "_" + "cvr", f3) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put(prefix1 + "_" + prefix2 + "_" + "conver", f4) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put(prefix1 + "_" + prefix2 + "_" + "ecpm", f5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put(prefix1 + "_" + prefix2 + "_" + "click", click) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put(prefix1 + "_" + prefix2 + "_" + "conver*log(view)", conver * RankExtractorFeature_20240530.calLog(view)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put(prefix1 + "_" + prefix2 + "_" + "conver*ctcvr", conver * f2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				               } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val midActionStatic = scala.collection.mutable.Map[String, Double]() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          midActionList.foreach { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            case (cid, (ts_history, click, conver, income, title)) => 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              midActionStatic.put("actionstatic_view_" + cid, 1.0 + midActionStatic.getOrDefault("actionstatic_view_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              midActionStatic.put("actionstatic_click_" + cid, click + midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              midActionStatic.put("actionstatic_conver_" + cid, conver + midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              midActionStatic.put("actionstatic_income_" + cid, income + midActionStatic.getOrDefault("actionstatic_income_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (midTimeDiff.contains("timediff_view_" + cid)){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            featureMap.put("timediff_view", midTimeDiff.getOrDefault("timediff_view_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (midTimeDiff.contains("timediff_click_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            featureMap.put("timediff_click", midTimeDiff.getOrDefault("timediff_click_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (midTimeDiff.contains("timediff_conver_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            featureMap.put("timediff_conver", midTimeDiff.getOrDefault("timediff_conver_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (midActionStatic.contains("actionstatic_view_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            featureMap.put("actionstatic_view", midActionStatic.getOrDefault("actionstatic_view_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (midActionStatic.contains("actionstatic_click_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            featureMap.put("actionstatic_click", midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (midActionStatic.contains("actionstatic_conver_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            featureMap.put("actionstatic_conver", midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (midActionStatic.contains("actionstatic_income_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            featureMap.put("actionstatic_income", midActionStatic.getOrDefault("actionstatic_income_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (midActionStatic.contains("actionstatic_view_" + cid) && midActionStatic.contains("actionstatic_click_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            featureMap.put("actionstatic_ctr", RankExtractorFeature_20240530.calDiv( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              midActionStatic.getOrDefault("actionstatic_view_" + cid, 0.0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            )) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (midActionStatic.contains("actionstatic_view_" + cid) && midActionStatic.contains("actionstatic_conver_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            featureMap.put("actionstatic_ctcvr", RankExtractorFeature_20240530.calDiv( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              midActionStatic.getOrDefault("actionstatic_view_" + cid, 0.0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            )) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (midActionStatic.contains("actionstatic_conver_" + cid) && midActionStatic.contains("actionstatic_click_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            featureMap.put("actionstatic_cvr", RankExtractorFeature_20240530.calDiv( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            )) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val e1: JSONObject = if (record.isNull("e1_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            JSON.parseObject(record.getString("e1_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val e2: JSONObject = if (record.isNull("e2_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            JSON.parseObject(record.getString("e2_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val title = b1.getOrDefault("cidtitle", "").toString 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (title.nonEmpty){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            for ((en, prefix1) <- List((e1, "e1"), (e2, "e2"))){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              for (prefix2 <- List("tags_3d", "tags_7d", "tags_14d")){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                if (en.nonEmpty && en.containsKey(prefix2) && en.getString(prefix2).nonEmpty) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                  val (f1, f2, f3, f4) = funcC34567ForTags(en.getString(prefix2), title) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                  featureMap.put(prefix1 + "_" + prefix2 + "_matchnum", f1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                  featureMap.put(prefix1 + "_" + prefix2 + "_maxscore", f3) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                  featureMap.put(prefix1 + "_" + prefix2 + "_avgscore", f4) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val c1: JSONObject = if (record.isNull("c1_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              JSON.parseObject(record.getString("c1_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val midActionList = if (c1.containsKey("action") && c1.getString("action").nonEmpty) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              c1.getString("action").split(",").map(r => { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val rList = r.split(":") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                (rList(0), (rList(1).toInt, rList(2).toInt, rList(3).toInt, rList(4).toInt, rList(5))) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              }).sortBy(-_._2._1).toList 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              new ArrayBuffer[(String, (Int, Int, Int, Int, String))]().toList 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            // u特征 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val viewAll = midActionList.size.toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val clickAll = midActionList.map(_._2._2).sum.toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val converAll = midActionList.map(_._2._3).sum.toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val incomeAll = midActionList.map(_._2._4).sum.toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            featureMap.put("viewAll", viewAll) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            featureMap.put("clickAll", clickAll) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            featureMap.put("converAll", converAll) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            featureMap.put("incomeAll", incomeAll) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            featureMap.put("ctr_all", RankExtractorFeature_20240530.calDiv(clickAll, viewAll)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            featureMap.put("ctcvr_all", RankExtractorFeature_20240530.calDiv(converAll, viewAll)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            featureMap.put("cvr_all", RankExtractorFeature_20240530.calDiv(clickAll, converAll)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            featureMap.put("ecpm_all", RankExtractorFeature_20240530.calDiv(incomeAll * 1000, viewAll)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            // ui特征 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val midTimeDiff = scala.collection.mutable.Map[String, Double]() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            midActionList.foreach { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              case (cid, (ts_history, click, conver, income, title)) => 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if (!midTimeDiff.contains("timediff_view_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                  midTimeDiff.put("timediff_view_" + cid, 1.0 / ((ts - ts_history).toDouble / 3600.0 / 24.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if (!midTimeDiff.contains("timediff_click_" + cid) && click > 0) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                  midTimeDiff.put("timediff_click_" + cid, 1.0 / ((ts - ts_history).toDouble / 3600.0 / 24.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if (!midTimeDiff.contains("timediff_conver_" + cid) && conver > 0) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                  midTimeDiff.put("timediff_conver_" + cid, 1.0 / ((ts - ts_history).toDouble / 3600.0 / 24.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val midActionStatic = scala.collection.mutable.Map[String, Double]() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            midActionList.foreach { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              case (cid, (ts_history, click, conver, income, title)) => 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                midActionStatic.put("actionstatic_view_" + cid, 1.0 + midActionStatic.getOrDefault("actionstatic_view_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                midActionStatic.put("actionstatic_click_" + cid, click + midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                midActionStatic.put("actionstatic_conver_" + cid, conver + midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                midActionStatic.put("actionstatic_income_" + cid, income + midActionStatic.getOrDefault("actionstatic_income_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (midTimeDiff.contains("timediff_view_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              featureMap.put("timediff_view", midTimeDiff.getOrDefault("timediff_view_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (midTimeDiff.contains("timediff_click_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              featureMap.put("timediff_click", midTimeDiff.getOrDefault("timediff_click_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (midTimeDiff.contains("timediff_conver_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              featureMap.put("timediff_conver", midTimeDiff.getOrDefault("timediff_conver_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (midActionStatic.contains("actionstatic_view_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              featureMap.put("actionstatic_view", midActionStatic.getOrDefault("actionstatic_view_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (midActionStatic.contains("actionstatic_click_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              featureMap.put("actionstatic_click", midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (midActionStatic.contains("actionstatic_conver_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              featureMap.put("actionstatic_conver", midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (midActionStatic.contains("actionstatic_income_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              featureMap.put("actionstatic_income", midActionStatic.getOrDefault("actionstatic_income_" + cid, 0.0)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (midActionStatic.contains("actionstatic_view_" + cid) && midActionStatic.contains("actionstatic_click_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              featureMap.put("actionstatic_ctr", RankExtractorFeature_20240530.calDiv( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                midActionStatic.getOrDefault("actionstatic_view_" + cid, 0.0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              )) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (midActionStatic.contains("actionstatic_view_" + cid) && midActionStatic.contains("actionstatic_conver_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              featureMap.put("actionstatic_ctcvr", RankExtractorFeature_20240530.calDiv( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                midActionStatic.getOrDefault("actionstatic_view_" + cid, 0.0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              )) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (midActionStatic.contains("actionstatic_conver_" + cid) && midActionStatic.contains("actionstatic_click_" + cid)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              featureMap.put("actionstatic_cvr", RankExtractorFeature_20240530.calDiv( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              )) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val e1: JSONObject = if (record.isNull("e1_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              JSON.parseObject(record.getString("e1_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val e2: JSONObject = if (record.isNull("e2_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              JSON.parseObject(record.getString("e2_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val title = b1.getOrDefault("cidtitle", "").toString 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (title.nonEmpty) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              for ((en, prefix1) <- List((e1, "e1"), (e2, "e2"))) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                for (prefix2 <- List("tags_3d", "tags_7d", "tags_14d")) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                  if (en.nonEmpty && en.containsKey(prefix2) && en.getString(prefix2).nonEmpty) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    val (f1, f2, f3, f4) = funcC34567ForTags(en.getString(prefix2), title) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    featureMap.put(prefix1 + "_" + prefix2 + "_matchnum", f1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    featureMap.put(prefix1 + "_" + prefix2 + "_maxscore", f3) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    featureMap.put(prefix1 + "_" + prefix2 + "_avgscore", f4) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                  } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				               } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val d1: JSONObject = if (record.isNull("d1_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            JSON.parseObject(record.getString("d1_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val d2: JSONObject = if (record.isNull("d2_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            JSON.parseObject(record.getString("d2_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (d1.nonEmpty){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            for (prefix <- List("3h", "6h", "12h", "1d", "3d", "7d")) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val view = if (!d1.containsKey("ad_view_" + prefix)) 0D else d1.getIntValue("ad_view_" + prefix).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val click = if (!d1.containsKey("ad_click_" + prefix)) 0D else d1.getIntValue("ad_click_" + prefix).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val conver = if (!d1.containsKey("ad_conversion_" + prefix)) 0D else d1.getIntValue("ad_conversion_" + prefix).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val income = if (!d1.containsKey("ad_income_" + prefix)) 0D else d1.getIntValue("ad_income_" + prefix).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val f1 = RankExtractorFeature_20240530.calDiv(click, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val f2 = RankExtractorFeature_20240530.calDiv(conver, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val f3 = RankExtractorFeature_20240530.calDiv(conver, click) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val f4 = conver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val f5 = RankExtractorFeature_20240530.calDiv(income * 1000, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              featureMap.put("d1_feature" + "_" + prefix + "_" + "ctr", f1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              featureMap.put("d1_feature" + "_" + prefix + "_" + "ctcvr", f2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              featureMap.put("d1_feature" + "_" + prefix + "_" + "cvr", f3) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              featureMap.put("d1_feature" + "_" + prefix + "_" + "conver", f4) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              featureMap.put("d1_feature" + "_" + prefix + "_" + "ecpm", f5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val d1: JSONObject = if (record.isNull("d1_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              JSON.parseObject(record.getString("d1_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val d2: JSONObject = if (record.isNull("d2_feature")) new JSONObject() else 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              JSON.parseObject(record.getString("d2_feature")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (d1.nonEmpty) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              for (prefix <- List("3h", "6h", "12h", "1d", "3d", "7d")) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val view = if (!d1.containsKey("ad_view_" + prefix)) 0D else d1.getIntValue("ad_view_" + prefix).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val click = if (!d1.containsKey("ad_click_" + prefix)) 0D else d1.getIntValue("ad_click_" + prefix).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val conver = if (!d1.containsKey("ad_conversion_" + prefix)) 0D else d1.getIntValue("ad_conversion_" + prefix).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val income = if (!d1.containsKey("ad_income_" + prefix)) 0D else d1.getIntValue("ad_income_" + prefix).toDouble 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val f1 = RankExtractorFeature_20240530.calDiv(click, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val f2 = RankExtractorFeature_20240530.calDiv(conver, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val f3 = RankExtractorFeature_20240530.calDiv(conver, click) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val f4 = conver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val f5 = RankExtractorFeature_20240530.calDiv(income * 1000, view) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put("d1_feature" + "_" + prefix + "_" + "ctr", f1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put("d1_feature" + "_" + prefix + "_" + "ctcvr", f2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put("d1_feature" + "_" + prefix + "_" + "cvr", f3) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put("d1_feature" + "_" + prefix + "_" + "conver", f4) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                featureMap.put("d1_feature" + "_" + prefix + "_" + "ecpm", f5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val vidRankMaps = scala.collection.mutable.Map[String, scala.collection.immutable.Map[String, Double]]() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          if (d2.nonEmpty){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            d2.foreach(r => { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val key = r._1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              val value = d2.getString(key).split(",").map(r=> { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                val rList = r.split(":") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                (rList(0), rList(2).toDouble) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              }).toMap 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              vidRankMaps.put(key, value) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            }) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          for (prefix1 <- List("ctr", "ctcvr", "ecpm")) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            for (prefix2 <- List("1d", "3d", "7d", "14d")) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              if (vidRankMaps.contains(prefix1 + "_" + prefix2)){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                val rank = vidRankMaps(prefix1 + "_" + prefix2).getOrDefault(cid, 0.0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                if (rank >= 1.0){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                  featureMap.put("vid_rank_" + prefix1 + "_" + prefix2, 1.0 / rank) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val vidRankMaps = scala.collection.mutable.Map[String, scala.collection.immutable.Map[String, Double]]() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if (d2.nonEmpty) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              d2.foreach(r => { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val key = r._1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                val value = d2.getString(key).split(",").map(r => { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                  val rList = r.split(":") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                  (rList(0), rList(2).toDouble) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                }).toMap 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                vidRankMaps.put(key, value) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              }) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            for (prefix1 <- List("ctr", "ctcvr", "ecpm")) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              for (prefix2 <- List("1d", "3d", "7d", "14d")) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if (vidRankMaps.contains(prefix1 + "_" + prefix2)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                  val rank = vidRankMaps(prefix1 + "_" + prefix2).getOrDefault(cid, 0.0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                  if (rank >= 1.0) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    featureMap.put("vid_rank_" + prefix1 + "_" + prefix2, 1.0 / rank) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                  } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				               } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          /* 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          广告 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            sparse:cid adid adverid targeting_conversion 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            cpa --> 1个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            adverid下的 3h 6h 12h 1d 3d 7d 、 ctr ctcvr cvr conver ecpm  --> 30个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            cid下的 3h 6h 12h 1d 3d 7d 、 ctr ctcvr cvr ecpm conver --> 30个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            地理//cid下的 3h 6h 12h 1d 3d 7d 、 ctr ctcvr cvr ecpm conver --> 30个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            app//cid下的 3h 6h 12h 1d 3d 7d 、 ctr ctcvr cvr ecpm conver --> 30个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            手机品牌//cid下的 3h 6h 12h 1d 3d 7d 、 ctr ctcvr cvr ecpm conver --> 30个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            系统 无数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            week//cid下的 7d 14d、 ctr ctcvr cvr ecpm conver --> 10个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            hour//cid下的 7d 14d、 ctr ctcvr cvr ecpm conver --> 10个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          用户 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            用户历史 点击/转化 的title tag;3d 7d 14d; cid的title; 数量/最高分/平均分 --> 18个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            用户历史 14d 看过/点过/转化次数/income; ctr cvr ctcvr ecpm;  --> 8个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            用户到cid的ui特征 --> 10个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              1/用户最近看过这个cid的时间间隔 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              1/用户最近点过这个cid的时间间隔 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              1/用户最近转过这个cid的时间间隔 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              用户看过这个cid多少次 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              用户点过这个cid多少次 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              用户转过这个cid多少次 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              用户对这个cid花了多少钱 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              用户对这个cid的ctr ctcvr cvr 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          视频 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            title与cid的 sim-score-1/-2 无数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            vid//cid下的 3h 6h 12h 1d 3d 7d 、 ctr ctcvr cvr ecpm conver --> 30个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            vid//cid下的 1d 3d 7d 14d、 ctr ctcvr ecpm 的rank值 倒数 --> 12个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-           */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          //4 处理label信息。 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val labels = new JSONObject 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          for (labelKey <- List("ad_is_click", "ad_is_conversion")){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if (!record.isNull(labelKey)){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              labels.put(labelKey, record.getString(labelKey)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            /* 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            广告 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              sparse:cid adid adverid targeting_conversion 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              cpa --> 1个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              adverid下的 3h 6h 12h 1d 3d 7d 、 ctr ctcvr cvr conver ecpm  --> 30个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              cid下的 3h 6h 12h 1d 3d 7d 、 ctr ctcvr cvr ecpm conver --> 30个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              地理//cid下的 3h 6h 12h 1d 3d 7d 、 ctr ctcvr cvr ecpm conver --> 30个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              app//cid下的 3h 6h 12h 1d 3d 7d 、 ctr ctcvr cvr ecpm conver --> 30个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              手机品牌//cid下的 3h 6h 12h 1d 3d 7d 、 ctr ctcvr cvr ecpm conver --> 30个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              系统 无数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              week//cid下的 7d 14d、 ctr ctcvr cvr ecpm conver --> 10个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              hour//cid下的 7d 14d、 ctr ctcvr cvr ecpm conver --> 10个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            用户 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              用户历史 点击/转化 的title tag;3d 7d 14d; cid的title; 数量/最高分/平均分 --> 18个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              用户历史 14d 看过/点过/转化次数/income; ctr cvr ctcvr ecpm;  --> 8个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              用户到cid的ui特征 --> 10个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                1/用户最近看过这个cid的时间间隔 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                1/用户最近点过这个cid的时间间隔 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                1/用户最近转过这个cid的时间间隔 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                用户看过这个cid多少次 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                用户点过这个cid多少次 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                用户转过这个cid多少次 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                用户对这个cid花了多少钱 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                用户对这个cid的ctr ctcvr cvr 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            视频 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              title与cid的 sim-score-1/-2 无数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              vid//cid下的 3h 6h 12h 1d 3d 7d 、 ctr ctcvr cvr ecpm conver --> 30个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              vid//cid下的 1d 3d 7d 14d、 ctr ctcvr ecpm 的rank值 倒数 --> 12个 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+             */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            //4 处理label信息。 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val labels = new JSONObject 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            for (labelKey <- List("ad_is_click", "ad_is_conversion")) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              if (!record.isNull(labelKey)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                labels.put(labelKey, record.getString(labelKey)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+              } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          //5 处理log key表头。 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val apptype = record.getString("apptype") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val mid = record.getString("mid") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val headvideoid = record.getString("headvideoid") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val logKey = (apptype, mid, cid, ts, headvideoid).productIterator.mkString(",") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val labelKey = labels.toString() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          val featureKey = featureMap.toString() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          //6 拼接数据,保存。 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          logKey + "\t" + labelKey + "\t" + featureKey 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        }) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      // 4 保存数据到hdfs 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      val savePartition = dt + hh 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      val hdfsPath = savePath + "/" + savePartition 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        println("删除路径并开始数据写入:" + hdfsPath) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        MyHdfsUtils.delete_hdfs_path(hdfsPath) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        odpsData.coalesce(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      }else{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        println("路径不合法,无法写入:" + hdfsPath) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            //5 处理log key表头。 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val apptype = record.getString("apptype") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val mid = record.getString("mid") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val headvideoid = record.getString("headvideoid") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val logKey = (apptype, mid, cid, ts, headvideoid).productIterator.mkString(",") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val labelKey = labels.toString() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            val featureKey = featureMap.toString() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            //6 拼接数据,保存。 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            logKey + "\t" + labelKey + "\t" + featureKey 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          }) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        // 4 保存数据到hdfs 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        val savePartition = dt + hh 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        val hdfsPath = savePath + "/" + savePartition 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          println("删除路径并开始数据写入:" + hdfsPath) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          MyHdfsUtils.delete_hdfs_path(hdfsPath) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          odpsData.coalesce(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        } else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          println("路径不合法,无法写入:" + hdfsPath) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 |