| 
														
															@@ -33,6 +33,34 @@ object makedata_recsys_82_originData_20250221 { 
														 | 
													
												
											
												
													
														| 
														 | 
														
															       .reduceByKey((a, b) => if (a.size() > b.size()) a else b) 
														 | 
														
														 | 
														
															       .reduceByKey((a, b) => if (a.size() > b.size()) a else b) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															   } 
														 | 
														
														 | 
														
															   } 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+  private def getVidMidRdd(logRdd: RDD[Record]): RDD[(String, String)] = { 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    logRdd 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+      .map(record => { 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        val mid = record.getString("mid") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        (mid, record) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+      }) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+      .filter(_._1.nonEmpty) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+      .flatMap(raw => { 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        val result = new ArrayBuffer[(String, String)] 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        for (hVid <- ConvertUtils.getVidList(raw._2.getString("c9_feature"))) { 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+          result += ((hVid, raw._1)) // (vid, mid) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        } 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        result 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+      }) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+  } 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+ 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+  private def getMidSeqRdd(vidMidRdd: RDD[(String, String)], videoRdd: RDD[(String, java.util.Map[String, String])]): RDD[(String, List[java.util.Map[String, String]])] = { 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    vidMidRdd 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+      .join(videoRdd) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+      .map(raw => { 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        (raw._2._1, raw._2._2) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+      }) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+      .groupByKey() 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+      .map(raw => { 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        (raw._1, raw._2.toList) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+      }) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+  } 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+ 
														 | 
													
												
											
												
													
														| 
														 | 
														
															   private def joinVideoMap(logRdd: RDD[Record], videoRdd: RDD[(String, java.util.Map[String, String])]): RDD[(Record, List[java.util.Map[String, String]])] = { 
														 | 
														
														 | 
														
															   private def joinVideoMap(logRdd: RDD[Record], videoRdd: RDD[(String, java.util.Map[String, String])]): RDD[(Record, List[java.util.Map[String, String]])] = { 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     val midLogRdd = logRdd 
														 | 
														
														 | 
														
															     val midLogRdd = logRdd 
														 | 
													
												
											
												
													
														| 
														 | 
														
															       .map(record => { 
														 | 
														
														 | 
														
															       .map(record => { 
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -117,6 +145,12 @@ object makedata_recsys_82_originData_20250221 { 
														 | 
													
												
											
												
													
														| 
														 | 
														
															       // d. 样本重采样 
														 | 
														
														 | 
														
															       // d. 样本重采样 
														 | 
													
												
											
												
													
														| 
														 | 
														
															       val resampleData = DataUtils.resample(whatLabel, fuSampleRate, odpsData) 
														 | 
														
														 | 
														
															       val resampleData = DataUtils.resample(whatLabel, fuSampleRate, odpsData) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+      // e. get vid mid rdd 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+      val vidMidRdd = getVidMidRdd(resampleData) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+ 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+      // f. get mid seq rdd 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+      val midSeqRdd = getMidSeqRdd(vidMidRdd, uniqVideo) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+ 
														 | 
													
												
											
												
													
														| 
														 | 
														
															       // e. 历史行为关联video 
														 | 
														
														 | 
														
															       // e. 历史行为关联video 
														 | 
													
												
											
												
													
														| 
														 | 
														
															       val seqSampleData = joinVideoMap(resampleData, uniqVideo) 
														 | 
														
														 | 
														
															       val seqSampleData = joinVideoMap(resampleData, uniqVideo) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 |