| 
					
				 | 
			
			
				@@ -33,6 +33,34 @@ object makedata_recsys_82_originData_20250221 { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       .reduceByKey((a, b) => if (a.size() > b.size()) a else b) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Expands log records into (vid, mid) pairs.
				 *
				 * For every record with a usable "mid", the "c9_feature" column is handed to
				 * `ConvertUtils.getVidList` and one (vid, mid) pair is emitted per returned vid.
				 * Records whose "mid" is null or empty produce no pairs. Pairs are not
				 * de-duplicated, so the same (vid, mid) can appear more than once.
				 *
				 * @param logRdd log records exposing the "mid" and "c9_feature" columns
				 * @return RDD of (vid, mid) pairs
				 */
				private def getVidMidRdd(logRdd: RDD[Record]): RDD[(String, String)] = {
				  logRdd.flatMap { record =>
				    val pairs = new ArrayBuffer[(String, String)]
				    // Wrap in Option so a null mid is skipped like an empty one,
				    // instead of throwing when nonEmpty is called on it.
				    Option(record.getString("mid")).filter(_.nonEmpty).foreach { mid =>
				      for (hVid <- ConvertUtils.getVidList(record.getString("c9_feature"))) {
				        pairs += ((hVid, mid)) // (vid, mid)
				      }
				    }
				    pairs
				  }
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Collects, for each mid, the video maps of the vids paired with that mid.
				 *
				 * The (vid, mid) pairs are inner-joined with `videoRdd` on their key, so pairs
				 * whose key has no entry in `videoRdd` are dropped. The joined rows are then
				 * re-keyed by mid and grouped into one list per mid.
				 *
				 * @param vidMidRdd (vid, mid) pairs
				 * @param videoRdd  video maps, joined against the vid key of `vidMidRdd`
				 * @return RDD of (mid, list of joined video maps)
				 */
				private def getMidSeqRdd(vidMidRdd: RDD[(String, String)], videoRdd: RDD[(String, java.util.Map[String, String])]): RDD[(String, List[java.util.Map[String, String]])] = {
				  // Drop the join key (vid) and re-key every joined row by mid.
				  val midVideoRdd = vidMidRdd
				    .join(videoRdd)
				    .map { case (_, (mid, video)) => (mid, video) }

				  // Gather all video maps of a mid and materialize them as a List.
				  midVideoRdd
				    .groupByKey()
				    .map { case (mid, videos) => (mid, videos.toList) }
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   private def joinVideoMap(logRdd: RDD[Record], videoRdd: RDD[(String, java.util.Map[String, String])]): RDD[(Record, List[java.util.Map[String, String]])] = { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     val midLogRdd = logRdd 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       .map(record => { 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -117,6 +145,12 @@ object makedata_recsys_82_originData_20250221 { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       // d. 样本重采样 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       val resampleData = DataUtils.resample(whatLabel, fuSampleRate, odpsData) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      // e. get vid mid rdd 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      val vidMidRdd = getVidMidRdd(resampleData) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      // f. get mid seq rdd 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      val midSeqRdd = getMidSeqRdd(vidMidRdd, uniqVideo) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       // e. 历史行为关联video 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       val seqSampleData = joinVideoMap(resampleData, uniqVideo) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 |