|  | @@ -40,7 +40,6 @@ object makedata_recsys_82_originData_20250221 {
 | 
	
		
			
				|  |  |          (mid, record)
 | 
	
		
			
				|  |  |        })
 | 
	
		
			
				|  |  |        .filter(_._1.nonEmpty)
 | 
	
		
			
				|  |  | -      .reduceByKey((a, b) => a)
 | 
	
		
			
				|  |  |        .flatMap(raw => {
 | 
	
		
			
				|  |  |          val result = new ArrayBuffer[(String, String)]
 | 
	
		
			
				|  |  |          for (hVid <- ConvertUtils.getVidList(raw._2.getString("c9_feature"))) {
 | 
	
	
		
			
				|  | @@ -52,6 +51,7 @@ object makedata_recsys_82_originData_20250221 {
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |    private def getMidSeqRdd(vidMidRdd: RDD[(String, String)], videoRdd: RDD[(String, java.util.Map[String, String])]): RDD[(String, List[java.util.Map[String, String]])] = {
 | 
	
		
			
				|  |  |      vidMidRdd
 | 
	
		
			
				|  |  | +      .reduceByKey((a, b) => a)
 | 
	
		
			
				|  |  |        .join(videoRdd)
 | 
	
		
			
				|  |  |        .map(raw => {
 | 
	
		
			
				|  |  |          (raw._2._1, raw._2._2)
 | 
	
	
		
			
				|  | @@ -62,32 +62,13 @@ object makedata_recsys_82_originData_20250221 {
 | 
	
		
			
				|  |  |        })
 | 
	
		
			
				|  |  |    }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -  private def joinVideoMap(logRdd: RDD[Record], videoRdd: RDD[(String, java.util.Map[String, String])]): RDD[(Record, List[java.util.Map[String, String]])] = {
 | 
	
		
			
				|  |  | -    val midLogRdd = logRdd
 | 
	
		
			
				|  |  | +  private def joinMidSeq(logRdd: RDD[Record], midSeqRdd: RDD[(String, List[java.util.Map[String, String]])]): RDD[(Record, List[java.util.Map[String, String]])] = {
 | 
	
		
			
				|  |  | +    logRdd
 | 
	
		
			
				|  |  |        .map(record => {
 | 
	
		
			
				|  |  |          val mid = record.getString("mid")
 | 
	
		
			
				|  |  |          (mid, record)
 | 
	
		
			
				|  |  |        })
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    val midSeqRdd = midLogRdd
 | 
	
		
			
				|  |  | -      .filter(_._1.nonEmpty)
 | 
	
		
			
				|  |  | -      .flatMap(raw => {
 | 
	
		
			
				|  |  | -        val result = new ArrayBuffer[(String, String)]
 | 
	
		
			
				|  |  | -        for (hVid <- ConvertUtils.getVidList(raw._2.getString("c9_feature"))) {
 | 
	
		
			
				|  |  | -          result += ((hVid, raw._1)) // (vid, mid)
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        result
 | 
	
		
			
				|  |  | -      })
 | 
	
		
			
				|  |  | -      .join(videoRdd) // (vid, (mid, map))
 | 
	
		
			
				|  |  | -      .map(raw => {
 | 
	
		
			
				|  |  | -        (raw._2._1, raw._2._2)
 | 
	
		
			
				|  |  | -      })
 | 
	
		
			
				|  |  | -      .groupByKey()
 | 
	
		
			
				|  |  | -      .map(raw => {
 | 
	
		
			
				|  |  | -        (raw._1, raw._2.toList)
 | 
	
		
			
				|  |  | -      })
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    midLogRdd.leftOuterJoin(midSeqRdd)
 | 
	
		
			
				|  |  | +      .leftOuterJoin(midSeqRdd)
 | 
	
		
			
				|  |  |        .map(raw => {
 | 
	
		
			
				|  |  |          (raw._2._1, raw._2._2.orNull)
 | 
	
		
			
				|  |  |        })
 | 
	
	
		
			
				|  | @@ -152,13 +133,13 @@ object makedata_recsys_82_originData_20250221 {
 | 
	
		
			
				|  |  |        // f. get mid seq rdd
 | 
	
		
			
				|  |  |        val midSeqRdd = getMidSeqRdd(vidMidRdd, uniqVideo)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -      // e. 历史行为关联video
 | 
	
		
			
				|  |  | -      val seqSampleData = joinVideoMap(resampleData, uniqVideo)
 | 
	
		
			
				|  |  | +      // g. 历史行为关联video
 | 
	
		
			
				|  |  | +      val seqSampleData = joinMidSeq(resampleData, midSeqRdd)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -      // d. 特征转换
 | 
	
		
			
				|  |  | +      // h. 特征转换
 | 
	
		
			
				|  |  |        val featureData = getFeature(seqSampleData)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -      // f. 保存数据
 | 
	
		
			
				|  |  | +      // i. 保存数据
 | 
	
		
			
				|  |  |        val hdfsPath = "%s/%s%s".format(savePath, dt, hh)
 | 
	
		
			
				|  |  |        DataUtils.saveData(featureData, hdfsPath, repartition)
 | 
	
		
			
				|  |  |      }
 |