jch hace 2 meses
padre
commit
4961845f64

+ 4 - 3
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_82_originData_20250221.scala

@@ -37,12 +37,14 @@ object makedata_recsys_82_originData_20250221 {
     logRdd
       .map(record => {
         val mid = record.getString("mid")
-        (mid, record)
+        val c9 = record.getString("c9_feature")
+        (mid, c9)
       })
       .filter(_._1.nonEmpty)
+      .reduceByKey((a, b) => a)
       .flatMap(raw => {
         val result = new ArrayBuffer[(String, String)]
-        for (hVid <- ConvertUtils.getVidList(raw._2.getString("c9_feature"))) {
+        for (hVid <- ConvertUtils.getVidList(raw._2)) {
           result += ((hVid, raw._1)) // (vid, mid)
         }
         result
@@ -51,7 +53,6 @@ object makedata_recsys_82_originData_20250221 {
 
   private def getMidSeqRdd(vidMidRdd: RDD[(String, String)], videoRdd: RDD[(String, java.util.Map[String, String])]): RDD[(String, List[java.util.Map[String, String]])] = {
     vidMidRdd
-      .reduceByKey((a, b) => a)
       .join(videoRdd)
       .map(raw => {
         (raw._2._1, raw._2._2)