Bladeren bron

新模型特征+分析汤姆森数据

zhangbo 11 maanden geleden
bovenliggende
commit
f701da0d48

+ 13 - 12
src/main/scala/com/aliyun/odps/spark/examples/ana/ana_01_cidvidpk.scala

@@ -83,13 +83,13 @@ object ana_01_cidvidpk {
           })
           var rankId = ranks_json.get(0)._1
           var score = ranks_json.get(0)._2
-          for (i <- 1 until ranks_json.size){
-            val item = ranks_json.get(i)
-            if (item._2 > score){
-              rankId = item._1
-              score = item._2
-            }
-          }
+//          for (i <- 1 until ranks_json.size){
+//            val item = ranks_json.get(i)
+//            if (item._2 > score){
+//              rankId = item._1
+//              score = item._2
+//            }
+//          }
           (apptype, abcode, vid, recalls_json, rankId)
       }.flatMap({
         case (apptype, abcode, vid, recalls_json, rankId) =>
@@ -102,13 +102,14 @@ object ana_01_cidvidpk {
           val x1 = 1
           val x2 = if (recallId.equals(rankId)) 1 else 0
           val x3 = if (cidsSelect.subsetOf(recalls_json)) 1 else 0
-          val x4 = if (cidsSelect.subsetOf(recalls_json) && recallId.equals(rankId)) 1 else 0
-          ((apptype, abcode, vid, recallId), (x1, x2, x3, x4))
+          val x4 = if (cidsSelect.subsetOf(recalls_json) && cidsSelect.contains(rankId)) 1 else 0
+          val x5 = if (cidsSelect.subsetOf(recalls_json) && recallId.equals(rankId)) 1 else 0
+          ((apptype, abcode, vid, recallId), (x1, x2, x3, x4, x5))
       }).aggregateByKey(
-        (0, 0, 0, 0)
+        (0, 0, 0, 0, 0)
       )(
-        seqOp = (runningSum, x) => (runningSum._1 + x._1, runningSum._2 + x._2, runningSum._3 + x._3, runningSum._4 + x._4),
-        combOp = (sum1, sum2) => (sum1._1 + sum2._1, sum1._2 + sum2._2, sum1._3 + sum2._3, sum1._4 + sum2._4)
+        seqOp = (runningSum, x) => (runningSum._1 + x._1, runningSum._2 + x._2, runningSum._3 + x._3, runningSum._4 + x._4, runningSum._5 + x._5),
+        combOp = (sum1, sum2) => (sum1._1 + sum2._1, sum1._2 + sum2._2, sum1._3 + sum2._3, sum1._4 + sum2._4, sum1._5 + sum2._5)
       )
 
     data.collect().foreach(r => println("结果\t" + r._1.productIterator.mkString("\t") + "\t" + r._2.productIterator.mkString("\t")))

+ 1 - 0
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本【分析】

@@ -2,6 +2,7 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --class com.aliyun.odps.spark.examples.ana.ana_01_cidvidpk \
 --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 32 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+tablePart:5 \
 beginStr:2024060208 endStr:2024060223 \
 vidSelect:21006075 cidsSelect:1902,1310 apptype:0 \
 > p01_ana.log 2>&1 &