Kaynağa Gözat

新模型特征+分析汤姆森数据

zhangbo 11 ay önce
ebeveyn
işleme
550915857e

+ 3 - 1
src/main/scala/com/aliyun/odps/spark/examples/ana/ana_01_cidvidpk.scala

@@ -36,6 +36,7 @@ object ana_01_cidvidpk {
     val vidSelect = param.getOrElse("vidSelect", "")
     val cidsSelect = param.getOrElse("cidsSelect", "").split(",").toSet
     val apptypeSelect = param.getOrElse("apptype", "")
+    val partitionPrefix = param.getOrElse("partitionPrefix", "dt=")
 
     // 2 读取odps+表信息
     val odpsOps = env.getODPS(sc)
@@ -47,13 +48,14 @@ object ana_01_cidvidpk {
         "3000", "3500", "4000", "4500", "5000", "5500"
       )){
         val partition = dt_hh + mi
+        println("partition:" + partition)
         partitions.add(partition)
       }
     }
     val rdds = partitions.map(p => {
       odpsOps.readTable(project = project,
         table = table,
-        partition = p,
+        partition = partitionPrefix + p,
         transfer = func,
         numPartition = tablePart)
     }).reduce((r1, r2) => r1.union(r2))

+ 7 - 0
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本【分析】

@@ -0,0 +1,7 @@
+nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.ana.ana_01_cidvidpk \
+--master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 32 \
+./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+beginStr:2024060208 endStr:2024060223 \
+vidSelect:21006075 cidsSelect:1902,1310 apptype:0 \
+> p01_ana.log 2>&1 &