
Update bucketed data files

zhangbo 10 months ago
commit 7f8d051ab3

+ 1 - 2
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_14_valueData_20240608.scala

@@ -62,7 +62,6 @@ object makedata_14_valueData_20240608 {
         val contentList = contentList_bc.value
         row.foreach {
           case (logKey, labelKey, featureKey) =>
-//            val label = JSON.parseObject(labelKey).getOrDefault("is_return", "0").toString
             val featureJson = JSON.parseObject(featureKey)
 
             val featureValues = contentList.map(key => {
@@ -82,7 +81,7 @@ object makedata_14_valueData_20240608 {
       if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
         println("删除路径并开始数据写入:" + hdfsPath)
         MyHdfsUtils.delete_hdfs_path(hdfsPath)
-        data1.coalesce(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
+        data1.repartition(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
       } else {
         println("路径不合法,无法写入:" + hdfsPath)
       }
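
Note on the coalesce → repartition change above: coalesce(n) only merges existing partitions without a shuffle, so it is cheap but can leave the n gzip part files skewed and caps the preceding stage's parallelism at n; repartition(n) is coalesce(n, shuffle = true), a full shuffle that spreads records evenly across exactly n output files. A minimal sketch of the difference, assuming a local run and illustrative names only (not the job's actual setup):

import org.apache.spark.sql.SparkSession

object RepartitionSketch {
  def main(args: Array[String]): Unit = {
    // Assumption: local master for demonstration; the real job runs on YARN.
    val spark = SparkSession.builder()
      .appName("RepartitionSketch")
      .master("local[*]")
      .getOrCreate()

    // 1000 input partitions, as a stand-in for the job's data1 RDD.
    val data1 = spark.sparkContext.parallelize(1 to 1000000, 1000).map(_.toString)

    // coalesce(32): merges partitions without a shuffle; records keep their
    // original grouping, so the resulting files can be uneven in size.
    println(s"coalesce:    ${data1.coalesce(32).getNumPartitions} partitions")

    // repartition(32) == coalesce(32, shuffle = true): full shuffle,
    // records are spread evenly, giving similarly sized output files.
    println(s"repartition: ${data1.repartition(32).getNumPartitions} partitions")

    spark.stop()
  }
}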

+ 6 - 6
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本

@@ -69,19 +69,19 @@ savePath:/dw/recommend/model/04_str_data/ beginStr:20240311 endStr:20240312 feat
 
 nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
 --class com.aliyun.odps.spark.examples.makedata.makedata_13_originData_20240529 \
---master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
+--master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 32 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-tablePart:32 \
-beginStr:2024060716 endStr:2024060723 \
+tablePart:32 repartition:32 \
+beginStr:2024060712 endStr:2024060716 \
 table:alg_recsys_sample_all \
-> p13_data060716.log 2>&1 &
+> p13_data060712.log 2>&1 &
 
 
 nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
 --class com.aliyun.odps.spark.examples.makedata.makedata_14_valueData_20240608 \
 --master yarn --driver-memory 1G --executor-memory 3G --executor-cores 1 --num-executors 32 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-beginStr:20240606 endStr:20240606 repartition:1000 \
+beginStr:20240606 endStr:20240606 repartition:200 \
 > p14_data.log 2>&1 &
 
 
@@ -99,7 +99,7 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --class com.aliyun.odps.spark.examples.makedata.makedata_16_bucketData_20240609 \
 --master yarn --driver-memory 1G --executor-memory 4G --executor-cores 1 --num-executors 16 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-beginStr:20240606 endStr:20240606 repartition:1000 \
+beginStr:20240607 endStr:20240607 repartition:400 \
 > p16_data.log 2>&1 &
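
The makedata jobs above take their parameters as colon-separated key:value tokens (tablePart:32, repartition:200, beginStr/endStr as yyyyMMdd or yyyyMMddHH). A hedged sketch of how such arguments could be parsed in the driver; the actual parser lives inside the makedata_* jobs and may differ:

object ParamSketch {
  // Assumption: "key:value" tokens, split on the first ':' only, so values
  // that themselves contain a colon (e.g. paths) would still parse.
  def parseArgs(args: Array[String]): Map[String, String] =
    args.map { arg =>
      val Array(k, v) = arg.split(":", 2)
      k -> v
    }.toMap

  def main(args: Array[String]): Unit = {
    val params = parseArgs(Array("tablePart:32", "repartition:200",
      "beginStr:20240607", "endStr:20240607"))
    // repartition controls how many gzip part files each output path is split into.
    val repartition = params.getOrElse("repartition", "100").toInt
    println(s"repartition = $repartition")
  }
}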
 
 

+ 2 - 2
zhangbo/01_train.sh

@@ -11,5 +11,5 @@ HADOOP="/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop"
 $HADOOP fs -text ${train_path}/${day}/* | /root/sunmingze/alphaFM/bin/fm_train -m model/${model_name}_${day}.txt -dim ${bias} -core 8
 # -v_l1 ${v_l1} -v_l2 ${v_l2}
 
-# nohup sh 01_train.sh 20240606 /dw/recommend/model/16_train_data/ model_fuck 1,1,8 >p1_model_fuck.log 2>&1 &
-
+# nohup sh 01_train.sh 20240606 /dw/recommend/model/16_train_data/ model_aka8 1,1,8 >p1_model_aka8.log 2>&1 &
+# nohup sh 01_train.sh 20240606 /dw/recommend/model/16_train_data/ model_aka0 1,1,0 >p1_model_aka0.log 2>&1 &
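
A note on the fm_train flags, per alphaFM's usage: -dim takes three comma-separated values k0,k1,k2 (use bias, use linear terms, dimension of the pairwise factors), so 1,1,8 trains an FM with 8-dimensional factors while 1,1,0 disables the factor terms and reduces to plain logistic regression; running model_aka8 and model_aka0 side by side therefore contrasts FM against an LR baseline.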