Sfoglia il codice sorgente

删除sparse特征

zhangbo 10 mesi fa
parent
commit
c603d2add6

+ 3 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/makedata_ad_33_bucketData_20240622.scala

@@ -86,7 +86,9 @@ object makedata_ad_33_bucketData_20240622 {
               val featuresBucket = features.map{
                 case (name, score) =>
                   var ifFilter = false
-                  filterNames.foreach(r=> if (!ifFilter && name.startsWith(r)) {ifFilter = true} )
+                  if (filterNames.nonEmpty){
+                    filterNames.foreach(r=> if (!ifFilter && name.startsWith(r)) {ifFilter = true} )
+                  }
                   if (ifFilter){
                     ""
                   }else{

+ 6 - 4
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本-广告

@@ -6,10 +6,10 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
 tablePart:64 repartition:32 \
-beginStr:2024062308 endStr:2024062311 \
+beginStr:2024062412 endStr:2024062423 \
 savePath:/dw/recommend/model/31_ad_sample_data/ \
 table:alg_recsys_ad_sample_all \
-> p31_2024062308.log 2>&1 &
+> p31_2024062412.log 2>&1 &
 
 
 
@@ -26,10 +26,12 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --class com.aliyun.odps.spark.examples.makedata_ad.makedata_ad_33_bucketData_20240622 \
 --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-beginStr:20240623 endStr:20240623 repartition:400 \
+beginStr:20240623 endStr:20240624 repartition:400 \
+> p33_data.log 2>&1 &
+
+
 filterNames:cid_,adid_,adverid_,targeting_conversion_ \
 savePath:/dw/recommend/model/33_ad_train_data_nosparse/ \
-> p33_data.log 2>&1 &
 
 
 /dw/recommend/model/31_ad_sample_data/

+ 1 - 2
zhangbo/01_train.sh

@@ -16,9 +16,8 @@ $HADOOP fs -text ${train_path}/${day}/* | /root/sunmingze/alphaFM/bin/fm_train -
 # nohup sh 01_train.sh 20240606 /dw/recommend/model/16_train_data/ model_aka4 1,1,4 >p1_model_aka4.log 2>&1 &
 
 
-
+# nohup sh 01_train.sh 20240620 /dw/recommend/model/33_ad_train_data_nosparse/ model_bkb0_3 1,1,0 >p1_model_bkb0.log 2>&1 &
 # nohup sh 01_train.sh 20240620 /dw/recommend/model/33_ad_train_data/ model_bkb8_2 1,1,8 >p1_model_bkb8_2.log 2>&1 &
-# nohup sh 01_train.sh 20240620 /dw/recommend/model/33_ad_train_data/ model_bkb0_2 1,1,0 >p1_model_bkb0_2.log 2>&1 &
 # nohup sh 01_train.sh 20240620 /dw/recommend/model/33_ad_train_data/ model_bkb4 1,1,4 >p1_model_bkb4.log 2>&1 &
 # nohup sh 01_train.sh 20240620 /dw/recommend/model/33_ad_train_data/ model_bkb12 1,1,12 >p1_model_bkb12.log 2>&1 &
 # nohup sh 01_train.sh 20240620 /dw/recommend/model/33_ad_train_data/ model_bkb16 1,1,16 >p1_model_bkb16.log 2>&1 &

+ 3 - 1
zhangbo/02_train_go.sh

@@ -25,4 +25,6 @@ done
 # nohup sh 02_train_go.sh 20240615 20240616 model_aka8 /dw/recommend/model/16_train_data/ 1,1,8 >p2_model_aka8.log 2>&1 &
 
 
-# nohup sh 02_train_go.sh 20240622 20240623 model_bkb0 /dw/recommend/model/33_ad_train_data/ 1,1,0 >p2_model_bkb0.log 2>&1 &
+# nohup sh 02_train_go.sh 20240623 20240624 model_bkb0 /dw/recommend/model/33_ad_train_data/ 1,1,0 >p2_model_bkb0.log 2>&1 &
+
+# nohup sh 02_train_go.sh 20240621 20240623 model_bkb0_3 /dw/recommend/model/33_ad_train_data_nosparse/ 1,1,0 >p2_model_bkb0.log 2>&1 &

+ 1 - 1
zhangbo/03_predict.sh

@@ -35,7 +35,7 @@ cat predict/${output_file}_$day.txt | /root/sunmingze/AUC/AUC
 
 
 
-# nohup sh 03_predict.sh 20240623 /dw/recommend/model/33_ad_train_data/ model_bkb0_20240622.txt model_bkb0_20240622 0 >p3_model_bkb0.log 2>&1 &
+# nohup sh 03_predict.sh 20240622 /dw/recommend/model/33_ad_train_data_nosparse/ model_bkb0_3_20240621.txt model_bkb0_3_20240621 0 >p3_model_bkb0.log 2>&1 &
 # nohup sh 03_predict.sh 20240621 /dw/recommend/model/33_ad_train_data/ model_bkb4_20240620.txt model_bkb4_20240620 4 >p3_model_bkb4.log 2>&1 &
 # nohup sh 03_predict.sh 20240623 /dw/recommend/model/33_ad_train_data/ model_bkb8_20240622.txt model_bkb8_20240622 8 >p3_model_bkb8.log 2>&1 &
 # nohup sh 03_predict.sh 20240621 /dw/recommend/model/33_ad_train_data/ model_bkb12_20240620.txt model_bkb12_20240620 12 >p3_model_bkb12.log 2>&1 &