commit 1648d8ae5e by zhangbo, 1 year ago

+ 28 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_07_strData.scala

@@ -113,6 +113,33 @@ object makedata_07_strData {
           bytesFeatureExtractor.makeFeature4String(feaMap)
           val featureMap = bytesFeatureExtractor.featureMap
           label + "\t" + featureMap.entries().map(r => r.getValue.getIdentifier + ":1").mkString("\t")
+        } else if ("v4".equals(featureVersion)) {
+          val feaSet = Set(
+            "ctx_week", "ctx_hour", "ctx_region", "ctx_city",
+            "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_system",
+            "u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
+            "u_3day_exp_cnt", "u_3day_click_cnt", "u_3day_share_cnt", "u_3day_return_cnt",
+            "total_time", "play_count_total",
+            "i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
+            "i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt",
+            "u_1day_ctr", "u_1day_str", "u_1day_rov", "u_1day_ros",
+            "u_3day_ctr", "u_3day_str", "u_3day_rov", "u_3day_ros",
+            "i_1day_ctr", "i_1day_str", "i_1day_rov", "i_1day_ros",
+            "i_3day_ctr", "i_3day_str", "i_3day_rov", "i_3day_ros",
+
+            "i_1day_ctr_rt", "i_1day_str_rt", "i_1day_ros_rt", "i_1day_rov_rt",
+            "i_1h_ctr_rt", "i_1h_str_rt", "i_1h_ros_rt", "i_1h_rov_rt"
+          )
+          val feaMap = new util.HashMap[String, String]()
+          feaSet.foreach(r => {
+            if (feaJson.containsKey(r)) {
+              feaMap.put(r, feaJson.getString(r))
+            }
+          })
+          val bytesFeatureExtractor = new OfflineVlogShareLRFeatureExtractorV2()
+          bytesFeatureExtractor.makeFeature4String(feaMap)
+          val featureMap = bytesFeatureExtractor.featureMap
+          label + "\t" + featureMap.entries().map(r => r.getValue.getIdentifier + ":1").mkString("\t")
         }
 
       })
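
For reference, the new v4 branch keeps only a whitelist of context/user/item features from the parsed feature JSON and emits one training line per record in the form "label<TAB>feature:1<TAB>...". Below is a minimal, self-contained Scala sketch of that flow: the plain Map stands in for the fastjson feaJson object, toFeatureId is a hypothetical placeholder for the identifiers produced by OfflineVlogShareLRFeatureExtractorV2, and the whitelist is abbreviated.

object V4FeatureLineSketch {
  // Abbreviated stand-in for the full v4 whitelist added in the hunk above.
  val feaSet: Set[String] = Set("ctx_week", "ctx_hour", "u_1day_ctr", "i_1day_ctr_rt")

  // Hypothetical placeholder for the extractor's feature identifier scheme.
  def toFeatureId(name: String, value: String): String = s"$name=$value"

  // Keep only whitelisted keys, then emit "label<TAB>feature:1<TAB>..." as the trainer expects.
  def makeLine(label: String, rawFeatures: Map[String, String]): String = {
    val kept = feaSet.toSeq.sorted.flatMap { k =>
      rawFeatures.get(k).map(v => toFeatureId(k, v) + ":1")
    }
    (label +: kept).mkString("\t")
  }

  def main(args: Array[String]): Unit = {
    val raw = Map("ctx_week" -> "3", "ctx_hour" -> "21", "u_1day_ctr" -> "0.12")
    println(makeLine("1", raw)) // 1  ctx_hour=21:1  ctx_week=3:1  u_1day_ctr=0.12:1 (tab-separated)
  }
}
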
@@ -121,7 +148,7 @@ object makedata_07_strData {
       if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")){
         println("删除路径并开始数据写入:" + hdfsPath)
         MyHdfsUtils.delete_hdfs_path(hdfsPath)
-        data.saveAsTextFile(hdfsPath, classOf[GzipCodec])
+        data.repartition(100).saveAsTextFile(hdfsPath, classOf[GzipCodec])
       }else{
         println("路径不合法,无法写入:" + hdfsPath)
       }
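
The only code change in this hunk is data.repartition(100) before the write, which fixes the number of gzip part files at 100 regardless of how many partitions the upstream transformations produced. A minimal sketch of that write path, with dummy records and a placeholder output path:

import org.apache.hadoop.io.compress.GzipCodec
import org.apache.spark.sql.SparkSession

object RepartitionWriteSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("repartition-write-sketch").getOrCreate()
    // Dummy records in the same "label<TAB>feature:1" shape as the job above.
    val data = spark.sparkContext.parallelize(Seq("1\tctx_hour=21:1", "0\tctx_week=3:1"))
    val hdfsPath = "/tmp/repartition_write_sketch" // placeholder, not the production path
    // 100 partitions -> 100 gzip-compressed part-XXXXX files under hdfsPath.
    data.repartition(100).saveAsTextFile(hdfsPath, classOf[GzipCodec])
    spark.stop()
  }
}

Note that repartition always shuffles; if the goal is only to cap the file count when the input has more than 100 partitions, coalesce(100) would achieve that without a shuffle.
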

+ 6 - 1
zhangbo/01_train.sh

@@ -1,3 +1,7 @@
+#!/bin/sh
+set -e
+set -x
+
 day=$1
 train_path=$2
 model_name=$3
@@ -12,4 +16,5 @@ $HADOOP fs -text ${train_path}/dt=$day/* | /root/sunmingze/alphaFM/bin/fm_train
 
 
 # nohup sh 01_train.sh 20240114 /dw/recommend/model/01_str_data model_str_big >p1.log 2>&1 &
-# nohup sh 01_train.sh 20240114 /dw/recommend/model/02_str_data model_str_small >p2.log 2>&1 &
+# nohup sh 01_train.sh 20240114 /dw/recommend/model/02_str_data model_str_small >p2.log 2>&1 &
+# nohup sh 01_train.sh 20240114 /dw/recommend/model/03_str_data model_str_mid >p3.log 2>&1 &

+ 5 - 0
zhangbo/03_predict.sh

@@ -1,3 +1,7 @@
+#!/bin/sh
+set -e
+set -x
+
 day=$1
 train_path=$2
 model_name=$3
@@ -9,6 +13,7 @@ cat predict/${output_file}_$day.txt | /root/sunmingze/AUC/AUC
 # str:
 # nohup sh 03_predict.sh 20240115 /dw/recommend/model/01_str_data/ model_str_big_20240114.txt model_str_big >p1_pred.log 2>&1 &
 # nohup sh 03_predict.sh 20240115 /dw/recommend/model/02_str_data/ model_str_small_20240114.txt model_str_small >p2_pred.log 2>&1 &
+# nohup sh 03_predict.sh 20240115 /dw/recommend/model/03_str_data/ model_str_mid_20240114.txt model_str_mid >p3_pred.log 2>&1 &
 
 
 # ros:
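
03_predict.sh pipes the per-day prediction file into the /root/sunmingze/AUC/AUC binary to score the model. As a rough illustration of the metric that step reports (not the binary's actual implementation, and the (label, score) pair input format is an assumption), here is a rank-statistic AUC in Scala:

object AucSketch {
  // AUC via the rank statistic: (sum of 1-based ranks of positives - P(P+1)/2) / (P * N).
  // Ties in scores are ignored for simplicity; a fuller implementation would average tied ranks.
  def auc(pairs: Seq[(Int, Double)]): Double = {
    val ranked = pairs.sortBy(_._2).zipWithIndex             // ascending by score, 0-based index
    val posRanks = ranked.collect { case ((1, _), idx) => idx + 1L }
    val p = posRanks.size.toDouble
    val n = ranked.size - p
    if (p == 0 || n == 0) 0.5                                 // degenerate: only one class present
    else (posRanks.sum.toDouble - p * (p + 1) / 2) / (p * n)
  }

  def main(args: Array[String]): Unit = {
    val sample = Seq((1, 0.9), (0, 0.3), (1, 0.6), (0, 0.8))
    println(f"AUC = ${auc(sample)}%.4f")                      // 0.7500 for this toy sample
  }
}
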