Browse Source

i2i样本制作,第2步。

zhangbo 5 months ago
parent
commit
aea448f394

+ 7 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_dssm/makedata_i2i_02_joinFeatureData_20241128.scala

@@ -32,6 +32,7 @@ object makedata_i2i_02_joinFeatureData_20241128 {
     val project = param.getOrElse("project", "loghubods")
     val repartition = param.getOrElse("repartition", "100").toInt
     val filterHours = param.getOrElse("filterHours", "25").split(",").toSet
+    val ifDebug = param.getOrElse("ifDebug", "false").toBoolean
     // 2 读取odps+表信息
     val odpsOps = env.getODPS(sc)
     // 3 循环执行数据生产
@@ -133,11 +134,16 @@ object makedata_i2i_02_joinFeatureData_20241128 {
               val feature_left_cate2 = category1.getOrElse(cate2_left, "{}")
               val feature_right_cate1 = category2.getOrElse(cate1_right, "{}")
               val feature_right_cate2 = category2.getOrElse(cate2_right, "{}")
-              (logKey, label, vid_left, vid_right, feature_left, feature_right, feature_left_action, feature_right_action,
+              result.add(
+                (logKey, label, vid_left, vid_right, feature_left, feature_right, feature_left_action, feature_right_action,
                 feature_left_cate1, feature_right_cate1, feature_left_cate2, feature_right_cate2)
+              )
           }
           result.iterator
         })
+        if (ifDebug){
+          println("数据量:" + sampleData1.count())
+        }
         val hdfsPath = savePath + "/" + savePartition
         if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
           println("删除路径并开始数据写入:" + hdfsPath)

+ 10 - 1
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本-I2I

@@ -3,4 +3,13 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
 beginStr:2024112612 endStr:2024112612 negCnt:20 \
-tablePart:64 savePath:/dw/recommend/model/51_dssm_i2i_sample/ > p51.log 2>&1 &
+tablePart:64 savePath:/dw/recommend/model/51_dssm_i2i_sample/ > p51.log 2>&1 &
+
+nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata_dssm.makedata_i2i_02_joinFeatureData_20241128 \
+--master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 32 \
+./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+beginStr:2024112612 endStr:2024112612 \
+tablePart:64 \
+readPath:/dw/recommend/model/51_dssm_i2i_sample/ \
+savePath:/dw/recommend/model/52_dssm_i2i_joinfeature/ > p52.log 2>&1 &