Selaa lähdekoodia

i2i样本制作,第2步。

zhangbo 5 kuukautta sitten
vanhempi
commit
3a7038eeab

+ 2 - 2
src/main/scala/com/aliyun/odps/spark/examples/makedata_dssm/makedata_i2i_02_joinFeatureData_20241128.scala

@@ -131,8 +131,8 @@ object makedata_i2i_02_joinFeatureData_20241128 {
               val cate1_right = JSON.parseObject(feature_right).getOrDefault("category1", "无").toString
               val cate2_right = JSON.parseObject(feature_right).getOrDefault("category2_1", "无").toString
               val feature_left_cate1 = category1.getOrElse(cate1_left, "{}")
-              val feature_left_cate2 = category1.getOrElse(cate2_left, "{}")
-              val feature_right_cate1 = category2.getOrElse(cate1_right, "{}")
+              val feature_left_cate2 = category2.getOrElse(cate2_left, "{}")
+              val feature_right_cate1 = category1.getOrElse(cate1_right, "{}")
               val feature_right_cate2 = category2.getOrElse(cate2_right, "{}")
               result.add(
                 (logKey, label, vid_left, vid_right, feature_left, feature_right, feature_left_action, feature_right_action,

+ 29 - 1
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本-I2I

@@ -12,4 +12,32 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 beginStr:2024112612 endStr:2024112612 \
 tablePart:64 \
 readPath:/dw/recommend/model/51_dssm_i2i_sample/ \
-savePath:/dw/recommend/model/52_dssm_i2i_joinfeature/ > p52.log 2>&1 &
+savePath:/dw/recommend/model/52_dssm_i2i_joinfeature/ > p52.log 2>&1 &
+
+nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata_dssm.makedata_i2i_03_onehotFile_20241128 \
+--master yarn --driver-memory 16G --executor-memory 2G --executor-cores 1 --num-executors 32 \
+./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+dt:20241128 \
+tablePart:64 \
+readPath:/dw/recommend/model/53_dssm_i2i_onehot/20241128 \
+savePath:/dw/recommend/model/53_dssm_i2i_onehot/20241128 > p53.log 2>&1 &
+
+数据量:3415597。打印各个特征的枚举值数量:
+vid       3407301
+video_style     6517
+valid_time      728
+captions_color  656
+cate2   67
+audience_age_group      65
+audience_value_type     61
+font_size       49
+cover_persons_num       44
+cate1   42
+audience_gender 37
+sentiment_tendency      11
+video_type      8
+background_music_type   6
+captions        3
+has_end_credit_guide    2
+