فهرست منبع

检查数据 用metafeaturemap

zhangbo 10 ماه پیش
والد
کامیت
f4739868a1

+ 20 - 20
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_13_originData_20240529_check.scala

@@ -57,34 +57,34 @@ object makedata_13_originData_20240529_check {
           val featureMap = new JSONObject()
 
           // a 视频特征
-          val b1: JSONObject = if (record.containsKey("alg_vid_feature_all_exp")) new JSONObject() else
+          val b1: JSONObject = if (!record.containsKey("alg_vid_feature_all_exp")) new JSONObject() else
             JSON.parseObject(record.getString("alg_vid_feature_all_exp"))
-          val b2: JSONObject = if (record.containsKey("alg_vid_feature_all_share")) new JSONObject() else
+          val b2: JSONObject = if (!record.containsKey("alg_vid_feature_all_share")) new JSONObject() else
             JSON.parseObject(record.getString("alg_vid_feature_all_share"))
-          val b3: JSONObject = if (record.containsKey("alg_vid_feature_all_return")) new JSONObject() else
+          val b3: JSONObject = if (!record.containsKey("alg_vid_feature_all_return")) new JSONObject() else
             JSON.parseObject(record.getString("alg_vid_feature_all_return"))
-          val b6: JSONObject = if (record.containsKey("alg_vid_feature_exp2share")) new JSONObject() else
+          val b6: JSONObject = if (!record.containsKey("alg_vid_feature_exp2share")) new JSONObject() else
             JSON.parseObject(record.getString("alg_vid_feature_exp2share"))
-          val b7: JSONObject = if (record.containsKey("alg_vid_feature_share2return")) new JSONObject() else
+          val b7: JSONObject = if (!record.containsKey("alg_vid_feature_share2return")) new JSONObject() else
             JSON.parseObject(record.getString("alg_vid_feature_share2return"))
 
-          val b8: JSONObject = if (record.containsKey("alg_vid_feature_feed_noflow_exp")) new JSONObject() else
+          val b8: JSONObject = if (!record.containsKey("alg_vid_feature_feed_noflow_exp")) new JSONObject() else
             JSON.parseObject(record.getString("alg_vid_feature_feed_noflow_exp"))
-          val b9: JSONObject = if (record.containsKey("alg_vid_feature_feed_noflow_root_share")) new JSONObject() else
+          val b9: JSONObject = if (!record.containsKey("alg_vid_feature_feed_noflow_root_share")) new JSONObject() else
             JSON.parseObject(record.getString("alg_vid_feature_feed_noflow_root_share"))
-          val b10: JSONObject = if (record.containsKey("alg_vid_feature_feed_noflow_root_return")) new JSONObject() else
+          val b10: JSONObject = if (!record.containsKey("alg_vid_feature_feed_noflow_root_return")) new JSONObject() else
             JSON.parseObject(record.getString("alg_vid_feature_feed_noflow_root_return"))
-          val b11: JSONObject = if (record.containsKey("alg_vid_feature_feed_flow_exp")) new JSONObject() else
+          val b11: JSONObject = if (!record.containsKey("alg_vid_feature_feed_flow_exp")) new JSONObject() else
             JSON.parseObject(record.getString("alg_vid_feature_feed_flow_exp"))
-          val b12: JSONObject = if (record.containsKey("alg_vid_feature_feed_flow_root_share")) new JSONObject() else
+          val b12: JSONObject = if (!record.containsKey("alg_vid_feature_feed_flow_root_share")) new JSONObject() else
             JSON.parseObject(record.getString("alg_vid_feature_feed_flow_root_share"))
-          val b13: JSONObject = if (record.containsKey("alg_vid_feature_feed_flow_root_return")) new JSONObject() else
+          val b13: JSONObject = if (!record.containsKey("alg_vid_feature_feed_flow_root_return")) new JSONObject() else
             JSON.parseObject(record.getString("alg_vid_feature_feed_flow_root_return"))
-          val b17: JSONObject = if (record.containsKey("alg_vid_feature_feed_province_exp")) new JSONObject() else
+          val b17: JSONObject = if (!record.containsKey("alg_vid_feature_feed_province_exp")) new JSONObject() else
             JSON.parseObject(record.getString("alg_vid_feature_feed_province_exp"))
-          val b18: JSONObject = if (record.containsKey("alg_vid_feature_feed_province_root_share")) new JSONObject() else
+          val b18: JSONObject = if (!record.containsKey("alg_vid_feature_feed_province_root_share")) new JSONObject() else
             JSON.parseObject(record.getString("alg_vid_feature_feed_province_root_share"))
-          val b19: JSONObject = if (record.containsKey("alg_vid_feature_feed_province_root_return")) new JSONObject() else
+          val b19: JSONObject = if (!record.containsKey("alg_vid_feature_feed_province_root_return")) new JSONObject() else
             JSON.parseObject(record.getString("alg_vid_feature_feed_province_root_return"))
 
 
@@ -113,12 +113,12 @@ object makedata_13_originData_20240529_check {
             }
           }
 
-          val video_info: JSONObject = if (record.containsKey("alg_vid_feature_basic_info")) new JSONObject() else
+          val video_info: JSONObject = if (!record.containsKey("alg_vid_feature_basic_info")) new JSONObject() else
             JSON.parseObject(record.getString("alg_vid_feature_basic_info"))
           featureMap.put("total_time", if (video_info.containsKey("total_time")) video_info.getIntValue("total_time").toDouble else 0D)
           featureMap.put("bit_rate", if (video_info.containsKey("bit_rate")) video_info.getIntValue("bit_rate").toDouble else 0D)
 
-          val c1: JSONObject = if (record.containsKey("alg_mid_feature_play")) new JSONObject() else
+          val c1: JSONObject = if (!record.containsKey("alg_mid_feature_play")) new JSONObject() else
             JSON.parseObject(record.getString("alg_mid_feature_play"))
           if (c1.nonEmpty) {
             featureMap.put("playcnt_6h", if (c1.containsKey("playcnt_6h")) c1.getIntValue("playcnt_6h").toDouble else 0D)
@@ -126,7 +126,7 @@ object makedata_13_originData_20240529_check {
             featureMap.put("playcnt_3d", if (c1.containsKey("playcnt_3d")) c1.getIntValue("playcnt_3d").toDouble else 0D)
             featureMap.put("playcnt_7d", if (c1.containsKey("playcnt_7d")) c1.getIntValue("playcnt_7d").toDouble else 0D)
           }
-          val c2: JSONObject = if (record.containsKey("alg_mid_feature_share_and_return")) new JSONObject() else
+          val c2: JSONObject = if (!record.containsKey("alg_mid_feature_share_and_return")) new JSONObject() else
             JSON.parseObject(record.getString("alg_mid_feature_share_and_return"))
           if (c2.nonEmpty) {
             featureMap.put("share_pv_12h", if (c2.containsKey("share_pv_12h")) c2.getIntValue("share_pv_12h").toDouble else 0D)
@@ -146,7 +146,7 @@ object makedata_13_originData_20240529_check {
               ("c5_feature", "alg_mid_feature_play_tags"),
               ("c6_feature", "alg_mid_feature_play_tags"),
               ("c7_feature", "alg_mid_feature_play_tags"))) {
-              val c34567: JSONObject = if (record.containsKey(key_feature._2)) new JSONObject() else
+              val c34567: JSONObject = if (!record.containsKey(key_feature._2)) new JSONObject() else
                 JSON.parseObject(record.getString(key_feature._2))
               for (key_time <- List("tags_1d", "tags_3d", "tags_7d")) {
                 val tags = if (c34567.containsKey(key_time)) c34567.getString(key_time) else ""
@@ -163,7 +163,7 @@ object makedata_13_originData_20240529_check {
           val vid = if (record_.isNull("vid")) "" else record_.getString("vid")
           if (!vid.equals("")) {
             for (key_feature <- List(("c8_feature", "alg_mid_feature_sharecf"), ("c9_feature", "alg_mid_feature_returncf"))) {
-              val c89: JSONObject = if (record.containsKey(key_feature._2)) new JSONObject() else
+              val c89: JSONObject = if (!record.containsKey(key_feature._2)) new JSONObject() else
                 JSON.parseObject(record.getString(key_feature._2))
               for (key_action <- List("share", "return")) {
                 val cfListStr = if (c89.containsKey(key_action)) c89.getString(key_action) else ""
@@ -183,7 +183,7 @@ object makedata_13_originData_20240529_check {
             }
           }
 
-          val d1: JSONObject = if (record.containsKey("alg_recsys_feature_cf_i2i_new")) new JSONObject() else
+          val d1: JSONObject = if (!record.containsKey("alg_recsys_feature_cf_i2i_new")) new JSONObject() else
             JSON.parseObject(record.getString("alg_recsys_feature_cf_i2i_new"))
           if (d1.nonEmpty) {
             featureMap.put("d1_exp", if (d1.containsKey("exp")) d1.getString("exp").toDouble else 0D)

+ 16 - 1
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本

@@ -108,4 +108,19 @@ beginStr:20240614 endStr:20240614 repartition:1000 \
 
 /dw/recommend/model/13_sample_data/
 /dw/recommend/model/14_feature_data/
-/dw/recommend/model/16_train_data/
+/dw/recommend/model/16_train_data/
+
+
+
+nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata.makedata_13_originData_20240529_check \
+--master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
+./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+tablePart:64 repartition:32 \
+beginStr:2024061500 endStr:2024061523 \
+savePath:/dw/recommend/model/13_sample_data_check_print/ \
+table:alg_recsys_sample_all_new \
+> p13_2024061500_check.log 2>&1 &
+
+/dw/recommend/model/13_sample_data_check/
+/dw/recommend/model/13_sample_data_check_print/