Kaynağa Gözat

过滤异常数据

jch 1 ay önce
ebeveyn
işleme
4e7f410407

+ 6 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_83_originData_20250317.scala

@@ -175,9 +175,14 @@ object makedata_recsys_83_originData_20250317 {
         val labels = DataUtils.getLabels(labelNames, record).toString
         val features = ConvertV2.getFeature(record, videoSeq, 6).toString
         val scoresMap = DataUtils.getSubJson(record, "extend_alg", "scoresMap").toString
-        logKey + "\t" + labels + "\t" + scoresMap + "\t" + features
+        if (features.nonEmpty) {
+          logKey + "\t" + labels + "\t" + scoresMap + "\t" + features
+        } else {
+          ""
+        }
       })
     })
+      .filter(_.nonEmpty)
   }
 
   def main(args: Array[String]): Unit = {

+ 57 - 54
src/main/scala/com/aliyun/odps/spark/examples/myUtils/ConvertV2.java

@@ -11,60 +11,63 @@ public class ConvertV2 {
     /**
      * Builds the feature map for one log record and returns it after passing it through
      * {@code ConvertUtils.filterAndTruncate(featMap, scale)}.
      *
      * <p>Reads "ts", "uid", "mid", "vid", "apptype", "hotsencetype", "rootsourceid" and
      * "subsessionid" directly from {@code record}, and the "extend", "v2_feature", "v1_feature"
      * and "creative" columns through {@code ConvertUtils.getRecordCol}. The same {@code featMap}
      * instance is handed to every {@code FeatureTransformV2} helper in turn (context, head video,
      * user, user x video, rank video, head x rank, profile x rank).
      *
      * <p>Side effect: {@code record} is modified in place; the keys "user_channel" and
      * "user_level" are put into it before the user features are computed.
      *
      * <p>Error handling (this diff): the removed lines ran without a try block, so any exception
      * (for example {@code Long.parseLong} on a missing or non-numeric "ts") propagated to the
      * caller. The added lines wrap the whole computation in {@code try/catch (Exception)}, print
      * the stack trace, and then fall through to the return statement.
      *
      * <p>NOTE(review): the Scala caller in this same commit tests {@code features.nonEmpty} on the
      * {@code toString} of the returned object. An empty JSON object usually serializes as "{}",
      * which is a non-empty string, so that filter may never drop a record. Confirm against
      * {@code filterAndTruncate} and the JSON library in use.
      *
      * @param record   field map for one log row; mutated as described above
      * @param videoSeq history video entries, converted with {@code ConvertUtils.list2Map} and used
      *                 for the profile x rank cross features
      * @param scale    forwarded unchanged to {@code ConvertUtils.filterAndTruncate}; presumably the
      *                 number of decimal places kept (TODO confirm)
      * @return the result of {@code ConvertUtils.filterAndTruncate(featMap, scale)}
      */
     public static JSONObject getFeature(Map<String, String> record, List<Map<String, String>> videoSeq, int scale) {
         Map<String, Double> featMap = new HashMap<>();
 
-        // origin info
-        String ts = record.get("ts");
-        long currentMs = Long.parseLong(ts) * 1000;
-        String uid = record.getOrDefault("uid", "");
-        String mid = record.getOrDefault("mid", "");
-        String vid = record.get("vid");
-        String appType = record.getOrDefault("apptype", "");
-        String hotSencetype = record.getOrDefault("hotsencetype", "");
-        Map<String, String> extendMap = ConvertUtils.getRecordCol(record, "extend");
-        record.put("user_channel", FeatureUtils.getUserChannel(record.get("rootsourceid"), extendMap.get("group_name")));
-        record.put("user_level", FeatureUtils.getUserLevel(extendMap.get("rootsessionid"), record.get("subsessionid")));
-        Map<String, String> headInfo = ConvertUtils.getRecordCol(record, "v2_feature");
-        Map<String, String> rankInfo = ConvertUtils.getRecordCol(record, "v1_feature");
-        Map<String, String> creativeInfo = ConvertUtils.getRecordCol(record, "creative");
-        Map<String, Map<String, String>> userOriginInfo = getUserOriginInfo(record);
-        Map<String, Map<String, Map<String, String>>> videoOriginInfo = getVideoOriginInfo(record);
-
-        // parse info
-        Map<String, String> c9Map = userOriginInfo.get("alg_recsys_feature_user_share_return_stat");
-        UserShareReturnProfile userProfile = JSON.parseObject(ConvertUtils.toJson(c9Map), UserShareReturnProfile.class);
-        Map<String, Map<String, String>> historyVideoMap = ConvertUtils.list2Map(videoSeq);
-
-        Map<String, Map<String, String[]>> c7Map = FeatureTransformV2.parseUCFScore(userOriginInfo.get("alg_mid_feature_sharecf"));
-        Map<String, Map<String, String[]>> c8Map = FeatureTransformV2.parseUCFScore(userOriginInfo.get("alg_mid_feature_returncf"));
-
-        // context feature
-        FeatureTransformV2.getContextFeature(currentMs, appType, hotSencetype, featMap);
-        FeatureTransformV2.getCreativeBaseFeature("e1", creativeInfo, featMap);
-
-        // head video feature
-        FeatureTransformV2.getVideoBaseFeature("h", currentMs, headInfo, featMap);
-
-        // user feature
-        FeatureTransformV2.getUserFeature(userOriginInfo, featMap);
-        FeatureTransformV2.getUserProfileFeature(userProfile, record, featMap);
-        FeatureTransformV2.getMid(uid, mid, currentMs, userProfile, featMap);
-
-        // user & video feature
-        FeatureTransformV2.getUserTagsCrossVideoFeature("c5", rankInfo, userOriginInfo.get("alg_mid_feature_return_tags"), featMap);
-        FeatureTransformV2.getUserTagsCrossVideoFeature("c6", rankInfo, userOriginInfo.get("alg_mid_feature_share_tags"), featMap);
-        FeatureTransformV2.getUserCFFeature("c7", vid, c7Map, featMap);
-        FeatureTransformV2.getUserCFFeature("c8", vid, c8Map, featMap);
-
-        // rank video feature
-        FeatureTransformV2.getVideoBaseFeature("r", currentMs, rankInfo, featMap);
-        FeatureTransformV2.getVideoFeature(vid, videoOriginInfo, featMap);
-
-        // head&rank cross feature
-        FeatureTransformV2.getHeadRankVideoCrossFeature(headInfo, rankInfo, featMap);
-        FeatureTransformV2.getCreativeCrossFeature("e1", creativeInfo, rankInfo, featMap);
-
-        // user profile & rank cross
-        FeatureTransformV2.getProfileVideoCrossFeature(currentMs, userProfile, rankInfo, historyVideoMap, featMap);
-
+        try {
+            // origin info
+            String ts = record.get("ts");
+            long currentMs = Long.parseLong(ts) * 1000;
+            String uid = record.getOrDefault("uid", "");
+            String mid = record.getOrDefault("mid", "");
+            String vid = record.get("vid");
+            String appType = record.getOrDefault("apptype", "");
+            String hotSencetype = record.getOrDefault("hotsencetype", "");
+            Map<String, String> extendMap = ConvertUtils.getRecordCol(record, "extend");
+            record.put("user_channel", FeatureUtils.getUserChannel(record.get("rootsourceid"), extendMap.get("group_name")));
+            record.put("user_level", FeatureUtils.getUserLevel(extendMap.get("rootsessionid"), record.get("subsessionid")));
+            Map<String, String> headInfo = ConvertUtils.getRecordCol(record, "v2_feature");
+            Map<String, String> rankInfo = ConvertUtils.getRecordCol(record, "v1_feature");
+            Map<String, String> creativeInfo = ConvertUtils.getRecordCol(record, "creative");
+            Map<String, Map<String, String>> userOriginInfo = getUserOriginInfo(record);
+            Map<String, Map<String, Map<String, String>>> videoOriginInfo = getVideoOriginInfo(record);
+
+            // parse info
+            Map<String, String> c9Map = userOriginInfo.get("alg_recsys_feature_user_share_return_stat");
+            UserShareReturnProfile userProfile = JSON.parseObject(ConvertUtils.toJson(c9Map), UserShareReturnProfile.class);
+            Map<String, Map<String, String>> historyVideoMap = ConvertUtils.list2Map(videoSeq);
+
+            Map<String, Map<String, String[]>> c7Map = FeatureTransformV2.parseUCFScore(userOriginInfo.get("alg_mid_feature_sharecf"));
+            Map<String, Map<String, String[]>> c8Map = FeatureTransformV2.parseUCFScore(userOriginInfo.get("alg_mid_feature_returncf"));
+
+            // context feature
+            FeatureTransformV2.getContextFeature(currentMs, appType, hotSencetype, featMap);
+            FeatureTransformV2.getCreativeBaseFeature("e1", creativeInfo, featMap);
+
+            // head video feature
+            FeatureTransformV2.getVideoBaseFeature("h", currentMs, headInfo, featMap);
+
+            // user feature
+            FeatureTransformV2.getUserFeature(userOriginInfo, featMap);
+            FeatureTransformV2.getUserProfileFeature(userProfile, record, featMap);
+            FeatureTransformV2.getMid(uid, mid, currentMs, userProfile, featMap);
+
+            // user & video feature
+            FeatureTransformV2.getUserTagsCrossVideoFeature("c5", rankInfo, userOriginInfo.get("alg_mid_feature_return_tags"), featMap);
+            FeatureTransformV2.getUserTagsCrossVideoFeature("c6", rankInfo, userOriginInfo.get("alg_mid_feature_share_tags"), featMap);
+            FeatureTransformV2.getUserCFFeature("c7", vid, c7Map, featMap);
+            FeatureTransformV2.getUserCFFeature("c8", vid, c8Map, featMap);
+
+            // rank video feature
+            FeatureTransformV2.getVideoBaseFeature("r", currentMs, rankInfo, featMap);
+            FeatureTransformV2.getVideoFeature(vid, videoOriginInfo, featMap);
+
+            // head&rank cross feature
+            FeatureTransformV2.getHeadRankVideoCrossFeature(headInfo, rankInfo, featMap);
+            FeatureTransformV2.getCreativeCrossFeature("e1", creativeInfo, rankInfo, featMap);
+
+            // user profile & rank cross
+            FeatureTransformV2.getProfileVideoCrossFeature(currentMs, userProfile, rankInfo, historyVideoMap, featMap);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
         // NOTE(review): with the added try/catch this line is reached on success and after a caught
         // exception alike, so featMap may hold only the features written before the failure.
         // Confirm that returning a partial map (rather than an empty one) is intended.
         return ConvertUtils.filterAndTruncate(featMap, scale);
     }