소스 검색

去掉无用信息

jch 6 일 전
부모
커밋
92f9c77491
1개의 변경된 파일, 0줄 추가 그리고 384줄 삭제
  1. 0 384
      src/main/scala/com/aliyun/odps/spark/examples/myUtils/FeatureTransform.java

+ 0 - 384
src/main/scala/com/aliyun/odps/spark/examples/myUtils/FeatureTransform.java

@@ -1,384 +0,0 @@
-package com.aliyun.odps.spark.examples.myUtils;
-
-import examples.utils.SimilarityUtils;
-
-import java.util.*;
-
-public class FeatureTransform {
-    private static final int seqMaxN = 2;
-    private static final int seqLastN = 2;
-    private static final double smoothPlus = 5.0;
-    private static final List<String> c1Periods = Arrays.asList("72h", "168h");
-    private static final List<String> b1Periods = Arrays.asList("1h", "3h", "24h", "72h", "168h");
-    private static final List<String> b2Periods = Arrays.asList("1h", "3h", "24h");
-    private static final List<String> b3Periods = Arrays.asList("24h", "168h");
-    private static final List<String> b4Periods = Arrays.asList("1h", "12h");
-    private static final List<String> b5Periods = Arrays.asList("72h", "168h");
-    private static final List<String> b6Periods = Arrays.asList("1h", "24h");
-    private static final List<String> b7Periods = Arrays.asList("24h", "168h");
-    private static final List<String> b8Periods = Arrays.asList("24h");
-    private static final List<String> b9Periods = Arrays.asList("24h");
-    private static final List<String> b10Periods = Arrays.asList("1h", "12h");
-    private static final List<String> b11Periods = Arrays.asList("12h", "168h");
-    private static final List<String> b13Periods = Arrays.asList("24h", "168h");
-    private static final List<String> videoCateAttrs = Arrays.asList(FeatureUtils.cate1Attr, FeatureUtils.cate2Attr, FeatureUtils.festive1Attr);
-    private static final List<String> videoSimAttrs = Arrays.asList("title", "cate2", "cate2_list", "keywords");
-    private static final List<String> hVideoSimAttrs = Arrays.asList("title");
-    private static final List<String> cfList = Arrays.asList("share", "return");
-
-    public static void getContextFeature(long currentMs, Map<String, Double> featureMap) {
-        Calendar calendar = Calendar.getInstance();
-        calendar.setTimeInMillis(currentMs);
-
-        int week = calendar.get(Calendar.DAY_OF_WEEK);
-        int hour = calendar.get(Calendar.HOUR_OF_DAY) + 1;
-        featureMap.put("week", week * 1.0);
-        featureMap.put("hour", hour * 1.0);
-    }
-
-    public static void getUserFeature(Map<String, Map<String, String>> userOriginInfo, Map<String, Double> featMap) {
-        oneTypeStatFeature("c1", "return_1_uv", c1Periods, userOriginInfo.get("mid_global_feature_20250212"), featMap);
-    }
-
-    public static void getUserProfileFeature(UserShareReturnProfile profile, Map<String, String> userInfo, Map<String, Double> featMap) {
-        if (null != profile) {
-            long s_pv = profile.getS_pv();              // share_pv(分享pv)
-            long s_cnt = profile.getS_cnt();            // share_cnt(分享次数)
-            long r_pv = profile.getR_pv();              // return_pv(回流pv)
-            long r_uv = profile.getR_uv();              // return_uv(回流uv)
-            long m_s_cnt = profile.getM_s_cnt();        // max_share_cnt(最大分享次数)
-            long m_r_uv = profile.getM_r_uv();          // max_return_uv(最大回流uv)
-            if (s_pv > 0) {
-                double s_pv_s = FeatureUtils.log1(s_pv);
-                double s_cnt_s = FeatureUtils.log1(s_cnt);
-                double r_pv_s = FeatureUtils.log1(r_pv);
-                double r_uv_s = FeatureUtils.log1(r_uv);
-                double m_s_cnt_s = FeatureUtils.log1(m_s_cnt);
-                double m_r_uv_s = FeatureUtils.log1(m_r_uv);
-                double ros_one = FeatureUtils.wilsonScore(r_pv, s_pv);
-                double ros = FeatureUtils.plusSmooth(r_uv, s_pv, smoothPlus);
-                double ros_minus = FeatureUtils.plusSmooth(r_uv, r_pv, smoothPlus);
-                featMap.put("c9_s_pv", s_pv_s);
-                featMap.put("c9_s_cnt", s_cnt_s);
-                featMap.put("c9_r_pv", r_pv_s);
-                featMap.put("c9_r_uv", r_uv_s);
-                featMap.put("c9_m_s_cnt", m_s_cnt_s);
-                featMap.put("c9_m_r_uv", m_r_uv_s);
-                featMap.put("c9_ros_one", ros_one);
-                featMap.put("c9_ros", ros);
-                featMap.put("c9_ros_minus", ros_minus);
-            }
-        }
-    }
-
-    public static void getUserTagsCrossVideoFeature(String prefix, Map<String, String> videoInfo, Map<String, String> infoMap, Map<String, Double> featMap) {
-        if (null == videoInfo || videoInfo.isEmpty() || null == infoMap || infoMap.isEmpty()) {
-            return;
-        }
-        String title = videoInfo.getOrDefault("title", "");
-        if (title.isEmpty()) {
-            return;
-        }
-        for (String period : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
-            String tags = infoMap.getOrDefault(period, "");
-            if (!tags.isEmpty()) {
-                Double[] doubles = FeatureUtils.funcC34567ForTagsNew(tags, title);
-                featMap.put(prefix + "_" + period + "_matchnum", doubles[0]);
-                featMap.put(prefix + "_" + period + "_maxscore", doubles[1]);
-                featMap.put(prefix + "_" + period + "_avgscore", doubles[2]);
-            }
-        }
-    }
-
-    public static void getUserCFFeature(String prefix, String vid, Map<String, Map<String, String[]>> infoMap, Map<String, Double> featMap) {
-        if (vid.isEmpty() || null == infoMap || infoMap.isEmpty()) {
-            return;
-        }
-        for (String cfType : cfList) {
-            if (infoMap.containsKey(cfType)) {
-                Map<String, String[]> cfScoresMap = infoMap.get(cfType);
-                if (null != cfScoresMap && cfScoresMap.containsKey(vid)) {
-                    String[] scores = cfScoresMap.get(vid);
-                    Double score1 = Double.parseDouble(scores[0]);
-                    Double score2 = Double.parseDouble(scores[1]);
-                    Double score3 = Double.parseDouble(scores[2]) <= 0 ? 0D : 1.0 / Double.parseDouble(scores[2]);
-                    featMap.put(prefix + "_" + cfType + "_score", score1);
-                    featMap.put(prefix + "_" + cfType + "_num", score2);
-                    featMap.put(prefix + "_" + cfType + "_rank", score3);
-                }
-            }
-        }
-    }
-
-    public static void getVideoFeature(String vid, Map<String, Map<String, Map<String, String>>> videoOriginInfo, Map<String, Double> featMap) {
-        oneTypeStatFeature("b1", "return_1_uv", b1Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_global_feature_20250212"), featMap);
-        oneTypeStatFeature("b2", "return_n_uv", b2Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b3", "return_n_uv", b3Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_recommend_flowpool_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b4", "return_n_uv", b4Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_apptype_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b5", "return_n_uv", b5Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_province_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b6", "return_n_uv", b6Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_brand_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b7", "return_n_uv", b7Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_hotsencetype_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b8", "return_n_uv", b8Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_merge_cate1_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b9", "return_n_uv", b9Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_merge_cate2_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b10", "return_n_uv", b10Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_channel_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b11", "return_n_uv", b11Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_festive_recommend_exp_feature_20250212"), featMap);
-        //getRateStatFeature("b12", "", dayPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_long_period_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b13", "return_n_uv", b13Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_video_unionid_recommend_exp_feature_20250212"), featMap);
-
-        // head video cf
-        headVideoCFD1Feature("d1", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("scene_type_vid_cf_feature_20250212"), featMap);
-        headVideoCFD2Feature("d2", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("vid_click_cf_feature_20250212"), featMap);
-        headVideoCFD3Feature("d3", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_recsys_feature_cf_i2i_v2"), featMap);
-    }
-
-    public static void getVideoBaseFeature(String prefix, long currentMs, Map<String, String> videoInfo, Map<String, Double> featMap) {
-        if (null == videoInfo || videoInfo.isEmpty()) {
-            return;
-        }
-        featMap.put(prefix + "@total_time", Double.parseDouble(videoInfo.getOrDefault("total_time", "0")));
-        featMap.put(prefix + "@bit_rate", Double.parseDouble(videoInfo.getOrDefault("bit_rate", "0")));
-
-        // cate
-        getVideoCateFeature(prefix, videoInfo, featMap);
-        if (videoInfo.containsKey("title")) {
-            int id = FeatureUtils.judgeVideoTimeType(videoInfo.get("title"));
-            if (id > 0) {
-                String key = String.format("%s@%s@%d", prefix, "tt", id);
-                featMap.put(key, 1.0);
-            }
-        }
-
-        // time
-        try {
-            if (videoInfo.containsKey("gmt_create_timestamp")) {
-                String createMsStr = videoInfo.get("gmt_create_timestamp");
-                long createMs = Long.parseLong(createMsStr);
-                double createTime = FeatureUtils.getTimeDiff(currentMs, createMs);
-                featMap.put(prefix + "@ts", 1 - createTime);
-            }
-        } catch (Exception ignored) {
-        }
-    }
-
-    public static void getHeadRankVideoCrossFeature(Map<String, String> headInfo, Map<String, String> rankInfo, Map<String, Double> featMap) {
-        getTwoVideoCrossFeature("hr_sim", videoSimAttrs, headInfo, rankInfo, featMap);
-    }
-
-    public static void getProfileVideoCrossFeature(long currentMs, UserShareReturnProfile profile, Map<String, String> rankVideo, Map<String, Map<String, String>> hVideoMap, Map<String, Double> featMap) {
-        if (null == profile) {
-            return;
-        }
-        getRSCrossFeature("c9_mss", currentMs, seqMaxN, profile.getM_s_s(), rankVideo, hVideoMap, featMap);
-        getRSCrossFeature("c9_mrs", currentMs, seqMaxN, profile.getM_r_s(), rankVideo, hVideoMap, featMap);
-        getRSCrossFeature("c9_lss", currentMs, seqLastN, profile.getL_s_s(), rankVideo, hVideoMap, featMap);
-        getRSCrossFeature("c9_lrs", currentMs, seqLastN, profile.getL_r_s(), rankVideo, hVideoMap, featMap);
-
-        if (null == rankVideo || rankVideo.isEmpty()) {
-            return;
-        }
-        getVideoAttrSRCrossFeature("c9_c1s", rankVideo.getOrDefault("merge_first_level_cate", ""), profile.getC1_s(), featMap);
-        getVideoAttrSRCrossFeature("c9_c2s", rankVideo.getOrDefault("merge_second_level_cate", ""), profile.getC2_s(), featMap);
-        getVideoAttrSRCrossFeature("c9_l1s", rankVideo.getOrDefault("festive_label1", ""), profile.getL1_s(), featMap);
-        getVideoAttrSRCrossFeature("c9_l2s", rankVideo.getOrDefault("festive_label2", ""), profile.getL2_s(), featMap);
-    }
-
-    private static void getRSCrossFeature(String prefix, long currentMs, int maxN, List<UserSRBO> list, Map<String, String> rankVideo, Map<String, Map<String, String>> hVideoMap, Map<String, Double> featMap) {
-        if (null != list && !list.isEmpty()) {
-            for (int i = 0; i < list.size() && i < maxN; i++) {
-                UserSRBO u = list.get(i);
-                if (null != u) {
-                    long id = u.getId();
-                    long cnt = u.getCnt();
-                    long uv = u.getUv();
-                    long ts = u.getTs();
-                    if (id > 0) {
-                        String vid = id + "";
-                        String baseKey = String.format("%s@%d", prefix, i + 1);
-                        if (cnt > 0) {
-                            featMap.put(baseKey + "@cnt", FeatureUtils.log1(cnt));
-                        }
-                        if (uv > 0) {
-                            featMap.put(baseKey + "@uv", FeatureUtils.log1(uv));
-                        }
-                        if (ts > 0) {
-                            featMap.put(baseKey + "@ts", 1 - FeatureUtils.getTimeDiff(currentMs, ts * 1000));
-                        }
-                        if (null != hVideoMap && hVideoMap.containsKey(vid)) {
-                            Map<String, String> hVideo = hVideoMap.get(vid);
-                            getTwoVideoCrossFeature(baseKey, hVideoSimAttrs, hVideo, rankVideo, featMap);
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    private static void getVideoAttrSRCrossFeature(String prefix, String attr, Map<String, VideoAttrSRBO> attrMap, Map<String, Double> featMap) {
-        if (null == attrMap || attrMap.isEmpty()) {
-            return;
-        }
-        attr = attr.trim();
-        if (attrMap.containsKey(attr)) {
-            VideoAttrSRBO bo = attrMap.get(attr);
-            if (null != bo) {
-                long sp = bo.getSp();    // share_pv
-                long rp = bo.getRp();    // return_n_pv_noself
-                long ru = bo.getRu();    // return_n_uv_noself
-                long mu = bo.getMu();    // max_return_uv
-                if (sp > 0) {
-                    double sp_s = FeatureUtils.log1(sp);
-                    double rp_s = FeatureUtils.log1(rp);
-                    double ru_s = FeatureUtils.log1(ru);
-                    double mu_s = FeatureUtils.log1(mu);
-
-                    double ros_one = FeatureUtils.wilsonScore(rp, sp);
-                    double ros = FeatureUtils.plusSmooth(ru, sp, smoothPlus);
-                    double ros_minus = FeatureUtils.plusSmooth(ru, rp, smoothPlus);
-
-                    featMap.put(prefix + "@sp", sp_s);
-                    featMap.put(prefix + "@rp", rp_s);
-                    featMap.put(prefix + "@ru", ru_s);
-                    featMap.put(prefix + "@mu", mu_s);
-                    featMap.put(prefix + "@ros_one", ros_one);
-                    featMap.put(prefix + "@ros", ros);
-                    featMap.put(prefix + "@ros_minus", ros_minus);
-                }
-            }
-        }
-    }
-
-    private static void getVideoCateFeature(String prefix, Map<String, String> videoInfo, Map<String, Double> featMap) {
-        if (null == videoInfo || videoInfo.isEmpty()) {
-            return;
-        }
-        for (String attr : videoCateAttrs) {
-            String attrVal = videoInfo.getOrDefault(attr, "");
-            attrVal = attrVal.trim();
-            int attrId = FeatureUtils.getAttrId(attr, attrVal);
-            if (attrId > 0) {
-                String key = String.format("%s@%s@%d", prefix, attr, attrId);
-                featMap.put(key, 1.0);
-            }
-        }
-    }
-
-    private static void getTwoVideoCrossFeature(String prefix, List<String> attrs, Map<String, String> video1, Map<String, String> video2, Map<String, Double> featMap) {
-        if (null == video1 || video1.isEmpty() || null == video2 || video2.isEmpty()) {
-            return;
-        }
-        for (String attr : attrs) {
-            String attr1 = video1.getOrDefault(attr, "");
-            String attr2 = video2.getOrDefault(attr, "");
-            if (!"".equals(attr1) && !"unknown".equals(attr1) && !"".equals(attr2) && !"unknown".equals(attr2)) {
-                double simScore = SimilarityUtils.word2VecSimilarity(attr1, attr2);
-                featMap.put(prefix + "_" + attr, simScore);
-            }
-        }
-    }
-
-    private static void headVideoCFD1Feature(String prefix, Map<String, String> infoMap, Map<String, Double> featMap) {
-        double ros_cf_score = getOneInfo("ros_cf_score", infoMap);
-        double ros_cf_rank = getOneInfo("ros_cf_rank", infoMap);
-        double rov_cf_score = getOneInfo("rov_cf_score", infoMap);
-        double rov_cf_rank = getOneInfo("rov_cf_rank", infoMap);
-        featMap.put(prefix + "_ros_cf_score", ros_cf_score);
-        featMap.put(prefix + "_ros_cf_rank", ros_cf_rank);
-        featMap.put(prefix + "_rov_cf_score", rov_cf_score);
-        featMap.put(prefix + "_rov_cf_rank", rov_cf_rank);
-    }
-
-    private static void headVideoCFD2Feature(String prefix, Map<String, String> infoMap, Map<String, Double> featMap) {
-        double score = getOneInfo("score", infoMap);
-        double rank = getOneInfo("rank", infoMap);
-        double onlines = getOneInfo("onlines", infoMap);
-        featMap.put(prefix + "_score", score);
-        featMap.put(prefix + "_rank", rank);
-        featMap.put(prefix + "_onlines", onlines);
-    }
-
-    private static void headVideoCFD3Feature(String prefix, Map<String, String> infoMap, Map<String, Double> featMap) {
-        double exp = getOneInfo("exp", infoMap);
-        double return_n = getOneInfo("return_n", infoMap);
-        double rovn = getOneInfo("rovn", infoMap);
-        featMap.put(prefix + "_exp", FeatureUtils.log1(exp));
-        featMap.put(prefix + "_return_n", FeatureUtils.log1(return_n));
-        featMap.put(prefix + "_rovn", rovn);
-    }
-
-    public static Map<String, Map<String, String[]>> parseUCFScore(Map<String, String> mapInfo) {
-        Map<String, Map<String, String[]>> allScoresMap = new HashMap<>();
-        for (String cfType : cfList) {
-            String data = mapInfo.getOrDefault(cfType, "");
-            if (!data.isEmpty()) {
-                Map<String, String[]> oneScoresMap = new HashMap<>();
-                String[] entries = data.split(",");
-                for (String entry : entries) {
-                    String[] rList = entry.split(":");
-                    if (rList.length >= 4) { // 确保分割后有四个元素
-                        String key = rList[0];
-                        String value1 = rList[1];
-                        String value2 = rList[2];
-                        String value3 = rList[3];
-                        String[] strs = {value1, value2, value3};
-                        oneScoresMap.put(key, strs);
-                    }
-                }
-                if (!oneScoresMap.isEmpty()) {
-                    allScoresMap.put(cfType, oneScoresMap);
-                }
-            }
-        }
-        return allScoresMap;
-    }
-
-    private static void oneTypeStatFeature(String prefix, String uvPrefix, List<String> periods, Map<String, String> infoMap, Map<String, Double> featMap) {
-        if (null == infoMap || infoMap.isEmpty()) {
-            return;
-        }
-        for (String period : periods) {
-            double exp = getOneInfo("exp_" + period, infoMap);
-            if (!FeatureUtils.greaterThanZero(exp)) {
-                continue;
-            }
-            double is_share = getOneInfo("is_share_" + period, infoMap);
-            double share_cnt = getOneInfo("share_cnt_" + period, infoMap);
-            double is_return_1 = getOneInfo("is_return_1_" + period, infoMap);
-            double return_n_uv = getOneInfo(uvPrefix + "_" + period, infoMap);
-
-            double exp_s = FeatureUtils.log1(exp);
-            double is_share_s = FeatureUtils.log1(is_share);
-            double share_cnt_s = FeatureUtils.log1(share_cnt);
-            double is_return_1_s = FeatureUtils.log1(is_return_1);
-            double return_n_uv_s = FeatureUtils.log1(return_n_uv);
-
-            double str = FeatureUtils.wilsonScore(is_share, exp);
-            double str_plus = FeatureUtils.wilsonScore(is_return_1, exp);
-            double ros_one = FeatureUtils.wilsonScore(is_return_1, is_share);
-
-            double rovn = FeatureUtils.plusSmooth(return_n_uv, exp, smoothPlus);
-            double ros = FeatureUtils.plusSmooth(return_n_uv, is_share, smoothPlus);
-            double ros_n = FeatureUtils.plusSmooth(return_n_uv, share_cnt, smoothPlus);
-            double ros_minus = FeatureUtils.plusSmooth(return_n_uv, is_return_1, smoothPlus);
-
-            featMap.put(prefix + "_" + period + "_" + "exp", exp_s);
-            featMap.put(prefix + "_" + period + "_" + "is_share", is_share_s);
-            featMap.put(prefix + "_" + period + "_" + "share_cnt", share_cnt_s);
-            featMap.put(prefix + "_" + period + "_" + "is_return_1", is_return_1_s);
-            featMap.put(prefix + "_" + period + "_" + "return_n_uv", return_n_uv_s);
-            featMap.put(prefix + "_" + period + "_" + "str", str);
-            featMap.put(prefix + "_" + period + "_" + "str_plus", str_plus);
-            featMap.put(prefix + "_" + period + "_" + "ros_one", ros_one);
-            featMap.put(prefix + "_" + period + "_" + "rovn", rovn);
-            featMap.put(prefix + "_" + period + "_" + "ros", ros);
-            featMap.put(prefix + "_" + period + "_" + "ros_n", ros_n);
-            featMap.put(prefix + "_" + period + "_" + "ros_minus", ros_minus);
-        }
-    }
-
-    private static double getOneInfo(String name, Map<String, String> map) {
-        if (null == map) {
-            return 0.0;
-        }
-        return map.isEmpty() ? 0 : Double.parseDouble(map.getOrDefault(name, "0.0"));
-    }
-}