Explorar el Código

样本重新制作: 小时级别特征。

zhangbo hace 1 año
padre
commit
94541db714

+ 198 - 198
src/main/java/examples/dataloader/AdRedisFeatureConstructor.java

@@ -1,198 +1,198 @@
-package examples.dataloader;
-
-
-import com.aliyun.odps.account.Account;
-import com.aliyun.odps.account.AliyunAccount;
-import com.aliyun.odps.data.Record;
-import com.tzld.piaoquan.ad.engine.commons.base.AdActionFeature;
-import com.tzld.piaoquan.ad.engine.commons.base.AdItemFeature;
-import com.tzld.piaoquan.ad.engine.commons.base.UserAdFeature;
-
-
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * Static helpers that copy the columns of one ODPS {@link Record} into the ad-engine
- * feature beans ({@link UserAdFeature} and {@link AdItemFeature}).
- *
- * <p>Every statistic is read with {@code record.getString(...)} and handed to the
- * {@code setOrigin*} setters of {@link AdActionFeature} unchanged.
- */
-public class AdRedisFeatureConstructor {
-
-    // NOTE(review): this class used to declare private BUCKET_NAME / ODPS_CONFIG / account
-    // fields holding a hard-coded Aliyun access id and access key. Nothing in the class read
-    // them, so they were removed. The key pair that was committed should be rotated, and any
-    // code that needs an ODPS account should load credentials from configuration instead.
-
-    /**
-     * Builds the user-side feature bean from one record.
-     *
-     * <p>Each time window (1 day, 3 days, 7 days, 3 months) is stored in its own
-     * {@link AdActionFeature}. Previously the 3-day and 7-day columns were written into the
-     * 1-day bean by mistake, which left the 3-day and 7-day beans empty and overwrote the
-     * 1-day values.
-     *
-     * @param record row providing {@code mids} and the {@code ad_*_<window>} columns
-     * @return a new {@link UserAdFeature} populated from {@code record}
-     */
-    public static UserAdFeature constructUserFeature(Record record) {
-        UserAdFeature userFeature = new UserAdFeature();
-        userFeature.setMid(record.getString("mids"));
-
-        // 1day features
-        userFeature.setDay1_cnt_features(originActionFeature(record,
-                "ad_view_1day", "ad_click_1day", "ad_conversion_1day", "ad_ctr_1day", "ad_cvr_1day"));
-
-        // 3day features
-        userFeature.setDay3_cnt_features(originActionFeature(record,
-                "ad_view_3day", "ad_click_3day", "ad_conversion_3day", "ad_ctr_3day", "ad_cvr_3day"));
-
-        // 7day features
-        // NOTE(review): "ad_click7day" has no underscore before "7day", unlike every other
-        // column here. Kept as-is because the table schema is not visible; confirm the name.
-        userFeature.setDay7_cnt_features(originActionFeature(record,
-                "ad_view_7day", "ad_click7day", "ad_conversion_7day", "ad_ctr_7day", "ad_cvr_7day"));
-
-        // 3month features
-        userFeature.setMonth3_cnt_features(originActionFeature(record,
-                "ad_view_3month", "ad_click_3month", "ad_conversion_3month", "ad_ctr_3month", "ad_cvr_3month"));
-
-        return userFeature;
-    }
-
-    /**
-     * Builds the ad-side feature bean from one record.
-     *
-     * <p>Statistics are read for three dimensions (ad, creative, advertiser), each over four
-     * windows (1day, 3day, 7day, 3month), from columns named
-     * {@code <metric>_<dimension>_<window>}.
-     *
-     * @param record row providing the id columns and the per-dimension statistic columns
-     * @return a new {@link AdItemFeature} populated from {@code record}
-     */
-    public static AdItemFeature constructItemFeature(Record record) {
-        AdItemFeature itemFeature = new AdItemFeature();
-        // NOTE(review): adId is filled from the "creativeid" column, the same column used for
-        // creativeId below. Preserved from the original; confirm this is intended.
-        itemFeature.setAdId(record.getString("creativeid"));
-        // itemFeature.setAdCode(record.getString("adcode"));
-        itemFeature.setCampaignId(record.getString("campaignid"));
-        itemFeature.setAdvertiserId(record.getString("advertiserid"));
-        itemFeature.setCreativeId(record.getString("creativeid"));
-
-        // ad-dimension features
-        itemFeature.setDay1_cnt_features(itemActionFeature(record, "ad", "1day"));
-        itemFeature.setDay3_cnt_features(itemActionFeature(record, "ad", "3day"));
-        itemFeature.setDay7_cnt_features(itemActionFeature(record, "ad", "7day"));
-        itemFeature.setMonth3_cnt_features(itemActionFeature(record, "ad", "3month"));
-
-        // creative-dimension features
-        itemFeature.setCreative_1day_cnt_features(itemActionFeature(record, "creative", "1day"));
-        itemFeature.setCreative_3day_cnt_features(itemActionFeature(record, "creative", "3day"));
-        itemFeature.setCreative_7day_cnt_features(itemActionFeature(record, "creative", "7day"));
-        itemFeature.setCreative_3month_cnt_features(itemActionFeature(record, "creative", "3month"));
-
-        // advertiser-dimension features
-        itemFeature.setAdvertiser_1day_cnt_features(itemActionFeature(record, "advertiser", "1day"));
-        itemFeature.setAdvertiser_3day_cnt_features(itemActionFeature(record, "advertiser", "3day"));
-        itemFeature.setAdvertiser_7day_cnt_features(itemActionFeature(record, "advertiser", "7day"));
-        itemFeature.setAdvertiser_3month_cnt_features(itemActionFeature(record, "advertiser", "3month"));
-
-        return itemFeature;
-    }
-
-    /** Reads the five named columns (in view, click, conversion, ctr, cvr order) into a new bean. */
-    private static AdActionFeature originActionFeature(Record record, String viewColumn, String clickColumn,
-                                                       String conversionColumn, String ctrColumn,
-                                                       String cvrColumn) {
-        AdActionFeature feature = new AdActionFeature();
-        feature.setOriginAdView(record.getString(viewColumn));
-        feature.setOriginAdClick(record.getString(clickColumn));
-        feature.setOriginAdConversion(record.getString(conversionColumn));
-        feature.setOriginCtr(record.getString(ctrColumn));
-        feature.setOriginCvr(record.getString(cvrColumn));
-        return feature;
-    }
-
-    /** Reads the {@code <metric>_<dimension>_<window>} columns for one item dimension and window. */
-    private static AdActionFeature itemActionFeature(Record record, String dimension, String window) {
-        String suffix = "_" + dimension + "_" + window;
-        return originActionFeature(record,
-                "view" + suffix, "click" + suffix, "conversion" + suffix, "ctr" + suffix, "cvr" + suffix);
-    }
-
-}
+//package examples.dataloader;
+//
+//
+//import com.aliyun.odps.account.Account;
+//import com.aliyun.odps.account.AliyunAccount;
+//import com.aliyun.odps.data.Record;
+//import com.tzld.piaoquan.ad.engine.commons.base.AdActionFeature;
+//import com.tzld.piaoquan.ad.engine.commons.base.AdItemFeature;
+//import com.tzld.piaoquan.ad.engine.commons.base.UserAdFeature;
+//
+//
+//import java.util.HashMap;
+//import java.util.Map;
+//
+//public class AdRedisFeatureConstructor {
+//
+//    private static final String BUCKET_NAME = "ali-recommend";
+//    private static final Map<String, String> ODPS_CONFIG = new HashMap<String, String>();
+//
+//    static {
+//        ODPS_CONFIG.put("ENDPOINT", "http://service.cn.maxcompute.aliyun.com/api");
+//        ODPS_CONFIG.put("ACCESSID", "LTAIWYUujJAm7CbH");
+//        ODPS_CONFIG.put("ACCESSKEY", "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P");
+//    }
+//
+//    ;
+//
+//    private static final Account account = new AliyunAccount(ODPS_CONFIG.get("ACCESSID"), ODPS_CONFIG.get("ACCESSKEY"));
+//
+//
+//    public static UserAdFeature constructUserFeature(Record record) {
+//        UserAdFeature userFeature = new UserAdFeature();
+//        userFeature.setMid(record.getString("mids"));
+//
+//        // 1day features
+//        AdActionFeature userAd1dayActionFeature = new AdActionFeature();
+//        userAd1dayActionFeature.setOriginAdView(record.getString("ad_view_1day"));
+//        userAd1dayActionFeature.setOriginAdClick(record.getString("ad_click_1day"));
+//        userAd1dayActionFeature.setOriginAdConversion(record.getString("ad_conversion_1day"));
+//        userAd1dayActionFeature.setOriginCtr(record.getString("ad_ctr_1day"));
+//        userAd1dayActionFeature.setOriginCvr(record.getString("ad_cvr_1day"));
+//        userFeature.setDay1_cnt_features(userAd1dayActionFeature);
+//
+//
+//        // 3day features
+//        AdActionFeature userAd3dayActionFeature = new AdActionFeature();
+//        userAd1dayActionFeature.setOriginAdView(record.getString("ad_view_3day"));
+//        userAd1dayActionFeature.setOriginAdClick(record.getString("ad_click_3day"));
+//        userAd1dayActionFeature.setOriginAdConversion(record.getString("ad_conversion_3day"));
+//        userAd1dayActionFeature.setOriginCtr(record.getString("ad_ctr_3day"));
+//        userAd1dayActionFeature.setOriginCvr(record.getString("ad_cvr_3day"));
+//        userFeature.setDay3_cnt_features(userAd3dayActionFeature);
+//
+//
+//        // 7day features
+//        AdActionFeature userAd7dayActionFeature = new AdActionFeature();
+//        userAd1dayActionFeature.setOriginAdView(record.getString("ad_view_7day"));
+//        userAd1dayActionFeature.setOriginAdClick(record.getString("ad_click7day"));
+//        userAd1dayActionFeature.setOriginAdConversion(record.getString("ad_conversion_7day"));
+//        userAd1dayActionFeature.setOriginCtr(record.getString("ad_ctr_7day"));
+//        userAd1dayActionFeature.setOriginCvr(record.getString("ad_cvr_7day"));
+//        userFeature.setDay7_cnt_features(userAd7dayActionFeature);
+//
+//
+//        // 3month features
+//        AdActionFeature userAd3MonthActionFeature = new AdActionFeature();
+//        userAd3MonthActionFeature.setOriginAdView(record.getString("ad_view_3month"));
+//        userAd3MonthActionFeature.setOriginAdClick(record.getString("ad_click_3month"));
+//        userAd3MonthActionFeature.setOriginAdConversion(record.getString("ad_conversion_3month"));
+//        userAd3MonthActionFeature.setOriginCtr(record.getString("ad_ctr_3month"));
+//        userAd3MonthActionFeature.setOriginCvr(record.getString("ad_cvr_3month"));
+//        userFeature.setMonth3_cnt_features(userAd3MonthActionFeature);
+//
+//
+//        return userFeature;
+//    }
+//
+//
+//    public static AdItemFeature constructItemFeature(Record record) {
+//        AdItemFeature itemFeature = new AdItemFeature();
+//        itemFeature.setAdId(record.getString("creativeid"));
+//        // itemFeature.setAdCode(record.getString("adcode"));
+//        itemFeature.setCampaignId(record.getString("campaignid"));
+//        itemFeature.setAdvertiserId(record.getString("advertiserid"));
+//        itemFeature.setCreativeId(record.getString("creativeid"));
+//
+//        // ad 维度特征
+//        AdActionFeature adIdActionFeature1day = new AdActionFeature();
+//        adIdActionFeature1day.setOriginAdView(record.getString("view_ad_1day"));
+//        adIdActionFeature1day.setOriginAdClick(record.getString("click_ad_1day"));
+//        adIdActionFeature1day.setOriginAdConversion(record.getString("conversion_ad_1day"));
+//        adIdActionFeature1day.setOriginCtr(record.getString("ctr_ad_1day"));
+//        adIdActionFeature1day.setOriginCvr(record.getString("cvr_ad_1day"));
+//        itemFeature.setDay1_cnt_features(adIdActionFeature1day);
+//
+//        AdActionFeature adIdActionFeature3day = new AdActionFeature();
+//        adIdActionFeature3day.setOriginAdView(record.getString("view_ad_3day"));
+//        adIdActionFeature3day.setOriginAdClick(record.getString("click_ad_3day"));
+//        adIdActionFeature3day.setOriginAdConversion(record.getString("conversion_ad_3day"));
+//        adIdActionFeature3day.setOriginCtr(record.getString("ctr_ad_3day"));
+//        adIdActionFeature3day.setOriginCvr(record.getString("cvr_ad_3day"));
+//        itemFeature.setDay3_cnt_features(adIdActionFeature3day);
+//
+//        AdActionFeature adIdActionFeature7day = new AdActionFeature();
+//        adIdActionFeature7day.setOriginAdView(record.getString("view_ad_7day"));
+//        adIdActionFeature7day.setOriginAdClick(record.getString("click_ad_7day"));
+//        adIdActionFeature7day.setOriginAdConversion(record.getString("conversion_ad_7day"));
+//        adIdActionFeature7day.setOriginCtr(record.getString("ctr_ad_7day"));
+//        adIdActionFeature7day.setOriginCvr(record.getString("cvr_ad_7day"));
+//        itemFeature.setDay7_cnt_features(adIdActionFeature7day);
+//
+//        AdActionFeature adIdActionFeature3month = new AdActionFeature();
+//        adIdActionFeature3month.setOriginAdView(record.getString("view_ad_3month"));
+//        adIdActionFeature3month.setOriginAdClick(record.getString("click_ad_3month"));
+//        adIdActionFeature3month.setOriginAdConversion(record.getString("conversion_ad_3month"));
+//        adIdActionFeature3month.setOriginCtr(record.getString("ctr_ad_3month"));
+//        adIdActionFeature3month.setOriginCvr(record.getString("cvr_ad_3month"));
+//        itemFeature.setMonth3_cnt_features(adIdActionFeature3month);
+//
+//        // TODO creativeId等维度特征
+//        // creative 维度特征
+//        AdActionFeature creativeActionFeature1day = new AdActionFeature();
+//        creativeActionFeature1day.setOriginAdView(record.getString("view_creative_1day"));
+//        creativeActionFeature1day.setOriginAdClick(record.getString("click_creative_1day"));
+//        creativeActionFeature1day.setOriginAdConversion(record.getString("conversion_creative_1day"));
+//        creativeActionFeature1day.setOriginCtr(record.getString("ctr_creative_1day"));
+//        creativeActionFeature1day.setOriginCvr(record.getString("cvr_creative_1day"));
+//        itemFeature.setCreative_1day_cnt_features(creativeActionFeature1day);
+//
+//        AdActionFeature creativeActionFeature3day = new AdActionFeature();
+//        creativeActionFeature3day.setOriginAdView(record.getString("view_creative_3day"));
+//        creativeActionFeature3day.setOriginAdClick(record.getString("click_creative_3day"));
+//        creativeActionFeature3day.setOriginAdConversion(record.getString("conversion_creative_3day"));
+//        creativeActionFeature3day.setOriginCtr(record.getString("ctr_creative_3day"));
+//        creativeActionFeature3day.setOriginCvr(record.getString("cvr_creative_3day"));
+//        itemFeature.setCreative_3day_cnt_features(creativeActionFeature3day);
+//
+//        AdActionFeature creativeActionFeature7day = new AdActionFeature();
+//        creativeActionFeature7day.setOriginAdView(record.getString("view_creative_7day"));
+//        creativeActionFeature7day.setOriginAdClick(record.getString("click_creative_7day"));
+//        creativeActionFeature7day.setOriginAdConversion(record.getString("conversion_creative_7day"));
+//        creativeActionFeature7day.setOriginCtr(record.getString("ctr_creative_7day"));
+//        creativeActionFeature7day.setOriginCvr(record.getString("cvr_creative_7day"));
+//        itemFeature.setCreative_7day_cnt_features(creativeActionFeature7day);
+//
+//
+//        AdActionFeature creativeActionFeature3month = new AdActionFeature();
+//        creativeActionFeature3month.setOriginAdView(record.getString("view_creative_3month"));
+//        creativeActionFeature3month.setOriginAdClick(record.getString("click_creative_3month"));
+//        creativeActionFeature3month.setOriginAdConversion(record.getString("conversion_creative_3month"));
+//        creativeActionFeature3month.setOriginCtr(record.getString("ctr_creative_3month"));
+//        creativeActionFeature3month.setOriginCvr(record.getString("cvr_creative_3month"));
+//        itemFeature.setCreative_3month_cnt_features(creativeActionFeature3month);
+//
+//
+//
+//        // TODO advertiser维度
+//        // advertiser 维度特征
+//        AdActionFeature advidActionFeature1day = new AdActionFeature();
+//        advidActionFeature1day.setOriginAdView(record.getString("view_advertiser_1day"));
+//        advidActionFeature1day.setOriginAdClick(record.getString("click_advertiser_1day"));
+//        advidActionFeature1day.setOriginAdConversion(record.getString("conversion_advertiser_1day"));
+//        advidActionFeature1day.setOriginCtr(record.getString("ctr_advertiser_1day"));
+//        advidActionFeature1day.setOriginCvr(record.getString("cvr_advertiser_1day"));
+//        itemFeature.setAdvertiser_1day_cnt_features(advidActionFeature1day);
+//
+//        AdActionFeature advidActionFeature3day = new AdActionFeature();
+//        advidActionFeature3day.setOriginAdView(record.getString("view_advertiser_3day"));
+//        advidActionFeature3day.setOriginAdClick(record.getString("click_advertiser_3day"));
+//        advidActionFeature3day.setOriginAdConversion(record.getString("conversion_advertiser_3day"));
+//        advidActionFeature3day.setOriginCtr(record.getString("ctr_advertiser_3day"));
+//        advidActionFeature3day.setOriginCvr(record.getString("cvr_advertiser_3day"));
+//        itemFeature.setAdvertiser_3day_cnt_features(advidActionFeature3day);
+//
+//        AdActionFeature advidActionFeature7day = new AdActionFeature();
+//        advidActionFeature7day.setOriginAdView(record.getString("view_advertiser_7day"));
+//        advidActionFeature7day.setOriginAdClick(record.getString("click_advertiser_7day"));
+//        advidActionFeature7day.setOriginAdConversion(record.getString("conversion_advertiser_7day"));
+//        advidActionFeature7day.setOriginCtr(record.getString("ctr_advertiser_7day"));
+//        advidActionFeature7day.setOriginCvr(record.getString("cvr_advertiser_7day"));
+//        itemFeature.setAdvertiser_7day_cnt_features(advidActionFeature7day);
+//
+//
+//        AdActionFeature advidActionFeature3month = new AdActionFeature();
+//        advidActionFeature3month.setOriginAdView(record.getString("view_advertiser_3month"));
+//        advidActionFeature3month.setOriginAdClick(record.getString("click_advertiser_3month"));
+//        advidActionFeature3month.setOriginAdConversion(record.getString("conversion_advertiser_3month"));
+//        advidActionFeature3month.setOriginCtr(record.getString("ctr_advertiser_3month"));
+//        advidActionFeature3month.setOriginCvr(record.getString("cvr_advertiser_3month"));
+//        itemFeature.setAdvertiser_3month_cnt_features(advidActionFeature3month);
+//
+//
+//
+//        return itemFeature;
+//    }
+//
+//
+//}

+ 223 - 223
src/main/java/examples/dataloader/AdSampleConstructor.java

@@ -1,223 +1,223 @@
-package examples.dataloader;
-
-
-import com.aliyun.odps.account.Account;
-import com.aliyun.odps.account.AliyunAccount;
-import com.aliyun.odps.data.Record;
-import com.tzld.piaoquan.ad.engine.commons.base.AdActionFeature;
-import com.tzld.piaoquan.ad.engine.commons.base.AdRequestContext;
-import com.tzld.piaoquan.ad.engine.commons.base.UserAdFeature;
-import com.tzld.piaoquan.ad.engine.commons.base.AdItemFeature;
-
-
-import java.util.HashMap;
-import java.util.Map;
-
-public class AdSampleConstructor {
-
-    // Bucket name constant; it is not referenced in the part of this class visible here.
-    private static final String BUCKET_NAME = "ali-recommend";
-    // String-keyed ODPS connection settings, filled once by the static initializer below.
-    private static final Map<String, String> ODPS_CONFIG = new HashMap<String, String>();
-
-    // NOTE(review): the access id and access key are hard-coded in source. They should be
-    // rotated and loaded from configuration or the environment rather than committed.
-    static {
-        ODPS_CONFIG.put("ENDPOINT", "http://service.cn.maxcompute.aliyun.com/api");
-        ODPS_CONFIG.put("ACCESSID", "LTAIWYUujJAm7CbH");
-        ODPS_CONFIG.put("ACCESSKEY", "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P");
-    }
-
-    // Account built from the ACCESSID / ACCESSKEY entries above when the class is initialized.
-    private static final Account account = new AliyunAccount(ODPS_CONFIG.get("ACCESSID"), ODPS_CONFIG.get("ACCESSKEY"));
-
-
-    /**
-     * Copies the request-level columns of one record into a new {@link AdRequestContext}.
-     *
-     * <p>All values are read with {@code record.getString(...)} and passed to the setters
-     * unchanged.
-     *
-     * @param record row providing the app, device, time and location columns
-     * @return a new {@link AdRequestContext} populated from {@code record}
-     */
-    public static AdRequestContext constructRequestContext(Record record) {
-        AdRequestContext ctx = new AdRequestContext();
-
-        // App type and device information.
-        ctx.setApptype(record.getString("apptype"));
-        ctx.setMachineinfoBrand(record.getString("machineinfo_brand"));
-        ctx.setMachineinfoModel(record.getString("machineinfo_model"));
-        ctx.setMachineinfoSdkversion(record.getString("machineinfo_sdkversion"));
-        ctx.setMachineinfoWchatversion(record.getString("machineinfo_wechatversion"));
-
-        // Time columns.
-        ctx.setDay(record.getString("ctx_day"));
-        ctx.setWeek(record.getString("ctx_week"));
-        ctx.setHour(record.getString("ctx_hour"));
-
-        // Location columns; the "province" column feeds the region field.
-        ctx.setRegion(record.getString("province"));
-        ctx.setCity(record.getString("city"));
-
-        return ctx;
-    }
-
-
-    /**
-     * Builds the user-side feature bean for one sample record.
-     *
-     * <p>The user id comes from the {@code machinecode} column. Four {@link AdActionFeature}
-     * beans (1 day, 3 days, 7 days, 3 months) are filled from the {@code user_*} columns.
-     *
-     * @param record row providing {@code machinecode} and the {@code user_*_<window>} columns
-     * @return a new {@link UserAdFeature} populated from {@code record}
-     */
-    public static UserAdFeature constructUserFeature(Record record) {
-        UserAdFeature profile = new UserAdFeature();
-        // record.get(...) returns an Object here; toString() throws if the column is null.
-        Object machineCode = record.get("machinecode");
-        profile.setMid(machineCode.toString());
-
-        // Statistics over the last day.
-        AdActionFeature oneDay = new AdActionFeature();
-        oneDay.setAdView(record.getString("user_view_1day"));
-        oneDay.setAdClick(record.getString("user_click_1day"));
-        oneDay.setAdConversion(record.getString("user_conversion_1day"));
-        oneDay.setCtr(record.getString("user_ctr_1day"));
-        oneDay.setCvr(record.getString("user_cvr_1day"));
-        profile.setDay1_cnt_features(oneDay);
-
-        // Statistics over the last three days.
-        AdActionFeature threeDay = new AdActionFeature();
-        threeDay.setAdView(record.getString("user_view_3day"));
-        threeDay.setAdClick(record.getString("user_click_3day"));
-        threeDay.setAdConversion(record.getString("user_conversion_3day"));
-        threeDay.setCtr(record.getString("user_ctr_3day"));
-        threeDay.setCvr(record.getString("user_cvr_3day"));
-        profile.setDay3_cnt_features(threeDay);
-
-        // Statistics over the last seven days.
-        // NOTE(review): "user_click7day" has no underscore before "7day", unlike its siblings.
-        // Confirm against the table schema.
-        AdActionFeature sevenDay = new AdActionFeature();
-        sevenDay.setAdView(record.getString("user_view_7day"));
-        sevenDay.setAdClick(record.getString("user_click7day"));
-        sevenDay.setAdConversion(record.getString("user_conversion_7day"));
-        sevenDay.setCtr(record.getString("user_ctr_7day"));
-        sevenDay.setCvr(record.getString("user_cvr_7day"));
-        profile.setDay7_cnt_features(sevenDay);
-
-        // Statistics over the last three months.
-        AdActionFeature threeMonth = new AdActionFeature();
-        threeMonth.setAdView(record.getString("user_view_3month"));
-        threeMonth.setAdClick(record.getString("user_click_3month"));
-        threeMonth.setAdConversion(record.getString("user_conversion_3month"));
-        threeMonth.setCtr(record.getString("user_ctr_3month"));
-        threeMonth.setCvr(record.getString("user_cvr_3month"));
-        profile.setMonth3_cnt_features(threeMonth);
-
-        return profile;
-    }
-
-
-    public static AdItemFeature constructItemFeature(Record record) {
-        AdItemFeature itemFeature = new AdItemFeature();
-
-
-        itemFeature.setAdId(record.getString("adid"));
-        // itemFeature.setAdCode(record.getString("adcode"));
-        itemFeature.setAdvertiserId(record.getString("advertiserid"));
-        itemFeature.setCampaignId(record.getString("campaignid"));
-        itemFeature.setCreativeId(record.getString("creativeid"));
-
-        // 1day features
-        AdActionFeature user1dayActionFeature = new AdActionFeature();
-        user1dayActionFeature.setAdView(record.getString("ad_view_1day"));
-        user1dayActionFeature.setAdClick(record.getString("ad_click_1day"));
-        user1dayActionFeature.setAdConversion(record.getString("ad_conversion_1day"));
-        user1dayActionFeature.setCtr(record.getString("ad_ctr_1day"));
-        user1dayActionFeature.setCvr(record.getString("ad_cvr_1day"));
-        itemFeature.setDay1_cnt_features(user1dayActionFeature);
-
-        // 3day features
-        AdActionFeature user3dayActionFeature = new AdActionFeature();
-        user3dayActionFeature.setAdView(record.getString("ad_view_3day"));
-        user3dayActionFeature.setAdClick(record.getString("ad_click_3day"));
-        user3dayActionFeature.setAdConversion(record.getString("ad_conversion_3day"));
-        user3dayActionFeature.setCtr(record.getString("ad_ctr_3day"));
-        user3dayActionFeature.setCvr(record.getString("ad_cvr_3day"));
-        itemFeature.setDay3_cnt_features(user3dayActionFeature);
-
-
-        // 7day features
-        AdActionFeature user7dayActionFeature = new AdActionFeature();
-        user7dayActionFeature.setAdView(record.getString("ad_view_7day"));
-        user7dayActionFeature.setAdClick(record.getString("ad_click_7day"));
-        user7dayActionFeature.setAdConversion(record.getString("ad_conversion_7day"));
-        user7dayActionFeature.setCtr(record.getString("ad_ctr_7day"));
-        user7dayActionFeature.setCvr(record.getString("ad_cvr_7day"));
-        itemFeature.setDay7_cnt_features(user7dayActionFeature);
-
-        // 3month features
-        AdActionFeature user3MonthActionFeature = new AdActionFeature();
-        user3MonthActionFeature.setAdView(record.getString("ad_view_3month"));
-        user3MonthActionFeature.setAdClick(record.getString("ad_click_3month"));
-        user3MonthActionFeature.setAdConversion(record.getString("ad_conversion_3month"));
-        user3MonthActionFeature.setCtr(record.getString("ad_ctr_3month"));
-        user3MonthActionFeature.setCvr(record.getString("ad_cvr_3month"));
-        itemFeature.setMonth3_cnt_features(user3MonthActionFeature);
-
-
-        //TODO  CREATIVE 维度  需要在样本中补齐
-        AdActionFeature creative1dayFeature = new AdActionFeature();
-        creative1dayFeature.setAdView(record.getString("view_creative_1day"));
-        creative1dayFeature.setAdClick(record.getString("click_creative_1day"));
-        creative1dayFeature.setAdConversion(record.getString("conversion_creative_1day"));
-        creative1dayFeature.setCtr(record.getString("ctr_creative_1day"));
-        creative1dayFeature.setCvr(record.getString("cvr_creative_1day"));
-        itemFeature.setCreative_1day_cnt_features(creative1dayFeature);
-
-        // 3day features
-        AdActionFeature creative3dayFeature = new AdActionFeature();
-        creative3dayFeature.setAdView(record.getString("view_creative_3day"));
-        creative3dayFeature.setAdClick(record.getString("click_creative_3day"));
-        creative3dayFeature.setAdConversion(record.getString("conversion_creative_3day"));
-        creative3dayFeature.setCtr(record.getString("ctr_creative_3day"));
-        creative3dayFeature.setCvr(record.getString("cvr_creative_3day"));
-        itemFeature.setCreative_3day_cnt_features(creative3dayFeature);
-
-
-        // 7day features
-        AdActionFeature creative7dayFeature = new AdActionFeature();
-        creative7dayFeature.setAdView(record.getString("view_creative_7day"));
-        creative7dayFeature.setAdClick(record.getString("click_creative_7day"));
-        creative7dayFeature.setAdConversion(record.getString("conversion_creative_7day"));
-        creative7dayFeature.setCtr(record.getString("ctr_creative_7day"));
-        creative7dayFeature.setCvr(record.getString("cvr_creative_7day"));
-        itemFeature.setCreative_7day_cnt_features(creative7dayFeature);
-
-        // 3month features
-        AdActionFeature creative3MonthFeature = new AdActionFeature();
-        creative3MonthFeature.setAdView(record.getString("view_creative_3month"));
-        creative3MonthFeature.setAdClick(record.getString("click_creative_3month"));
-        creative3MonthFeature.setAdConversion(record.getString("conversion_creative_3month"));
-        creative3MonthFeature.setCtr(record.getString("ctr_creative_3month"));
-        creative3MonthFeature.setCvr(record.getString("cvr_creative_3month"));
-        itemFeature.setCreative_3month_cnt_features(creative3MonthFeature);
-
-
-        // advertiser id
-        // 1day features
-        AdActionFeature advertiser1dayFeature = new AdActionFeature();
-        advertiser1dayFeature.setAdView(record.getString("advertiser_view_1day"));
-        advertiser1dayFeature.setAdClick(record.getString("advertiser_click_1day"));
-        advertiser1dayFeature.setAdConversion(record.getString("advertiser_conversion_1day"));
-        advertiser1dayFeature.setCtr(record.getString("advertiser_ctr_1day"));
-        advertiser1dayFeature.setCvr(record.getString("advertiser_cvr_1day"));
-        itemFeature.setAdvertiser_1day_cnt_features(advertiser1dayFeature);
-
-        // 3day features
-        AdActionFeature advertiser3dayFeature = new AdActionFeature();
-        advertiser3dayFeature.setAdView(record.getString("advertiser_view_3day"));
-        advertiser3dayFeature.setAdClick(record.getString("advertiser_click_3day"));
-        advertiser3dayFeature.setAdConversion(record.getString("advertiser_conversion_3day"));
-        advertiser3dayFeature.setCtr(record.getString("advertiser_ctr_3day"));
-        advertiser3dayFeature.setCvr(record.getString("advertiser_cvr_3day"));
-        itemFeature.setAdvertiser_3day_cnt_features(advertiser3dayFeature);
-
-
-        // 7day features
-        AdActionFeature advertiser7dayFeature = new AdActionFeature();
-        advertiser7dayFeature.setAdView(record.getString("advertiser_view_7day"));
-        advertiser7dayFeature.setAdClick(record.getString("advertiser_click_7day"));
-        advertiser7dayFeature.setAdConversion(record.getString("advertiser_conversion_7day"));
-        advertiser7dayFeature.setCtr(record.getString("advertiser_ctr_7day"));
-        advertiser7dayFeature.setCvr(record.getString("advertiser_cvr_7day"));
-        itemFeature.setAdvertiser_7day_cnt_features(advertiser7dayFeature);
-
-        // 3month features
-        AdActionFeature advertiser3monthFeature = new AdActionFeature();
-        advertiser3monthFeature.setAdView(record.getString("advertiser_view_3month"));
-        advertiser3monthFeature.setAdClick(record.getString("advertiser_view_3month"));
-        advertiser3monthFeature.setAdConversion(record.getString("advertiser_conversion_3month"));
-        advertiser3monthFeature.setCtr(record.getString("advertiser_ctr_3month"));
-        advertiser3monthFeature.setCvr(record.getString("advertiser_cvr_3month"));
-        itemFeature.setAdvertiser_3month_cnt_features(advertiser3monthFeature);
-
-
-
-        return itemFeature;
-    }
-
-
-}
+//package examples.dataloader;
+//
+//
+//import com.aliyun.odps.account.Account;
+//import com.aliyun.odps.account.AliyunAccount;
+//import com.aliyun.odps.data.Record;
+//import com.tzld.piaoquan.ad.engine.commons.base.AdActionFeature;
+//import com.tzld.piaoquan.ad.engine.commons.base.AdRequestContext;
+//import com.tzld.piaoquan.ad.engine.commons.base.UserAdFeature;
+//import com.tzld.piaoquan.ad.engine.commons.base.AdItemFeature;
+//
+//
+//import java.util.HashMap;
+//import java.util.Map;
+//
+//public class AdSampleConstructor {
+//
+//    private static final String BUCKET_NAME = "ali-recommend";
+//    private static final Map<String, String> ODPS_CONFIG = new HashMap<String, String>();
+//
+//    static {
+//        ODPS_CONFIG.put("ENDPOINT", "http://service.cn.maxcompute.aliyun.com/api");
+//        ODPS_CONFIG.put("ACCESSID", "LTAIWYUujJAm7CbH");
+//        ODPS_CONFIG.put("ACCESSKEY", "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P");
+//    }
+//
+//    private static final Account account = new AliyunAccount(ODPS_CONFIG.get("ACCESSID"), ODPS_CONFIG.get("ACCESSKEY"));
+//
+//
+//    public static AdRequestContext constructRequestContext(Record record) {
+//        AdRequestContext requestContext = new AdRequestContext();
+//        requestContext.setApptype(record.getString("apptype"));
+//        requestContext.setMachineinfoBrand(record.getString("machineinfo_brand"));
+//        requestContext.setMachineinfoModel(record.getString("machineinfo_model"));
+//        requestContext.setMachineinfoSdkversion(record.getString("machineinfo_sdkversion"));
+//        requestContext.setMachineinfoWchatversion(record.getString("machineinfo_wechatversion"));
+//
+//
+//        requestContext.setDay(record.getString("ctx_day"));
+//        requestContext.setWeek(record.getString("ctx_week"));
+//        requestContext.setHour(record.getString("ctx_hour"));
+//        requestContext.setRegion(record.getString("province"));
+//        requestContext.setCity(record.getString("city"));
+//        return requestContext;
+//    }
+//
+//
+//    public static UserAdFeature constructUserFeature(Record record) {
+//        UserAdFeature userFeature = new UserAdFeature();
+//        userFeature.setMid(record.get("machinecode").toString());
+//
+//        // 1day features
+//        AdActionFeature user1dayActionFeature = new AdActionFeature();
+//        user1dayActionFeature.setAdView(record.getString("user_view_1day"));
+//        user1dayActionFeature.setAdClick(record.getString("user_click_1day"));
+//        user1dayActionFeature.setAdConversion(record.getString("user_conversion_1day"));
+//        user1dayActionFeature.setCtr(record.getString("user_ctr_1day"));
+//        user1dayActionFeature.setCvr(record.getString("user_cvr_1day"));
+//        userFeature.setDay1_cnt_features(user1dayActionFeature);
+//
+//        // 3day features
+//        AdActionFeature user3dayActionFeature = new AdActionFeature();
+//        user3dayActionFeature.setAdView(record.getString("user_view_3day"));
+//        user3dayActionFeature.setAdClick(record.getString("user_click_3day"));
+//        user3dayActionFeature.setAdConversion(record.getString("user_conversion_3day"));
+//        user3dayActionFeature.setCtr(record.getString("user_ctr_3day"));
+//        user3dayActionFeature.setCvr(record.getString("user_cvr_3day"));
+//        userFeature.setDay3_cnt_features(user3dayActionFeature);
+//
+//
+//        // 7day features
+//        AdActionFeature user7dayActionFeature = new AdActionFeature();
+//        user7dayActionFeature.setAdView(record.getString("user_view_7day"));
+//        user7dayActionFeature.setAdClick(record.getString("user_click7day"));
+//        user7dayActionFeature.setAdConversion(record.getString("user_conversion_7day"));
+//        user7dayActionFeature.setCtr(record.getString("user_ctr_7day"));
+//        user7dayActionFeature.setCvr(record.getString("user_cvr_7day"));
+//        userFeature.setDay7_cnt_features(user7dayActionFeature);
+//
+//        // 3month features
+//        AdActionFeature user3MonthActionFeature = new AdActionFeature();
+//        user3MonthActionFeature.setAdView(record.getString("user_view_3month"));
+//        user3MonthActionFeature.setAdClick(record.getString("user_click_3month"));
+//        user3MonthActionFeature.setAdConversion(record.getString("user_conversion_3month"));
+//        user3MonthActionFeature.setCtr(record.getString("user_ctr_3month"));
+//        user3MonthActionFeature.setCvr(record.getString("user_cvr_3month"));
+//        userFeature.setMonth3_cnt_features(user3MonthActionFeature);
+//
+//        return userFeature;
+//    }
+//
+//
+//    public static AdItemFeature constructItemFeature(Record record) {
+//        AdItemFeature itemFeature = new AdItemFeature();
+//
+//
+//        itemFeature.setAdId(record.getString("adid"));
+//        // itemFeature.setAdCode(record.getString("adcode"));
+//        itemFeature.setAdvertiserId(record.getString("advertiserid"));
+//        itemFeature.setCampaignId(record.getString("campaignid"));
+//        itemFeature.setCreativeId(record.getString("creativeid"));
+//
+//        // 1day features
+//        AdActionFeature user1dayActionFeature = new AdActionFeature();
+//        user1dayActionFeature.setAdView(record.getString("ad_view_1day"));
+//        user1dayActionFeature.setAdClick(record.getString("ad_click_1day"));
+//        user1dayActionFeature.setAdConversion(record.getString("ad_conversion_1day"));
+//        user1dayActionFeature.setCtr(record.getString("ad_ctr_1day"));
+//        user1dayActionFeature.setCvr(record.getString("ad_cvr_1day"));
+//        itemFeature.setDay1_cnt_features(user1dayActionFeature);
+//
+//        // 3day features
+//        AdActionFeature user3dayActionFeature = new AdActionFeature();
+//        user3dayActionFeature.setAdView(record.getString("ad_view_3day"));
+//        user3dayActionFeature.setAdClick(record.getString("ad_click_3day"));
+//        user3dayActionFeature.setAdConversion(record.getString("ad_conversion_3day"));
+//        user3dayActionFeature.setCtr(record.getString("ad_ctr_3day"));
+//        user3dayActionFeature.setCvr(record.getString("ad_cvr_3day"));
+//        itemFeature.setDay3_cnt_features(user3dayActionFeature);
+//
+//
+//        // 7day features
+//        AdActionFeature user7dayActionFeature = new AdActionFeature();
+//        user7dayActionFeature.setAdView(record.getString("ad_view_7day"));
+//        user7dayActionFeature.setAdClick(record.getString("ad_click_7day"));
+//        user7dayActionFeature.setAdConversion(record.getString("ad_conversion_7day"));
+//        user7dayActionFeature.setCtr(record.getString("ad_ctr_7day"));
+//        user7dayActionFeature.setCvr(record.getString("ad_cvr_7day"));
+//        itemFeature.setDay7_cnt_features(user7dayActionFeature);
+//
+//        // 3month features
+//        AdActionFeature user3MonthActionFeature = new AdActionFeature();
+//        user3MonthActionFeature.setAdView(record.getString("ad_view_3month"));
+//        user3MonthActionFeature.setAdClick(record.getString("ad_click_3month"));
+//        user3MonthActionFeature.setAdConversion(record.getString("ad_conversion_3month"));
+//        user3MonthActionFeature.setCtr(record.getString("ad_ctr_3month"));
+//        user3MonthActionFeature.setCvr(record.getString("ad_cvr_3month"));
+//        itemFeature.setMonth3_cnt_features(user3MonthActionFeature);
+//
+//
+//        //TODO  CREATIVE 维度  需要在样本中补齐
+//        AdActionFeature creative1dayFeature = new AdActionFeature();
+//        creative1dayFeature.setAdView(record.getString("view_creative_1day"));
+//        creative1dayFeature.setAdClick(record.getString("click_creative_1day"));
+//        creative1dayFeature.setAdConversion(record.getString("conversion_creative_1day"));
+//        creative1dayFeature.setCtr(record.getString("ctr_creative_1day"));
+//        creative1dayFeature.setCvr(record.getString("cvr_creative_1day"));
+//        itemFeature.setCreative_1day_cnt_features(creative1dayFeature);
+//
+//        // 3day features
+//        AdActionFeature creative3dayFeature = new AdActionFeature();
+//        creative3dayFeature.setAdView(record.getString("view_creative_3day"));
+//        creative3dayFeature.setAdClick(record.getString("click_creative_3day"));
+//        creative3dayFeature.setAdConversion(record.getString("conversion_creative_3day"));
+//        creative3dayFeature.setCtr(record.getString("ctr_creative_3day"));
+//        creative3dayFeature.setCvr(record.getString("cvr_creative_3day"));
+//        itemFeature.setCreative_3day_cnt_features(creative3dayFeature);
+//
+//
+//        // 7day features
+//        AdActionFeature creative7dayFeature = new AdActionFeature();
+//        creative7dayFeature.setAdView(record.getString("view_creative_7day"));
+//        creative7dayFeature.setAdClick(record.getString("click_creative_7day"));
+//        creative7dayFeature.setAdConversion(record.getString("conversion_creative_7day"));
+//        creative7dayFeature.setCtr(record.getString("ctr_creative_7day"));
+//        creative7dayFeature.setCvr(record.getString("cvr_creative_7day"));
+//        itemFeature.setCreative_7day_cnt_features(creative7dayFeature);
+//
+//        // 3month features
+//        AdActionFeature creative3MonthFeature = new AdActionFeature();
+//        creative3MonthFeature.setAdView(record.getString("view_creative_3month"));
+//        creative3MonthFeature.setAdClick(record.getString("click_creative_3month"));
+//        creative3MonthFeature.setAdConversion(record.getString("conversion_creative_3month"));
+//        creative3MonthFeature.setCtr(record.getString("ctr_creative_3month"));
+//        creative3MonthFeature.setCvr(record.getString("cvr_creative_3month"));
+//        itemFeature.setCreative_3month_cnt_features(creative3MonthFeature);
+//
+//
+//        // advertiser id
+//        // 1day features
+//        AdActionFeature advertiser1dayFeature = new AdActionFeature();
+//        advertiser1dayFeature.setAdView(record.getString("advertiser_view_1day"));
+//        advertiser1dayFeature.setAdClick(record.getString("advertiser_click_1day"));
+//        advertiser1dayFeature.setAdConversion(record.getString("advertiser_conversion_1day"));
+//        advertiser1dayFeature.setCtr(record.getString("advertiser_ctr_1day"));
+//        advertiser1dayFeature.setCvr(record.getString("advertiser_cvr_1day"));
+//        itemFeature.setAdvertiser_1day_cnt_features(advertiser1dayFeature);
+//
+//        // 3day features
+//        AdActionFeature advertiser3dayFeature = new AdActionFeature();
+//        advertiser3dayFeature.setAdView(record.getString("advertiser_view_3day"));
+//        advertiser3dayFeature.setAdClick(record.getString("advertiser_click_3day"));
+//        advertiser3dayFeature.setAdConversion(record.getString("advertiser_conversion_3day"));
+//        advertiser3dayFeature.setCtr(record.getString("advertiser_ctr_3day"));
+//        advertiser3dayFeature.setCvr(record.getString("advertiser_cvr_3day"));
+//        itemFeature.setAdvertiser_3day_cnt_features(advertiser3dayFeature);
+//
+//
+//        // 7day features
+//        AdActionFeature advertiser7dayFeature = new AdActionFeature();
+//        advertiser7dayFeature.setAdView(record.getString("advertiser_view_7day"));
+//        advertiser7dayFeature.setAdClick(record.getString("advertiser_click_7day"));
+//        advertiser7dayFeature.setAdConversion(record.getString("advertiser_conversion_7day"));
+//        advertiser7dayFeature.setCtr(record.getString("advertiser_ctr_7day"));
+//        advertiser7dayFeature.setCvr(record.getString("advertiser_cvr_7day"));
+//        itemFeature.setAdvertiser_7day_cnt_features(advertiser7dayFeature);
+//
+//        // 3month features
+//        AdActionFeature advertiser3monthFeature = new AdActionFeature();
+//        advertiser3monthFeature.setAdView(record.getString("advertiser_view_3month"));
+//        advertiser3monthFeature.setAdClick(record.getString("advertiser_view_3month"));
+//        advertiser3monthFeature.setAdConversion(record.getString("advertiser_conversion_3month"));
+//        advertiser3monthFeature.setCtr(record.getString("advertiser_ctr_3month"));
+//        advertiser3monthFeature.setCvr(record.getString("advertiser_cvr_3month"));
+//        itemFeature.setAdvertiser_3month_cnt_features(advertiser3monthFeature);
+//
+//
+//
+//        return itemFeature;
+//    }
+//
+//
+//}

+ 124 - 0
src/main/java/examples/extractor/ExtractorUtils.java

@@ -0,0 +1,124 @@
+package examples.extractor;
+
+import java.util.Map;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.List;
+/**
+ * Stateless numeric and time helpers used by the rank feature extractors:
+ * safe division, bucketing of counts and ratios, simple statistics over
+ * lists, and hour-granularity timestamp arithmetic ({@code yyyyMMddHH}).
+ */
+public class ExtractorUtils {
+
+    /** Formatter for {@code yyyyMMddHH} timestamps; built once and shared by the time helpers. */
+    private static final DateTimeFormatter HOUR_FORMATTER = DateTimeFormatter.ofPattern("yyyyMMddHH");
+
+    /**
+     * Divides two numeric strings stored in {@code maps}: {@code maps.get(s2) / maps.get(s1)}.
+     *
+     * @param s1   key of the denominator
+     * @param s2   key of the numerator
+     * @param maps feature name to numeric string
+     * @return the quotient, or {@code 0.0} when either key is absent or mapped to {@code null},
+     *         or when the denominator is (approximately) zero
+     * @throws NumberFormatException if a value that gets parsed is not a valid number; the
+     *                               numerator is only parsed when the denominator is non-zero
+     */
+    public static Double division(String s1, String s2, Map<String, String> maps){
+        String denominatorText = maps.get(s1);
+        String numeratorText = maps.get(s2);
+        // null covers both "key absent" and "key mapped to null"; neither can be divided.
+        if (denominatorText == null || numeratorText == null){
+            return 0.0;
+        }
+        double denominator = Double.parseDouble(denominatorText);
+        if (isDoubleEqualToZero(denominator)){
+            return 0.0;
+        }
+        return Double.parseDouble(numeratorText) / denominator;
+    }
+
+    /**
+     * Returns {@code d2 / d1}, guarding against a zero denominator.
+     *
+     * @param d1 the denominator
+     * @param d2 the numerator
+     * @return the quotient, or {@code 0.0} when either argument is {@code null} or
+     *         {@code d1} is (approximately) zero
+     */
+    public static Double divisionDouble(Double d1, Double d2){
+        if (d1 == null || d2 == null || isDoubleEqualToZero(d1)){
+            return 0.0;
+        }
+        return d2 / d1;
+    }
+
+    /**
+     * Tells whether {@code value} is zero within a small tolerance.
+     *
+     * @param value the number to test
+     * @return {@code true} when {@code |value| < 1e-10}; {@code false} otherwise (including NaN)
+     */
+    public static boolean isDoubleEqualToZero(double value) {
+        final double epsilon = 1e-10; // tolerance below which a value is treated as zero
+        return Math.abs(value) < epsilon;
+    }
+
+    /**
+     * Buckets a count as {@code ceil(ln(key + 1) * 100)}, clamped to {@code [0, 100]}.
+     *
+     * @param key the raw count
+     * @return the bucket as a double in {@code [0, 100]}; inputs for which the logarithm is
+     *         undefined ({@code key < -1} or NaN) yield {@code 0}
+     */
+    public static double ceilLog(Double key) {
+        double bucket = Math.ceil(Math.log(key + 1.0) * 100);
+        // NaN fails every ordered comparison, so it would slip through the clamps below.
+        if (Double.isNaN(bucket)) {
+            return 0.0;
+        }
+        if (bucket > 100L) {
+            bucket = 100L;
+        }
+        if (bucket < 0) {
+            bucket = 0;
+        }
+        return bucket;
+    }
+
+    /**
+     * Buckets a ratio as {@code round(key^0.5 * 100)}, clamped to {@code [0, 100]}.
+     *
+     * @param key the raw ratio
+     * @return the bucket as a double in {@code [0, 100]}; a negative {@code key} yields
+     *         {@code 0} because {@code Math.round(NaN)} is {@code 0}
+     */
+    public static double bucketRatioFeature(Double key) {
+        double bucket = Math.round(Math.pow(key, 0.5) * 100);
+        if (bucket > 100L) {
+            bucket = 100L;
+        }
+        if (bucket < 0) {
+            bucket = 0;
+        }
+        return bucket;
+    }
+
+    /**
+     * Computes the population variance (mean of squared deviations from the mean).
+     *
+     * @param numbers the samples
+     * @return the variance, or {@code 0.0} for a {@code null} or empty list
+     */
+    public static double calculateVariance(List<Double> numbers) {
+        if (numbers == null || numbers.isEmpty()) {
+            return 0.0;
+        }
+        double average = calculateAverage(numbers);
+        return numbers.stream()
+                .mapToDouble(Double::doubleValue)
+                .map(x -> Math.pow(x - average, 2))
+                .average()
+                .orElse(0.0);
+    }
+
+    /**
+     * Computes the arithmetic mean.
+     *
+     * @param numbers the samples
+     * @return the mean, or {@code 0.0} for a {@code null} or empty list
+     */
+    public static double calculateAverage(List<Double> numbers) {
+        if (numbers == null || numbers.isEmpty()) {
+            return 0.0;
+        }
+        return numbers.stream()
+                .mapToDouble(Number::doubleValue)
+                .average()
+                .orElse(0.0);
+    }
+
+    /**
+     * Computes successive differences {@code numbers[i + 1] - numbers[i]}.
+     *
+     * @param numbers the input sequence
+     * @return a new list with one element fewer than the input; empty when the input is
+     *         {@code null} or has fewer than two elements
+     */
+    public static List<Double> calculateDifferences(List<Double> numbers) {
+        List<Double> differences = new ArrayList<>();
+        if (numbers == null) {
+            return differences;
+        }
+        for (int i = 0; i < numbers.size() - 1; i++) {
+            differences.add(numbers.get(i + 1) - numbers.get(i));
+        }
+        return differences;
+    }
+
+    /**
+     * Lists {@code N} consecutive hour stamps going backwards in time, starting with
+     * {@code timeString} itself (index 0 is the given hour, index 1 the hour before, ...).
+     *
+     * @param timeString start hour formatted as {@code yyyyMMddHH}
+     * @param N          how many stamps to produce; a non-positive value yields an empty list
+     * @return the hour stamps, newest first
+     * @throws java.time.format.DateTimeParseException if {@code timeString} is malformed
+     */
+    public static List<String> generateHourStrings(String timeString, int N) {
+        LocalDateTime dateTime = LocalDateTime.parse(timeString, HOUR_FORMATTER);
+        List<String> hourStrings = new ArrayList<>();
+        for (int i = 0; i < N; i++) {
+            hourStrings.add(dateTime.minusHours(i).format(HOUR_FORMATTER));
+        }
+        return hourStrings;
+    }
+
+    /**
+     * Moves an hour stamp backwards by the given number of hours.
+     *
+     * @param inputDateTime   hour formatted as {@code yyyyMMddHH}
+     * @param hoursToSubtract hours to go back (a negative value moves forward)
+     * @return the shifted hour, formatted as {@code yyyyMMddHH}
+     * @throws java.time.format.DateTimeParseException if {@code inputDateTime} is malformed
+     */
+    public static String subtractHours(String inputDateTime, int hoursToSubtract) {
+        LocalDateTime dateTime = LocalDateTime.parse(inputDateTime, HOUR_FORMATTER);
+        return dateTime.minusHours(hoursToSubtract).format(HOUR_FORMATTER);
+    }
+
+    /** Prints a few sample ratio buckets; manual smoke check only. */
+    public static void main(String[] args) {
+        System.out.println(bucketRatioFeature(0.1));
+        System.out.println(bucketRatioFeature(0.8));
+        System.out.println(bucketRatioFeature(0.01));
+        System.out.println(bucketRatioFeature(0.007));
+
+    }
+
+}

+ 291 - 0
src/main/java/examples/extractor/RankExtractorItemFeature.java

@@ -0,0 +1,291 @@
+package examples.extractor;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Builds item-side ranking features from raw count maps: offline rate features per time
+ * window, and realtime trend / count / rate features taken from hourly series keyed by
+ * {@code yyyyMMddHH}. All outputs are bucketed and rendered as strings.
+ */
+public class RankExtractorItemFeature {
+
+    /** Time windows whose {@code i_<window>_*_cnt} counts feed {@link #getItemRateFeature}. */
+    private static final String[] RATE_WINDOWS = {"1day", "3day", "7day", "3month"};
+
+    /** Hourly series summarised by {@link #getItemRealtimeTrend}. */
+    private static final String[] TREND_KEYS = {
+            "share_uv_list_1day", "return_uv_list_1day", "share_uv_list_1h", "return_uv_list_1h"
+    };
+
+    /** Series suffixes for which {@link #getItemRealtimeRate} derives rates. */
+    private static final String[] REALTIME_WINDOWS = {"1day", "1h"};
+
+    /** Number of hours (the given hour and the ones before it) covered by the trend features. */
+    private static final int TREND_HOURS = 6;
+
+    /**
+     * Derives ctr/str/rov/ros for each window in {@link #RATE_WINDOWS} from the
+     * {@code i_<window>_exp_cnt}, {@code _click_cnt}, {@code _share_cnt} and {@code _return_cnt}
+     * entries of {@code maps}. Rates that come out as zero (including missing inputs or a
+     * zero denominator) are omitted.
+     *
+     * @param maps feature name to numeric string
+     * @return bucketed rates keyed {@code i_<window>_<ctr|str|rov|ros>}
+     */
+    public static Map<String, String> getItemRateFeature(Map<String, String> maps) {
+        Map<String, Double> result = new HashMap<>();
+        for (String window : RATE_WINDOWS){
+            String prefix = "i_" + window + "_";
+            String exposure = prefix + "exp_cnt";
+            String share = prefix + "share_cnt";
+            String returned = prefix + "return_cnt";
+            putIfNonZero(result, prefix + "ctr", ExtractorUtils.division(exposure, prefix + "click_cnt", maps));
+            putIfNonZero(result, prefix + "str", ExtractorUtils.division(exposure, share, maps));
+            putIfNonZero(result, prefix + "rov", ExtractorUtils.division(exposure, returned, maps));
+            putIfNonZero(result, prefix + "ros", ExtractorUtils.division(share, returned, maps));
+        }
+        return rateFeatureChange(result);
+    }
+
+    /**
+     * Summarises the last {@link #TREND_HOURS} hours of each series in {@link #TREND_KEYS}:
+     * mean and variance of the values, and mean and variance of their successive differences.
+     * Hours are taken newest first, so each difference is the earlier hour minus the later one.
+     * Hours absent from a series count as {@code 0.0}; series absent from {@code maps} are skipped.
+     *
+     * <p>NOTE(review): the statistics are passed through {@link #rateFeatureChange}, whose
+     * bucketing saturates at 100 for any value of 1 or more and yields 0 for negative values;
+     * confirm that this is intended for count-based trends.
+     *
+     * @param maps series name to (hour stamp to value)
+     * @param date day part of the reference hour; concatenated with {@code hour} to form {@code yyyyMMddHH}
+     * @param hour hour part of the reference hour
+     * @return bucketed statistics keyed {@code <series>_6_avg}, {@code <series>_6_var},
+     *         {@code <series>_diff_6_avg} and {@code <series>_diff_6_var}; empty when
+     *         {@code date} or {@code hour} is {@code null} or empty
+     */
+    public static Map<String, String> getItemRealtimeTrend(Map<String, Map<String, Double>> maps, String date, String hour){
+        Map<String, Double> result = new HashMap<>();
+        if (isMissing(date) || isMissing(hour)){
+            return rateFeatureChange(result);
+        }
+
+        List<String> hourStrs = ExtractorUtils.generateHourStrings(date + hour, TREND_HOURS);
+        for (String key : TREND_KEYS){
+            Map<String, Double> series = maps.get(key);
+            if (series == null){
+                continue;
+            }
+            List<Double> values = hourStrs.stream()
+                    .map(stamp -> series.getOrDefault(stamp, 0.0D))
+                    .collect(Collectors.toList());
+            result.put(key + "_" + TREND_HOURS + "_avg", ExtractorUtils.calculateAverage(values));
+            result.put(key + "_" + TREND_HOURS + "_var", ExtractorUtils.calculateVariance(values));
+
+            List<Double> deltas = ExtractorUtils.calculateDifferences(values);
+            result.put(key + "_diff_" + TREND_HOURS + "_avg", ExtractorUtils.calculateAverage(deltas));
+            result.put(key + "_diff_" + TREND_HOURS + "_var", ExtractorUtils.calculateVariance(deltas));
+        }
+        return rateFeatureChange(result);
+    }
+
+
+    /**
+     * Applies {@link ExtractorUtils#bucketRatioFeature} to every value and renders it as a string.
+     *
+     * @param maps feature name to raw value
+     * @return a new map with the same keys and bucketed, stringified values
+     */
+    public static Map<String, String> rateFeatureChange(Map<String, Double> maps){
+        Map<String, String> result = new HashMap<>();
+        for (Map.Entry<String, Double> entry : maps.entrySet()){
+            double bucket = ExtractorUtils.bucketRatioFeature(entry.getValue());
+            result.put(entry.getKey(), String.valueOf(bucket));
+        }
+        return result;
+    }
+
+    /**
+     * Applies {@link ExtractorUtils#ceilLog} to the entries of {@code maps} whose key is in
+     * {@code names}; all other entries are dropped.
+     *
+     * @param maps  feature name to numeric string
+     * @param names the feature names to keep
+     * @return a new map with the selected keys and bucketed, stringified values
+     * @throws NumberFormatException if a selected value is not a parsable number
+     */
+    public static Map<String, String> cntFeatureChange(Map<String, String> maps,
+                                                       Set<String> names){
+        Map<String, String> result = new HashMap<>();
+        for (Map.Entry<String, String> entry : maps.entrySet()){
+            if (names.contains(entry.getKey())){
+                double bucket = ExtractorUtils.ceilLog(Double.valueOf(entry.getValue()));
+                result.put(entry.getKey(), String.valueOf(bucket));
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Reads, for every series named in {@code names}, the value recorded for the hour before
+     * {@code date + hour} and buckets it with {@link ExtractorUtils#ceilLog}. Values that are
+     * missing or zero are omitted.
+     *
+     * @param maps  series name to (hour stamp to value)
+     * @param names the series to read
+     * @param date  day part of the reference hour; concatenated with {@code hour} to form {@code yyyyMMddHH}
+     * @param hour  hour part of the reference hour
+     * @return bucketed counts keyed by series name; empty when {@code date} or {@code hour}
+     *         is {@code null} or empty
+     */
+    public static Map<String, String> getItemRealtimeCnt(Map<String, Map<String, Double>> maps,
+                                                         Set<String> names,
+                                                         String date, String hour){
+        Map<String, String> result = new HashMap<>();
+        if (isMissing(date) || isMissing(hour)){
+            return result;
+        }
+        String dateHour = ExtractorUtils.subtractHours(date + hour, 1);
+        for (Map.Entry<String, Map<String, Double>> entry : maps.entrySet()){
+            if (!names.contains(entry.getKey())){
+                continue;
+            }
+            Double num = entry.getValue().getOrDefault(dateHour, 0.0);
+            if (!ExtractorUtils.isDoubleEqualToZero(num)){
+                result.put(entry.getKey(), String.valueOf(ExtractorUtils.ceilLog(num)));
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Derives realtime ctr/str/ros/rov for the {@code 1day} and {@code 1h} series, using the
+     * values recorded for the hour before {@code date + hour}:
+     * ctr = play_pv / view_pv, str = share_pv / view_pv, ros = return_uv / share_pv,
+     * rov = return_uv / view_pv. Each rate uses numerator and denominator from the same
+     * window. A rate is omitted when either series is absent or the rate is zero.
+     *
+     * @param maps series name to (hour stamp to value)
+     * @param date day part of the reference hour; concatenated with {@code hour} to form {@code yyyyMMddHH}
+     * @param hour hour part of the reference hour
+     * @return bucketed rates keyed {@code i_<1day|1h>_<ctr|str|ros|rov>_rt}; empty when
+     *         {@code date} or {@code hour} is {@code null} or empty
+     */
+    public static Map<String, String> getItemRealtimeRate(Map<String, Map<String, Double>> maps,
+                                                         String date, String hour){
+        Map<String, Double> result = new HashMap<>();
+        if (isMissing(date) || isMissing(hour)){
+            return rateFeatureChange(result);
+        }
+        String dateHour = ExtractorUtils.subtractHours(date + hour, 1);
+
+        for (String window : REALTIME_WINDOWS){
+            String view = "view_pv_list_" + window;
+            String play = "play_pv_list_" + window;
+            String share = "share_pv_list_" + window;
+            String returned = "return_uv_list_" + window;
+            String prefix = "i_" + window + "_";
+            putRealtimeRate(result, prefix + "ctr_rt", maps, view, play, dateHour);
+            putRealtimeRate(result, prefix + "str_rt", maps, view, share, dateHour);
+            // The 1h ros previously divided by share_pv_list_1day; it now uses the 1h series
+            // like every other 1h rate.
+            putRealtimeRate(result, prefix + "ros_rt", maps, share, returned, dateHour);
+            putRealtimeRate(result, prefix + "rov_rt", maps, view, returned, dateHour);
+        }
+        return rateFeatureChange(result);
+    }
+
+    /** Stores {@code numerator / denominator} at {@code dateHour} under {@code name} when both series exist and the rate is non-zero. */
+    private static void putRealtimeRate(Map<String, Double> result, String name,
+                                        Map<String, Map<String, Double>> maps,
+                                        String denominatorKey, String numeratorKey,
+                                        String dateHour){
+        Map<String, Double> denominators = maps.get(denominatorKey);
+        Map<String, Double> numerators = maps.get(numeratorKey);
+        if (denominators == null || numerators == null){
+            return;
+        }
+        double denominator = denominators.getOrDefault(dateHour, 0.0);
+        double numerator = numerators.getOrDefault(dateHour, 0.0);
+        putIfNonZero(result, name, ExtractorUtils.divisionDouble(denominator, numerator));
+    }
+
+    /** Stores {@code value} under {@code name} unless it is (approximately) zero. */
+    private static void putIfNonZero(Map<String, Double> target, String name, double value){
+        if (!ExtractorUtils.isDoubleEqualToZero(value)){
+            target.put(name, value);
+        }
+    }
+
+    /** Tells whether a date or hour component is unusable ({@code null} or empty). */
+    private static boolean isMissing(String part){
+        return part == null || part.isEmpty();
+    }
+}

+ 104 - 0
src/main/java/examples/extractor/RankExtractorUserFeature.java

@@ -0,0 +1,104 @@
+package examples.extractor;
+
+
+import java.util.*;
+
+public class RankExtractorUserFeature {
+    public static Map<String, String> getUserRateFeature(Map<String, String> maps) {
+
+        double d;
+        Map<String, Double> result = new HashMap<>();
+        d = ExtractorUtils.division("u_1day_exp_cnt", "u_1day_click_cnt", maps);
+        if (!ExtractorUtils.isDoubleEqualToZero(d)){
+            result.put("u_1day_ctr",d);
+        }
+        d = ExtractorUtils.division("u_1day_exp_cnt", "u_1day_share_cnt", maps);
+        if (!ExtractorUtils.isDoubleEqualToZero(d)){
+            result.put("u_1day_str",d);
+        }
+        d = ExtractorUtils.division("u_1day_exp_cnt", "u_1day_return_cnt", maps);
+        if (!ExtractorUtils.isDoubleEqualToZero(d)){
+            result.put("u_1day_rov",d);
+        }
+        d = ExtractorUtils.division("u_1day_share_cnt", "u_1day_return_cnt", maps);
+        if (!ExtractorUtils.isDoubleEqualToZero(d)){
+            result.put("u_1day_ros",d);
+        }
+
+        d = ExtractorUtils.division("u_3day_exp_cnt", "u_3day_click_cnt", maps);
+        if (!ExtractorUtils.isDoubleEqualToZero(d)){
+            result.put("u_3day_ctr",d);
+        }
+        d = ExtractorUtils.division("u_3day_exp_cnt", "u_3day_share_cnt", maps);
+        if (!ExtractorUtils.isDoubleEqualToZero(d)){
+            result.put("u_3day_str",d);
+        }
+        d = ExtractorUtils.division("u_3day_exp_cnt", "u_3day_return_cnt", maps);
+        if (!ExtractorUtils.isDoubleEqualToZero(d)){
+            result.put("u_3day_rov",d);
+        }
+        d = ExtractorUtils.division("u_3day_share_cnt", "u_3day_return_cnt", maps);
+        if (!ExtractorUtils.isDoubleEqualToZero(d)){
+            result.put("u_3day_ros",d);
+        }
+
+        d = ExtractorUtils.division("u_7day_exp_cnt", "u_7day_click_cnt", maps);
+        if (!ExtractorUtils.isDoubleEqualToZero(d)){
+            result.put("u_7day_ctr",d);
+        }
+        d = ExtractorUtils.division("u_7day_exp_cnt", "u_7day_share_cnt", maps);
+        if (!ExtractorUtils.isDoubleEqualToZero(d)){
+            result.put("u_7day_str",d);
+        }
+        d = ExtractorUtils.division("u_7day_exp_cnt", "u_7day_return_cnt", maps);
+        if (!ExtractorUtils.isDoubleEqualToZero(d)){
+            result.put("u_7day_rov",d);
+        }
+        d = ExtractorUtils.division("u_7day_share_cnt", "u_7day_return_cnt", maps);
+        if (!ExtractorUtils.isDoubleEqualToZero(d)){
+            result.put("u_7day_ros",d);
+        }
+
+        d = ExtractorUtils.division("u_3month_exp_cnt", "u_3month_click_cnt", maps);
+        if (!ExtractorUtils.isDoubleEqualToZero(d)){
+            result.put("u_3month_ctr",d);
+        }
+        d = ExtractorUtils.division("u_3month_exp_cnt", "u_3month_share_cnt", maps);
+        if (!ExtractorUtils.isDoubleEqualToZero(d)){
+            result.put("u_3month_str",d);
+        }
+        d = ExtractorUtils.division("u_3month_exp_cnt", "u_3month_return_cnt", maps);
+        if (!ExtractorUtils.isDoubleEqualToZero(d)){
+            result.put("u_3month_rov",d);
+        }
+        d = ExtractorUtils.division("u_3month_share_cnt", "u_3month_return_cnt", maps);
+        if (!ExtractorUtils.isDoubleEqualToZero(d)){
+            result.put("u_3month_ros",d);
+        }
+
+        return rateFeatureChange(result);
+    }
+
+
+    public static Map<String, String> rateFeatureChange(Map<String, Double> maps){
+        Map<String, String> result = new HashMap<>();
+        for (Map.Entry<String, Double> entry : maps.entrySet()){
+            Double value = ExtractorUtils.bucketRatioFeature(entry.getValue());
+            result.put(entry.getKey(), String.valueOf(value));
+        }
+        return result;
+    }
+
+    public static Map<String, String> cntFeatureChange(Map<String, String> maps, Set<String> names){
+        Map<String, String> result = new HashMap<>();
+        for (Map.Entry<String, String> entry : maps.entrySet()){
+            if (!names.contains(entry.getKey())){
+                continue;
+            }
+            Double value = ExtractorUtils.ceilLog(Double.valueOf(entry.getValue()));
+            result.put(entry.getKey(), String.valueOf(value));
+        }
+        return result;
+    }
+
+}

+ 257 - 0
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_06_strData.scala

@@ -0,0 +1,257 @@
+package com.aliyun.odps.spark.examples.makedata
+
+import com.aliyun.odps.TableSchema
+import com.aliyun.odps.data.Record
+import com.aliyun.odps.spark.examples.myUtils.{MyDateUtils, MyHdfsUtils, ParamUtils, env}
+import examples.dataloader.{OfflineVlogShareLRFeatureExtractor, RequestContextOffline}
+import examples.extractor.RankExtractorUserFeature
+import examples.extractor.RankExtractorItemFeature
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.spark.sql.SparkSession
+
+import java.util
+import scala.collection.JavaConversions._
+import scala.collection.mutable
+import java.util.{Arrays, HashMap, HashSet, Map, Set}
+import com.alibaba.fastjson.JSONObject
+
+/*
+   注意:所有的构造特征,原始值为0.0时,当作无意义,不保留; 如果经过change变换,得到0.0,保留。
+ */
+
+/**
+ * Spark job that, for every date between `beginStr` and `endStr`, reads one partition of an ODPS
+ * table, turns each record into a line `logKey \t labelJson \t featureJson`, and writes the lines
+ * gzip-compressed to `savePath/<partition>` on HDFS.
+ */
+object makedata_06_strData {
+
+  // ---- Column groups read from each record -------------------------------------------------
+  private val sceneCols = Seq(
+    "apptype", "logtimestamp", "clientip", "ctx_day", "ctx_week", "ctx_hour", "ctx_region", "ctx_city"
+  )
+  private val userProfileCols = Seq(
+    "gender", "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_sdkversion",
+    "machineinfo_system", "machineinfo_wechatversion", "gmt_create_user"
+  )
+  private val userCntCols = Seq(
+    "u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
+    "u_3day_exp_cnt", "u_3day_click_cnt", "u_3day_share_cnt", "u_3day_return_cnt",
+    "u_7day_exp_cnt", "u_7day_click_cnt", "u_7day_share_cnt", "u_7day_return_cnt",
+    "u_3month_exp_cnt", "u_3month_click_cnt", "u_3month_share_cnt", "u_3month_return_cnt"
+  )
+  private val itemTextCols = Seq("title", "tags")
+  private val itemCntCols = Seq(
+    "total_time", "play_count_total",
+    "i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
+    "i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt",
+    "i_7day_exp_cnt", "i_7day_click_cnt", "i_7day_share_cnt", "i_7day_return_cnt",
+    "i_3month_exp_cnt", "i_3month_click_cnt", "i_3month_share_cnt", "i_3month_return_cnt"
+  )
+  private val itemRealtimeCols = Seq(
+    "view_pv_list_1day", "view_uv_list_1day", "play_pv_list_1day", "play_uv_list_1day",
+    "share_pv_list_1day", "share_uv_list_1day", "return_uv_list_1day",
+    "p_view_uv_list_1day", "p_view_pv_list_1day", "p_return_uv_list_1day",
+    "share_uv_list_2day", "share_pv_list_2day", "share_uv_list_3day", "share_pv_list_3day",
+
+    "view_uv_list_1h", "view_pv_list_1h", "play_uv_list_1h", "play_pv_list_1h",
+    "share_uv_list_1h", "share_pv_list_1h", "return_uv_list_1h", "p_return_uv_list_1h"
+  )
+  private val labelCols = Seq(
+    "is_share", "is_return", "playtime",
+    "is_play",
+    "share_ts", "share_ts_list", "return_mid_ts_list"
+  )
+
+  // Names of the features that are kept in the output feature JSON; everything else is dropped.
+  private val outputFeatureNames = Seq(
+    "ctx_week", "ctx_hour", "ctx_region", "ctx_city",
+    "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_system",
+    "u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
+    "u_3day_exp_cnt", "u_3day_click_cnt", "u_3day_share_cnt", "u_3day_return_cnt",
+    "total_time", "play_count_total",
+    "i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
+    "i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt",
+    "u_1day_ctr", "u_1day_str", "u_1day_rov", "u_1day_ros",
+    "u_3day_ctr", "u_3day_str", "u_3day_rov", "u_3day_ros",
+    "i_1day_ctr", "i_1day_str", "i_1day_rov", "i_1day_ros",
+    "i_3day_ctr", "i_3day_str", "i_3day_rov", "i_3day_ros",
+
+    "share_uv_list_1day_6_avg", "share_uv_list_1day_6_var", "share_uv_list_1day_diff_6_avg", "share_uv_list_1day_diff_6_var",
+    "return_uv_list_1day_6_avg", "return_uv_list_1day_6_var", "return_uv_list_1day_diff_6_avg", "return_uv_list_1day_diff_6_var",
+    "share_uv_list_1h_6_avg", "share_uv_list_1h_6_var", "share_uv_list_1h_diff_6_avg", "share_uv_list_1h_diff_6_var",
+    "return_uv_list_1h_6_avg", "return_uv_list_1h_6_var", "return_uv_list_1h_diff_6_avg", "return_uv_list_1h_diff_6_var"
+  ) ++ itemRealtimeCols ++ Seq(
+    "i_1day_ctr_rt", "i_1day_str_rt", "i_1day_ros_rt", "i_1day_rov_rt",
+    "i_1h_ctr_rt", "i_1h_str_rt", "i_1h_ros_rt", "i_1h_rov_rt"
+  )
+
+  // In this file `Set` is java.util.Set (see the imports), so sets are built as java.util.HashSet.
+  private def toJavaSet(names: Seq[String]): util.HashSet[String] = {
+    val set = new util.HashSet[String]()
+    names.foreach(n => set.add(n))
+    set
+  }
+
+  // Sets that are only read by getFeatureFromSet below, so one shared instance is enough.
+  private val sceneSet = toJavaSet(sceneCols)
+  private val userSet = toJavaSet(userProfileCols ++ userCntCols)
+  private val itemSet = toJavaSet(itemTextCols ++ itemCntCols)
+  private val itemRealtimeSet = toJavaSet(itemRealtimeCols)
+  private val rawPassThroughSet = toJavaSet(sceneCols ++ userProfileCols ++ itemTextCols)
+  private val labelSet = toJavaSet(labelCols)
+
+  /**
+   * Entry point. Recognised parameters (parsed by ParamUtils.parseArgs) and their defaults:
+   * tablePart=32, partitionPrefix="dt=", beginStr/endStr="20230101",
+   * savePath="/dw/recommend/model/sample_data/", project="loghubods",
+   * table="alg_recsys_view_sample_v2".
+   */
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder()
+      .appName(this.getClass.getName)
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    // 1 Read the job parameters
+    val param = ParamUtils.parseArgs(args)
+    val tablePart = param.getOrElse("tablePart", "32").toInt
+    val partitionPrefix = param.getOrElse("partitionPrefix", "dt=")
+    val beginStr = param.getOrElse("beginStr", "20230101")
+    val endStr = param.getOrElse("endStr", "20230101")
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model/sample_data/")
+    val project = param.getOrElse("project", "loghubods")
+    val table = param.getOrElse("table", "alg_recsys_view_sample_v2")
+
+
+    // 2 Obtain the ODPS reader
+    val odpsOps = env.getODPS(sc)
+
+    // 3 Produce the samples, one partition per date
+    val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
+    for (date <- dateRange) {
+      val partition = partitionPrefix + date
+      println("执行partiton:" + partition)
+      val odpsData = odpsOps.readTable(project = project,
+        table = table,
+        partition = partition,
+        transfer = func,
+        numPartition = tablePart)
+        .map(record => buildSampleLine(record))
+
+
+      // 4 Save the data to HDFS; only paths under /dw/recommend/model/ may be deleted and rewritten
+      val hdfsPath = savePath + "/" + partition
+      if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")){
+        println("删除路径并开始数据写入:" + hdfsPath)
+        MyHdfsUtils.delete_hdfs_path(hdfsPath)
+        odpsData.saveAsTextFile(hdfsPath, classOf[GzipCodec])
+      }else{
+        println("路径不合法,无法写入:" + hdfsPath)
+      }
+    }
+  }
+
+  /**
+   * Converts one record into the output line `logKey \t labelJson \t featureJson`.
+   *
+   * Note carried over from the file header: a constructed feature whose raw value is 0.0 is
+   * treated as meaningless and dropped, while a 0.0 produced by a change-transformation is kept.
+   */
+  private def buildSampleLine(record: Record): String = {
+    val sceneFeatureMap = getFeatureFromSet(sceneSet, record)
+    val userFeatureMap = getFeatureFromSet(userSet, record)
+    val itemFeatureMap = getFeatureFromSet(itemSet, record)
+
+    // Each realtime column is a comma-separated list of "key:value" entries; parse it into a
+    // Java map so it can be handed to the Java extractors.
+    val javaMap = new HashMap[String, Map[String, java.lang.Double]]()
+    getFeatureFromSet(itemRealtimeSet, record).foreach { case (column, raw) =>
+      val series = new HashMap[String, java.lang.Double]()
+      raw.split(",").foreach { entry =>
+        val kv = entry.split(":")
+        // Skip empty or malformed entries (e.g. an empty column value) instead of failing the task.
+        if (kv.length >= 2) {
+          series.put(kv(0), java.lang.Double.valueOf(kv(1)))
+        }
+      }
+      javaMap.put(column, series)
+    }
+
+    val ctxDay = sceneFeatureMap.getOrElse("ctx_day", "")
+    val ctxHour = sceneFeatureMap.getOrElse("ctx_hour", "")
+
+    // Sets handed to the Java extractors are built fresh per record, as before.
+    val f1 = getFeatureFromSet(rawPassThroughSet, record)
+    val f2 = RankExtractorUserFeature.getUserRateFeature(userFeatureMap)
+    val f3 = RankExtractorUserFeature.cntFeatureChange(userFeatureMap, toJavaSet(userCntCols))
+    val f4 = RankExtractorItemFeature.getItemRateFeature(itemFeatureMap)
+    val f5 = RankExtractorItemFeature.cntFeatureChange(itemFeatureMap, toJavaSet(itemCntCols))
+    val f6 = RankExtractorItemFeature.getItemRealtimeTrend(javaMap, ctxDay, ctxHour)
+    val f7 = RankExtractorItemFeature.getItemRealtimeCnt(javaMap, toJavaSet(itemRealtimeCols), ctxDay, ctxHour)
+    val f8 = RankExtractorItemFeature.getItemRealtimeRate(javaMap, ctxDay, ctxHour)
+
+    // 1: Merge all feature groups into one map; later groups overwrite earlier ones on key clashes
+    val result = new util.HashMap[String, String]()
+    f1.foreach { case (k, v) => result.put(k, v) }
+    result.putAll(f2)
+    result.putAll(f3)
+    result.putAll(f4)
+    result.putAll(f5)
+    result.putAll(f6)
+    result.putAll(f7)
+    result.putAll(f8)
+
+    val resultNew = new JSONObject
+    outputFeatureNames.foreach { name =>
+      if (result.containsKey(name)) {
+        resultNew.put(name, result.get(name))
+      }
+    }
+
+    // 2: Collect the labels; only non-null label columns are emitted
+    val labelNew = new JSONObject
+    getFeatureFromSet(labelSet, record).foreach { case (name, value) =>
+      labelNew.put(name, value)
+    }
+
+    // 3: Build the unique key of the log line
+    val logKey = Seq(
+      record.getString("mid"),
+      record.getString("videoid"),
+      record.getString("logtimestamp"),
+      record.getString("sessionid")
+    ).mkString("-")
+    val labelKey = labelNew.toString()
+    val featureKey = resultNew.toString()
+
+    logKey + "\t" + labelKey + "\t" + featureKey
+  }
+
+  /** Identity transfer function passed to `readTable`; returns the record unchanged. */
+  def func(record: Record, schema: TableSchema): Record = {
+    record
+  }
+//
+//  def singleParse(record: Record, label: String): String = {
+//    //2 process the features
+//    val reqContext: RequestContextOffline = new RequestContextOffline()
+//    reqContext.putUserFeature(record)
+//    reqContext.putItemFeature(record)
+//    reqContext.putSceneFeature(record)
+//    val bytesFeatureExtractor = new OfflineVlogShareLRFeatureExtractor()
+//    bytesFeatureExtractor.makeFeature(reqContext.featureMap)
+//    val featureMap = bytesFeatureExtractor.featureMap
+//    label + "\t" + featureMap.entries().map(r => r.getValue.getIdentifier + ":1").mkString("\t")
+//    ""
+//  }
+
+  /**
+   * Reads the given columns from a record as strings.
+   *
+   * @param set    names of the columns to read
+   * @param record source record
+   * @return column name -> string value, containing only the columns that are not null
+   */
+  def getFeatureFromSet(set: Set[String], record: Record): mutable.HashMap[String, String] = {
+    val result = mutable.HashMap[String, String]()
+    set.foreach(r =>{
+      if (!record.isNull(r)){
+        result.put(r, record.getString(r))
+      }
+    })
+    result
+  }
+}

+ 4 - 0
zhangbo/04_upload.sh

@@ -3,6 +3,10 @@ cat /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_sharev2_20231220
 dfs -put /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_sharev2_20231220_change.txt oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model/
 
 
+cat /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_sharev2_20231220.txt | sed '1d' | awk -F " " '{if($2!="0") print $1"\t"$2}' > /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_sharev2_20231220_change.txt
+dfs -put /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_sharev2_20240107_change.txt oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model/model_sharev2_20231220.txt
+
+
 
 cat /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_ros_v2_20231220.txt | sed '1d' | awk -F " " '{if($2!="0") print $1"\t"$2}' > /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_ros_v2_20231220_change.txt
 dfs -put /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_ros_v2_20231220_change.txt oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model/