zhangbo 1 year ago
parent
commit
40bdc2920c
2 changed files with 99 additions and 100 deletions
  1. +1 −1
      pom.xml
  2. +98 −99
      src/main/java/examples/sparksql/SparkShareRatioSampleLoader.java

+ 1 - 1
pom.xml

@@ -52,7 +52,7 @@
         <dependency>
             <groupId>com.tzld.piaoquan</groupId>
             <artifactId>ad-engine-commons</artifactId>
-            <version>1.0.1</version>
+            <version>1.0.0</version>
         </dependency>
 
         <dependency>

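This hunk rolls ad-engine-commons back from 1.0.1 to 1.0.0. A quick way to confirm which version of the artifact actually resolves onto the classpath after the rollback is Maven's dependency tree, filtered to the coordinates shown in the diff:

    mvn dependency:tree -Dincludes=com.tzld.piaoquan:ad-engine-commons
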
+ 98 - 99
src/main/java/examples/sparksql/SparkShareRatioSampleLoader.java

@@ -1,99 +1,98 @@
-package examples.sparksql;
-
-import com.aliyun.odps.TableSchema;
-import com.aliyun.odps.data.Record;
-import com.google.common.collect.ListMultimap;
-import com.tzld.piaoquan.recommend.feature.domain.video.base.*;
-
-import examples.dataloader.RecommendSampleConstructor;
-import com.tzld.piaoquan.recommend.feature.domain.video.feature.VlogShareLRFeatureExtractor;
-import com.tzld.piaoquan.recommend.feature.gen.recommend.BaseFeature;
-import com.tzld.piaoquan.recommend.feature.gen.recommend.FeatureGroup;
-import org.apache.spark.SparkConf;
-import org.apache.spark.aliyun.odps.OdpsOps;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.function.Function2;
-
-import java.util.ArrayList;
-import java.util.Map;
-
-
-public class SparkShareRatioSampleLoader {
-
-    public static void main(String[] args) {
-
-        String partition = args[0];
-        String accessId = "LTAIWYUujJAm7CbH";
-        String accessKey = "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P";
-        String odpsUrl = "http://service.odps.aliyun.com/api";
-        String tunnelUrl = "http://dt.cn-hangzhou.maxcompute.aliyun-inc.com";
-        String project = "loghubods";
-        String table = "alg_recsys_view_sample";
-        String hdfsPath = "/dw/recommend/model/share_ratio_samples/" + partition;
-
-        SparkConf sparkConf = new SparkConf().setAppName("E-MapReduce Demo 3-2: Spark MaxCompute Demo (Java)");
-        JavaSparkContext jsc = new JavaSparkContext(sparkConf);
-        OdpsOps odpsOps = new OdpsOps(jsc.sc(), accessId, accessKey, odpsUrl, tunnelUrl);
-        System.out.println("Read odps table...");
-
-        JavaRDD<String> readData = odpsOps.readTableWithJava(project, table, partition, new RecordsToSamples(), Integer.valueOf(50));
-        readData.saveAsTextFile(hdfsPath);
-    }
-
-
-    static class RecordsToSamples implements Function2<Record, TableSchema, String> {
-        @Override
-        public String call(Record record, TableSchema schema) throws Exception {
-            String labelName = "share_ornot";
-            String ret = singleParse(record, labelName);
-            return ret;
-        }
-    }
-
-
-    // Processing logic for a single log record
-    public static String singleParse(Record record, String labelName) {
-        // Data parsing
-        String label = record.getString(labelName);
-        if (label == null || label.equals("1")) {
-            label = "0";
-        } else {
-            label = "1";
-        }
-
-        // Initialize objects from the SQL record
-        RequestContext requestContext = RecommendSampleConstructor.constructRequestContext(record);
-        UserFeature userFeature = RecommendSampleConstructor.constructUserFeature(record);
-        ItemFeature itemFeature = RecommendSampleConstructor.constructItemFeature(record);
-
-        // Convert to bytes
-        RequestContextBytesFeature requestContextBytesFeature = new RequestContextBytesFeature(requestContext);
-        UserBytesFeature userBytesFeature = new UserBytesFeature(userFeature);
-        VideoBytesFeature videoBytesFeature = new VideoBytesFeature(itemFeature);
-
-        // Feature extraction
-        VlogShareLRFeatureExtractor bytesFeatureExtractor;
-        bytesFeatureExtractor = new VlogShareLRFeatureExtractor();
-
-        bytesFeatureExtractor.getUserFeatures(userBytesFeature);
-        bytesFeatureExtractor.getItemFeature(videoBytesFeature);
-        bytesFeatureExtractor.getContextFeatures(requestContextBytesFeature);
-
-        ListMultimap<FeatureGroup, BaseFeature> featureMap = bytesFeatureExtractor.getFeatures();
-        return parseSamplesToString(label, featureMap);
-    }
-
-    // Build the sample string
-    public static String parseSamplesToString(String label, ListMultimap<FeatureGroup, BaseFeature> featureMap) {
-        ArrayList<String> featureList = new ArrayList<String>();
-        for (Map.Entry<FeatureGroup, BaseFeature> entry : featureMap.entries()) {
-            FeatureGroup groupedFeature = entry.getKey();
-            BaseFeature baseFeature = entry.getValue();
-            Long featureIdentifier = baseFeature.getIdentifier();
-            featureList.add(String.valueOf(featureIdentifier) + ":1");
-        }
-        return label + "\t" + String.join("\t", featureList);
-    }
-
-}
+//package examples.sparksql;
+//
+//import com.aliyun.odps.TableSchema;
+//import com.aliyun.odps.data.Record;
+//import com.google.common.collect.ListMultimap;
+//import com.tzld.piaoquan.data.base.*;
+//import examples.dataloader.RecommendSampleConstructor;
+//import com.tzld.piaoquan.data.score.feature.VlogShareLRFeatureExtractor;
+//import com.tzld.piaoquan.recommend.server.gen.recommend.BaseFeature;
+//import com.tzld.piaoquan.recommend.server.gen.recommend.FeatureGroup;
+//import org.apache.spark.SparkConf;
+//import org.apache.spark.aliyun.odps.OdpsOps;
+//import org.apache.spark.api.java.JavaRDD;
+//import org.apache.spark.api.java.JavaSparkContext;
+//import org.apache.spark.api.java.function.Function2;
+//
+//import java.util.ArrayList;
+//import java.util.Map;
+//
+//
+//public class SparkShareRatioSampleLoader {
+//
+//    public static void main(String[] args) {
+//
+//        String partition = args[0];
+//        String accessId = "LTAIWYUujJAm7CbH";
+//        String accessKey = "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P";
+//        String odpsUrl = "http://service.odps.aliyun.com/api";
+//        String tunnelUrl = "http://dt.cn-hangzhou.maxcompute.aliyun-inc.com";
+//        String project = "loghubods";
+//        String table = "alg_recsys_view_sample";
+//        String hdfsPath = "/dw/recommend/model/share_ratio_samples/" + partition;
+//
+//        SparkConf sparkConf = new SparkConf().setAppName("E-MapReduce Demo 3-2: Spark MaxCompute Demo (Java)");
+//        JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+//        OdpsOps odpsOps = new OdpsOps(jsc.sc(), accessId, accessKey, odpsUrl, tunnelUrl);
+//        System.out.println("Read odps table...");
+//
+//        JavaRDD<String> readData = odpsOps.readTableWithJava(project, table, partition, new RecordsToSamples(), Integer.valueOf(50));
+//        readData.saveAsTextFile(hdfsPath);
+//    }
+//
+//
+//    static class RecordsToSamples implements Function2<Record, TableSchema, String> {
+//        @Override
+//        public String call(Record record, TableSchema schema) throws Exception {
+//            String labelName = "share_ornot";
+//            String ret = singleParse(record, labelName);
+//            return ret;
+//        }
+//    }
+//
+//
+//    // Processing logic for a single log record
+//    public static String singleParse(Record record, String labelName) {
+//        // Data parsing
+//        String label = record.getString(labelName);
+//        if (label == null || label.equals("1")) {
+//            label = "0";
+//        } else {
+//            label = "1";
+//        }
+//
+//        // Initialize objects from the SQL record
+//        RequestContext requestContext = RecommendSampleConstructor.constructRequestContext(record);
+//        UserFeature userFeature = RecommendSampleConstructor.constructUserFeature(record);
+//        ItemFeature itemFeature = RecommendSampleConstructor.constructItemFeature(record);
+//
+//        // Convert to bytes
+//        RequestContextBytesFeature requestContextBytesFeature = new RequestContextBytesFeature(requestContext);
+//        UserBytesFeature userBytesFeature = new UserBytesFeature(userFeature);
+//        VideoBytesFeature videoBytesFeature = new VideoBytesFeature(itemFeature);
+//
+//        // Feature extraction
+//        VlogShareLRFeatureExtractor bytesFeatureExtractor;
+//        bytesFeatureExtractor = new VlogShareLRFeatureExtractor();
+//
+//        bytesFeatureExtractor.getUserFeatures(userBytesFeature);
+//        bytesFeatureExtractor.getItemFeature(videoBytesFeature);
+//        bytesFeatureExtractor.getContextFeatures(requestContextBytesFeature);
+//
+//        ListMultimap<FeatureGroup, BaseFeature> featureMap = bytesFeatureExtractor.getFeatures();
+//        return parseSamplesToString(label, featureMap);
+//    }
+//
+//    // Build the sample string
+//    public static String parseSamplesToString(String label, ListMultimap<FeatureGroup, BaseFeature> featureMap) {
+//        ArrayList<String> featureList = new ArrayList<String>();
+//        for (Map.Entry<FeatureGroup, BaseFeature> entry : featureMap.entries()) {
+//            FeatureGroup groupedFeature = entry.getKey();
+//            BaseFeature baseFeature = entry.getValue();
+//            Long featureIdentifier = baseFeature.getIdentifier();
+//            featureList.add(String.valueOf(featureIdentifier) + ":1");
+//        }
+//        return label + "\t" + String.join("\t", featureList);
+//    }
+//
+//}
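
For reference, the loader this commit comments out wrote one sample per ODPS record in a LibSVM-style layout: the binary label, then tab-separated <featureId>:1 pairs, exactly as parseSamplesToString builds them. A minimal self-contained sketch of that serialization, using hypothetical feature identifiers in place of the ones BaseFeature.getIdentifier() would return:

    import java.util.ArrayList;
    import java.util.List;

    public class SampleFormatSketch {

        // Mirrors parseSamplesToString: label, then one "<featureId>:1" entry
        // per extracted feature, all tab-separated.
        static String toSampleLine(String label, List<Long> featureIds) {
            ArrayList<String> featureList = new ArrayList<>();
            for (Long id : featureIds) {
                featureList.add(id + ":1");
            }
            return label + "\t" + String.join("\t", featureList);
        }

        public static void main(String[] args) {
            // Hypothetical identifiers; real ones come from the feature extractor.
            System.out.println(toSampleLine("1", List.of(1024L, 2048L, 4096L)));
            // Output: 1	1024:1	2048:1	4096:1
        }
    }

If the job were ever re-enabled, it would take the target ODPS partition as its single argument (args[0]), e.g. something along the lines of spark-submit --class examples.sparksql.SparkShareRatioSampleLoader <job-jar> dt=20231211; the dt=... partition spec is an assumption about the table's layout, not something the diff confirms.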