丁云鹏 8 months ago
parent
commit
a292566be1

+ 10 - 71
recommend-model-produce/pom.xml

@@ -13,13 +13,11 @@
 
     <properties>
         <!--        <spark.version>3.3.1</spark.version>-->
-        <!--        <scala.version>2.12.15</scala.version>-->
         <!--        <spark.version>2.3.0</spark.version>-->
         <spark.version>2.4.8</spark.version>
-        <scala.version>2.11.8</scala.version>
+        <scala.version>2.12.15</scala.version>
         <emr.version>2.0.0</emr.version>
         <java.version>1.8</java.version>
-        <odps.version>0.48.4-public</odps.version>
         <fastjson.version>1.2.45</fastjson.version>
 
         <maven.compiler.source>8</maven.compiler.source>
@@ -29,7 +27,7 @@
     <dependencies>
         <dependency>
             <groupId>org.apache.spark</groupId>
-            <artifactId>spark-core_2.11</artifactId>
+            <artifactId>spark-core_2.12</artifactId>
             <version>${spark.version}</version>
             <exclusions>
                 <exclusion>
@@ -72,7 +70,7 @@
         </dependency>
         <dependency>
             <groupId>org.apache.spark</groupId>
-            <artifactId>spark-mllib_2.11</artifactId>
+            <artifactId>spark-mllib_2.12</artifactId>
             <version>${spark.version}</version>
             <exclusions>
                 <exclusion>
@@ -112,11 +110,11 @@
                 </exclusion>
             </exclusions>
         </dependency>
-<!--        <dependency>-->
-<!--            <groupId>org.glassfish.jersey</groupId>-->
-<!--            <artifactId>project</artifactId>-->
-<!--            <version>2.22.2</version>-->
-<!--        </dependency>-->
+        <!--        <dependency>-->
+        <!--            <groupId>org.glassfish.jersey</groupId>-->
+        <!--            <artifactId>project</artifactId>-->
+        <!--            <version>2.22.2</version>-->
+        <!--        </dependency>-->
         <dependency>
             <groupId>org.glassfish.jersey.core</groupId>
             <artifactId>jersey-common</artifactId>
@@ -174,25 +172,6 @@
             <artifactId>netty-all</artifactId>
             <version>4.1.17.Final</version>
         </dependency>
-        <dependency>
-            <groupId>com.aliyun.emr</groupId>
-            <artifactId>emr-maxcompute_2.11</artifactId>
-            <version>${emr.version}</version>
-            <exclusions>
-                <exclusion>
-                    <artifactId>spark-core_2.11</artifactId>
-                    <groupId>org.apache.spark</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>spark-catalyst_2.11</artifactId>
-                    <groupId>org.apache.spark</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>guava</artifactId>
-                    <groupId>com.google.guava</groupId>
-                </exclusion>
-            </exclusions>
-        </dependency>
         <dependency>
             <groupId>org.scala-lang</groupId>
             <artifactId>scala-library</artifactId>
@@ -204,51 +183,11 @@
             <artifactId>fastjson</artifactId>
             <version>${fastjson.version}</version>
         </dependency>
-        <dependency>
-            <groupId>redis.clients</groupId>
-            <artifactId>jedis</artifactId>
-            <version>5.1.3</version>
-        </dependency>
-        <dependency>
-            <groupId>com.aliyun.odps</groupId>
-            <artifactId>odps-sdk-core</artifactId>
-            <version>${odps.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.codehaus.jackson</groupId>
-                    <artifactId>jackson-mapper-asl</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.codehaus.jackson</groupId>
-                    <artifactId>jackson-core-asl</artifactId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>guava</artifactId>
-                    <groupId>com.google.guava</groupId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <groupId>com.aliyun.odps</groupId>
-            <artifactId>odps-sdk-commons</artifactId>
-            <version>${odps.version}</version>
-        </dependency>
         <dependency>
             <groupId>org.projectlombok</groupId>
             <artifactId>lombok</artifactId>
             <version>1.18.24</version>
         </dependency>
-        <dependency>
-            <groupId>com.ctrip.framework.apollo</groupId>
-            <artifactId>apollo-client</artifactId>
-            <version>1.8.0</version>
-            <exclusions>
-                <exclusion>
-                    <artifactId>guava</artifactId>
-                    <groupId>com.google.guava</groupId>
-                </exclusion>
-            </exclusions>
-        </dependency>
         <dependency>
             <groupId>org.slf4j</groupId>
             <artifactId>slf4j-simple</artifactId>
@@ -261,8 +200,8 @@
         </dependency>
         <dependency>
             <groupId>ml.dmlc</groupId>
-            <artifactId>xgboost4j-spark</artifactId>
-            <version>0.90</version>
+            <artifactId>xgboost4j-spark_2.12</artifactId>
+            <version>1.7.6</version>
             <exclusions>
                 <exclusion>
                     <artifactId>scala-library</artifactId>

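Note on the pom.xml changes above: the build moves to the Scala 2.12 binary family (spark-core_2.12, spark-mllib_2.12, scala-library 2.12.15) and replaces xgboost4j-spark 0.90 with xgboost4j-spark_2.12 1.7.6, while the ODPS, Jedis, and Apollo dependencies are dropped. As a minimal sketch of training against the upgraded artifact (not this repository's XGBoostTrain code; the input path and parameter values are placeholders, assuming the upstream 1.7.x API):

    import ml.dmlc.xgboost4j.scala.spark.XGBoostClassificationModel;
    import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SparkSession;

    public class XGBoostUpgradeSketch {
        public static void main(String[] args) {
            SparkSession spark = SparkSession.builder()
                    .appName("xgboost4j-spark_2.12 smoke test")
                    .master("local[*]")
                    .getOrCreate();

            // Placeholder input: loading LibSVM data yields "label" and "features" columns.
            Dataset<Row> train = spark.read().format("libsvm").load("data/sample_libsvm_data.txt");

            // Typed setters on XGBoostClassifier; the values here are illustrative, not tuned.
            XGBoostClassifier classifier = new XGBoostClassifier()
                    .setFeaturesCol("features")
                    .setLabelCol("label")
                    .setObjective("binary:logistic")
                    .setEta(0.1)
                    .setMaxDepth(6)
                    .setNumRound(100)
                    .setNumWorkers(1);

            XGBoostClassificationModel model = classifier.fit(train);
            model.transform(train).select("label", "prediction").show();

            spark.stop();
        }
    }
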
+ 0 - 109
recommend-model-produce/src/main/java/com/tzld/piaoquan/recommend/model/produce/service/ODPSService.java

@@ -1,109 +0,0 @@
-package com.tzld.piaoquan.recommend.model.produce.service;
-
-import com.aliyun.odps.Instance;
-import com.aliyun.odps.Odps;
-import com.aliyun.odps.OdpsException;
-import com.aliyun.odps.TableSchema;
-import com.aliyun.odps.account.Account;
-import com.aliyun.odps.account.AliyunAccount;
-import com.aliyun.odps.data.Record;
-import com.aliyun.odps.data.SimpleJsonValue;
-import com.aliyun.odps.task.SQLTask;
-import com.google.common.base.Joiner;
-import com.tzld.piaoquan.recommend.model.produce.util.CommonCollectionUtils;
-import lombok.extern.slf4j.Slf4j;
-import org.apache.spark.aliyun.odps.OdpsOps;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.function.Function2;
-
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * https://help.aliyun.com/zh/maxcompute/user-guide/java-sdk-1/?spm=a2c4g.11174283.0.0.6d0111c1E15lI3
- *
- * @author dyp
- */
-@Slf4j
-public class ODPSService {
-    private final String accessId = "LTAIWYUujJAm7CbH";
-    private final String accessKey = "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P";
-    private final String odpsUrl = "http://service.odps.aliyun.com/api";
-    private final String tunnelUrl = "http://dt.cn-hangzhou.maxcompute.aliyun.com";
-    private final String sqlFormat = "select %s from %s where 1=1 %s ;";
-    private final String countSqlFormat = "select count(1) as count from %s where 1=1 %s ;";
-
-
-    public JavaRDD<Map<String, String>> read(JavaSparkContext jsc, String project, String table, String partition,
-                                             int partitionNum) {
-        OdpsOps odpsOps = new OdpsOps(jsc.sc(), accessId, accessKey, odpsUrl, tunnelUrl);
-
-        JavaRDD<Map<String, String>> readData = odpsOps.readTableWithJava(project, table, partition,
-                new RecordToMap(), partitionNum);
-        return readData;
-    }
-
-    static class RecordToMap implements Function2<Record, TableSchema, Map<String, String>> {
-        private List<String> cols;
-
-        public RecordToMap(List<String> cols) {
-            this.cols = cols;
-        }
-
-        public RecordToMap() {
-        }
-
-        @Override
-        public Map<String, String> call(Record r, TableSchema schema) {
-            Map<String, String> map = new HashMap<>();
-            for (int i = 0; i < schema.getColumns().size(); i++) {
-                if (cols == null || cols.contains(r.getColumns()[i].getName())) {
-                    Object obj = r.get(i);
-                    if (obj instanceof SimpleJsonValue) {
-                        map.put(r.getColumns()[i].getName(), ((SimpleJsonValue) obj).toString());
-                    } else if (obj instanceof Long) {
-                        map.put(r.getColumns()[i].getName(), ((Long) obj) + "");
-                    } else {
-                        map.put(r.getColumns()[i].getName(), r.getString(i));
-                    }
-                }
-            }
-            return map;
-        }
-    }
-
-    private List<Map<String, String>> read(String project,
-                                           String table,
-                                           List<String> colNames,
-                                           String condition) {
-        Account account = new AliyunAccount(accessId, accessKey);
-        Odps odps = new Odps(account);
-        odps.setEndpoint(odpsUrl);
-        odps.setDefaultProject(project);
-
-        String sql = String.format(sqlFormat, Joiner.on(",").join(colNames), table, condition);
-
-        List<Record> records;
-        try {
-            Instance i = SQLTask.run(odps, sql);
-            i.waitForSuccess();
-            records = SQLTask.getResult(i);
-        } catch (OdpsException e) {
-            log.error("request odps error", e);
-            return Collections.emptyList();
-        }
-
-        List<Map<String, String>> fieldValues = CommonCollectionUtils.toList(records, r -> {
-            Map<String, String> map = new HashMap<>();
-            for (int i = 0; i < r.getColumnCount(); i++) {
-                map.put(r.getColumns()[i].getName(), r.getString(i));
-            }
-            return map;
-        });
-
-        return fieldValues;
-    }
-}

+ 1 - 1
recommend-model-produce/src/main/java/com/tzld/piaoquan/recommend/model/produce/xgboost/XGBoostTrain.java

@@ -124,7 +124,7 @@ public class XGBoostTrain {
 
             // 显示预测结果
             Dataset<Row> predictions = model.transform(testData);
-            predictions.select("label", "prediction").show();
+            predictions.select("label", "prediction").show(1000);
 
 
         } catch (Throwable e) {
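
Note on the last hunk: Dataset.show() with no arguments prints at most 20 rows, so switching to show(1000) raises the cap on how many predictions are displayed. A small variant, if untruncated cell values are also wanted (predictions is the Dataset from the hunk above):

    // show() caps output at 20 rows and truncates long values;
    // show(1000) prints up to 1000 rows, and the two-argument overload also disables truncation.
    predictions.select("label", "prediction").show(1000, false);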