丁云鹏 hai 8 meses
pai
achega
888a54240e

+ 82 - 1
recommend-model-produce/pom.xml

@@ -14,7 +14,7 @@
     <properties>
         <!--        <spark.version>3.3.1</spark.version>-->
         <!--        <scala.version>2.12.15</scala.version>-->
-<!--        <spark.version>2.3.0</spark.version>-->
+        <!--        <spark.version>2.3.0</spark.version>-->
         <spark.version>2.4.8</spark.version>
         <scala.version>2.11.8</scala.version>
         <emr.version>2.0.0</emr.version>
@@ -48,6 +48,26 @@
                     <artifactId>guava</artifactId>
                     <groupId>com.google.guava</groupId>
                 </exclusion>
+                <exclusion>
+                    <artifactId>jersey-server</artifactId>
+                    <groupId>org.glassfish.jersey.core</groupId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>jersey-common</artifactId>
+                    <groupId>org.glassfish.jersey.core</groupId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>jersey-client</artifactId>
+                    <groupId>org.glassfish.jersey.core</groupId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>jersey-container-servlet</artifactId>
+                    <groupId>org.glassfish.jersey.containers</groupId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>jersey-container-servlet-core</artifactId>
+                    <groupId>org.glassfish.jersey.containers</groupId>
+                </exclusion>
             </exclusions>
         </dependency>
         <dependency>
@@ -73,6 +93,67 @@
                 </exclusion>
             </exclusions>
         </dependency>
+        <dependency>
+            <groupId>org.glassfish.jersey.core</groupId>
+            <artifactId>jersey-server</artifactId>
+            <version>2.22.2</version>
+            <exclusions>
+                <exclusion>
+                    <artifactId>jersey-media-jaxb</artifactId>
+                    <groupId>org.glassfish.jersey.media</groupId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>jersey-client</artifactId>
+                    <groupId>org.glassfish.jersey.core</groupId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>jersey-common</artifactId>
+                    <groupId>org.glassfish.jersey.core</groupId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+<!--        <dependency>-->
+<!--            <groupId>org.glassfish.jersey</groupId>-->
+<!--            <artifactId>project</artifactId>-->
+<!--            <version>2.22.2</version>-->
+<!--        </dependency>-->
+        <dependency>
+            <groupId>org.glassfish.jersey.core</groupId>
+            <artifactId>jersey-common</artifactId>
+            <version>2.22.2</version>
+        </dependency>
+        <dependency>
+            <groupId>org.glassfish.jersey.containers</groupId>
+            <artifactId>jersey-container-servlet</artifactId>
+            <version>2.22.2</version>
+        </dependency>
+        <dependency>
+            <groupId>org.glassfish.jersey.containers</groupId>
+            <artifactId>jersey-container-servlet-core</artifactId>
+            <version>2.22.2</version>
+        </dependency>
+        <dependency>
+            <groupId>org.glassfish.jersey.media</groupId>
+            <artifactId>jersey-media-jaxb</artifactId>
+            <version>2.22.2</version>
+            <exclusions>
+                <exclusion>
+                    <artifactId>jersey-common</artifactId>
+                    <groupId>org.glassfish.jersey.core</groupId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>org.glassfish.jersey.core</groupId>
+            <artifactId>jersey-client</artifactId>
+            <version>2.22.2</version>
+            <exclusions>
+                <exclusion>
+                    <artifactId>jersey-common</artifactId>
+                    <groupId>org.glassfish.jersey.core</groupId>
+                </exclusion>
+            </exclusions>
+        </dependency>
         <dependency>
             <groupId>org.codehaus.janino</groupId>
             <artifactId>janino</artifactId>

+ 5 - 4
recommend-model-produce/src/main/java/com/tzld/piaoquan/recommend/model/produce/xgboost/XGBoostTrain.java

@@ -60,21 +60,22 @@ public class XGBoostTrain {
 
             SparkSession spark = SparkSession.builder()
                     .appName("XGBoostTrain")
-                    //.master("local")
+                    .master("local")
                     .getOrCreate();
 
             JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
             String file = "/dw/recommend/model/33_ad_train_data_v4/20240726/part-00099.gz";
+            file = "/Users/dingyunpeng/Desktop/part-00099.gz";
             JavaRDD<String> rdd = jsc.textFile(file);
 
             // 将 RDD[LabeledPoint] 转换为 JavaRDD<Row>
             JavaRDD<Row> rowRDD = rdd.map(s -> {
-                String[] line = StringUtils.split("\t");
+                String[] line = StringUtils.split(s, '\t');
                 int label = NumberUtils.toInt(line[0]);
                 // 选特征
                 Map<String, Double> map = new HashMap<>();
                 for (int i = 1; i < line.length; i++) {
-                    String[] fv = StringUtils.split(":");
+                    String[] fv = StringUtils.split(line[i], ':');
                     map.put(fv[0], NumberUtils.toDouble(fv[1], 0.0));
                 }
 
@@ -88,7 +89,7 @@ public class XGBoostTrain {
                 return RowFactory.create(label, vector);
             });
 
-            // log.info("rowRDD count {}", rowRDD.count());
+            log.info("rowRDD count {}", rowRDD.count());
             // 将 JavaRDD<Row> 转换为 Dataset<Row>
             List<StructField> fields = new ArrayList<>();
             fields.add(DataTypes.createStructField("label", DataTypes.IntegerType, true));