sunmingze 1 vuosi sitten
vanhempi
commit
a96c345bc3

+ 36 - 19
pom.xml

@@ -33,6 +33,19 @@
     <packaging>jar</packaging>
 
     <dependencies>
+        <dependency>
+            <groupId>com.google.protobuf</groupId>
+            <artifactId>protobuf-java</artifactId>
+            <version>3.12.0</version>
+        </dependency>
+
+        <dependency>
+            <groupId>com.tzld.piaoquan</groupId>
+            <artifactId>recommend-server-client</artifactId>
+            <version>1.0.1</version>
+        </dependency>
+
+
         <dependency>
             <groupId>org.apache.spark</groupId>
             <artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -45,6 +58,7 @@
                 </exclusion>
             </exclusions>
         </dependency>
+
         <dependency>
             <groupId>org.apache.spark</groupId>
             <artifactId>spark-sql_${scala.binary.version}</artifactId>
@@ -57,17 +71,18 @@
                 </exclusion>
             </exclusions>
         </dependency>
+
         <dependency>
             <groupId>org.apache.spark</groupId>
             <artifactId>spark-mllib_${scala.binary.version}</artifactId>
             <version>${spark.version}</version>
             <scope>provided</scope>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.spark</groupId>
-            <artifactId>spark-streaming_${scala.binary.version}</artifactId>
-            <version>${spark.version}</version>
-            <scope>provided</scope>
+            <exclusions>
+                <exclusion>
+                    <artifactId>protobuf-java</artifactId>
+                    <groupId>com.google.protobuf</groupId>
+                </exclusion>
+            </exclusions>
         </dependency>
 
         <dependency>
@@ -87,21 +102,15 @@
             <groupId>com.aliyun.odps</groupId>
             <artifactId>hadoop-fs-oss</artifactId>
             <version>${cupid.sdk.version}</version>
+            <exclusions>
+                <exclusion>
+                    <artifactId>protobuf-java</artifactId>
+                    <groupId>com.google.protobuf</groupId>
+                </exclusion>
+            </exclusions>
         </dependency>
 
 
-        <dependency>
-            <groupId>com.tzld.piaoquan</groupId>
-            <artifactId>recommend-server-client</artifactId>
-            <version>1.0.1</version>
-        </dependency>
-
-        <dependency>
-            <groupId>com.google.protobuf</groupId>
-            <artifactId>protobuf-java</artifactId>
-            <version>3.12.0</version>
-        </dependency>
-
         <dependency>
             <groupId>org.springframework.boot</groupId>
             <artifactId>spring-boot-starter-data-redis</artifactId>
@@ -134,8 +143,10 @@
                     <artifactId>aliyun-sdk-mns</artifactId>
                 </exclusion>
             </exclusions>
+
         </dependency>
 
+
         <dependency>
             <groupId>com.aliyun.emr</groupId>
             <artifactId>emr-maxcompute_2.11</artifactId>
@@ -150,7 +161,7 @@
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-shade-plugin</artifactId>
-                <version>2.4.3</version>
+                <version>3.2.4</version>
                 <executions>
                     <execution>
                         <phase>package</phase>
@@ -158,6 +169,12 @@
                             <goal>shade</goal>
                         </goals>
                         <configuration>
+                            <relocations>
+                                <relocation>
+                                    <pattern>com.google.protobuf</pattern>
+                                    <shadedPattern>shaded.com.google.protobuf</shadedPattern>
+                                </relocation>
+                            </relocations>
                             <minimizeJar>false</minimizeJar>
                             <shadedArtifactAttached>true</shadedArtifactAttached>
                             <artifactSet>

+ 5 - 5
src/main/java/com/aliyun/odps/spark/examples/sparksql/SparkEMRShareRatioSampleLoader.java

@@ -4,7 +4,7 @@ import com.aliyun.odps.TableSchema;
 import com.aliyun.odps.data.Record;
 import com.google.common.collect.ListMultimap;
 import com.tzld.piaoquan.data.base.*;
-import com.tzld.piaoquan.data.dataloader.FeatureConstructor;
+import com.tzld.piaoquan.data.dataloader.SampleFeatureConstructor;
 import com.tzld.piaoquan.data.score.feature.VlogShareLRFeatureExtractor;
 import com.tzld.piaoquan.recommend.server.gen.recommend.BaseFeature;
 import com.tzld.piaoquan.recommend.server.gen.recommend.FeatureGroup;
@@ -22,7 +22,7 @@ public class SparkEMRShareRatioSampleLoader {
 
     public static void main(String[] args) {
 
-        String partition = "dt=20231123";
+        String partition = "dt=20231211";
         String accessId = "LTAIWYUujJAm7CbH";
         String accessKey = "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P";
         String odpsUrl = "http://service.odps.aliyun.com/api";
@@ -60,9 +60,9 @@ public class SparkEMRShareRatioSampleLoader {
         }
 
         // 从sql的 record中 初始化对象内容
-        RequestContext requestContext = FeatureConstructor.constructRequestContext(record);
-        UserFeature userFeature = FeatureConstructor.constructUserFeature(record);
-        ItemFeature itemFeature = FeatureConstructor.constructItemFeature(record);
+        RequestContext requestContext = SampleFeatureConstructor.constructRequestContext(record);
+        UserFeature userFeature = SampleFeatureConstructor.constructUserFeature(record);
+        ItemFeature itemFeature = SampleFeatureConstructor.constructItemFeature(record);
 
         // 转化成bytes
         RequestContextBytesFeature requestContextBytesFeature = new RequestContextBytesFeature(requestContext);

+ 2 - 2
src/main/java/com/aliyun/odps/spark/examples/sparksql/SparkLoadDataRedisMaxCompute.java

@@ -4,7 +4,7 @@ package com.aliyun.odps.spark.examples.sparksql;
 
 import com.aliyun.odps.data.Record;
 import com.tzld.piaoquan.data.base.UserFeature;
-import com.tzld.piaoquan.data.dataloader.FeatureConstructor;
+import com.tzld.piaoquan.data.dataloader.SampleFeatureConstructor;
 import org.apache.spark.sql.SparkSession;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.Row;
@@ -38,7 +38,7 @@ public class SparkLoadDataRedisMaxCompute {
 
     public static void loadFeatureToRedis(RedisTemplate<String, String> redisTemplate, Record line) {
         Map<String, String> userFeaRedisFormat = new HashMap<String, String>();
-        UserFeature userFeature = FeatureConstructor.constructUserFeature(line);
+        UserFeature userFeature = SampleFeatureConstructor.constructUserFeature(line);
         String key = String.format(userKeyFormat, userFeature.getKey());
         String value = userFeature.getValue();
         userFeaRedisFormat.put(key, value);

+ 7 - 8
src/main/java/com/tzld/piaoquan/data/dataloader/FeatureConstructor.java → src/main/java/com/tzld/piaoquan/data/dataloader/SampleFeatureConstructor.java

@@ -8,12 +8,11 @@ import com.tzld.piaoquan.data.base.ItemFeature;
 import com.tzld.piaoquan.data.base.RequestContext;
 import com.tzld.piaoquan.data.base.UserActionFeature;
 import com.tzld.piaoquan.data.base.UserFeature;
-import org.apache.spark.sql.Row;
 
 import java.util.HashMap;
 import java.util.Map;
 
-public class FeatureConstructor {
+public class SampleFeatureConstructor {
 
     private static final String BUCKET_NAME = "ali-recommend";
     private static final Map<String, String> ODPS_CONFIG = new HashMap<String, String>();
@@ -109,12 +108,12 @@ public class FeatureConstructor {
 
     public static ItemFeature constructItemFeature(Record record) {
         ItemFeature itemFeature = new ItemFeature();
-        itemFeature.setVideoId(record.getString("videoid").toString());
-        itemFeature.setUpId(record.getString("uid").toString());
-        itemFeature.setTitleLength(record.getString("play_count").toString());
-        itemFeature.setPlayLength(record.getString("total_time").toString());
-        itemFeature.setTotalTime(record.getString("total_time").toString());
-        itemFeature.setDaysSinceUpload(record.getString("existence_days").toString());
+        itemFeature.setVideoId(record.getString("videoid"));
+        itemFeature.setUpId(record.getString("uid"));
+        itemFeature.setTitleLength(record.getString("i_title_len"));
+        itemFeature.setPlayLength(record.getString("i_play_len"));
+        itemFeature.setTotalTime(record.getString("total_time"));
+        itemFeature.setDaysSinceUpload(record.getString("i_days_since_upload"));
 
         UserActionFeature user1dayActionFeature = new UserActionFeature();
         user1dayActionFeature.setExp_cnt(record.getString("i_1day_exp_cnt"));