Browse Source

feat:修改label

zhaohaipeng 2 months ago
parent
commit
8649132acc

+ 7 - 0
src/main/java/examples/extractor/ExtractorUtils.java

@@ -195,6 +195,13 @@ public class ExtractorUtils {
                 .getHour();
     }
 
+    public static int getDayOrWeekByTimestamp(long timestamp) {
+        return LocalDateTime
+                .ofInstant(Instant.ofEpochSecond(timestamp), ZoneId.systemDefault())
+                .getDayOfWeek()
+                .getValue();
+    }
+
     public static void main(String[] args) {
         double[] sortedArray = {1.0, 2.0, 4.0, 4.0, 6.0};
         double target = 0.0;

+ 18 - 2
src/main/java/examples/extractor/v20250218/ExtractFeature20250218.java

@@ -2,12 +2,14 @@ package examples.extractor.v20250218;
 
 import examples.extractor.ExtractorUtils;
 import examples.extractor.RankExtractorFeature_20240530;
+import examples.utils.FestiveUtil;
 import examples.utils.SimilarityUtils;
 import org.apache.commons.lang3.StringUtils;
 
 import java.time.Instant;
 import java.time.LocalDateTime;
 import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
@@ -60,10 +62,9 @@ public class ExtractFeature20250218 {
         Object size = videoFeature.getOrDefault("size", "0d");
         Object bit_rate = videoFeature.getOrDefault("bit_rate", "0d");
         String festiveLabel1 = videoFeature.getOrDefault("festive_label1", "").toString();
+        String festiveLabel2 = videoFeature.getOrDefault("festive_label2", "").toString();
 
-        long createTs = Long.parseLong(videoFeature.getOrDefault("gmt_create_timestamp", "0").toString());
 
-        featureMap.put("create_ts_diff", ExtractorUtils.getDaysBetween(createTs, ts));
         featureMap.put("total_time", totalTime);
         featureMap.put("width", width);
         featureMap.put("height", height);
@@ -81,6 +82,21 @@ public class ExtractFeature20250218 {
         LocalDateTime now = LocalDateTime.ofInstant(Instant.ofEpochSecond(ts), ZoneId.systemDefault());
         featureMap.put("hour_" + now.getHour(), "0.1");
         featureMap.put("day_of_week" + now.getDayOfWeek(), "0.1");
+
+        long createTs = Long.parseLong(videoFeature.getOrDefault("gmt_create_timestamp", "0").toString());
+        featureMap.put("create_ts_diff", ExtractorUtils.getDaysBetween(createTs, ts));
+
+        String date = LocalDateTime.ofInstant(Instant.ofEpochSecond(ts), ZoneId.systemDefault()).format(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
+        String festiveByDate = FestiveUtil.getFestiveByDate(date);
+        featureMap.put("today_is_fes", 0);
+        featureMap.put("video_fes_eq", 0);
+        if (StringUtils.isNotBlank(festiveByDate)) {
+            featureMap.put("today_is_fes", 1);
+            if (StringUtils.equals(festiveByDate, festiveLabel2)) {
+                featureMap.put("video_fes_eq", 1);
+            }
+        }
+
     }
 
     public static void handleC1(Map<String, Object> c1Feature, Map<String, Object> featureMap) {

+ 46 - 0
src/main/java/examples/utils/FestiveUtil.java

@@ -0,0 +1,46 @@
+package examples.utils;
+
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Lookup table from a date string to a festival name, loaded from the
+ * {@code festive.txt} classpath resource.
+ *
+ * <p>Each usable line of the resource has the form {@code date,name}; lines with
+ * fewer than two comma-separated fields are ignored. When several lines share the
+ * same date, the last one read replaces the earlier ones.
+ */
+@Slf4j
+public class FestiveUtil {
+
+    /** Classpath name of the festival table. */
+    private static final String FESTIVE_RESOURCE = "festive.txt";
+
+    /**
+     * Current date-to-festival table; empty until {@link #init()} succeeds.
+     * Declared {@code volatile} because {@link #init()} swaps in a new map instance
+     * that other threads may be reading through {@link #getFestiveByDate(String)}.
+     */
+    private static volatile Map<String, String> festiveMap = new HashMap<>();
+
+    /**
+     * Reads {@code festive.txt} from the classpath and replaces the lookup table
+     * with its contents.
+     *
+     * <p>If the resource is missing or cannot be read, the previous table is kept
+     * and the problem is logged; no exception is thrown.
+     */
+    public static void init() {
+        long start = System.currentTimeMillis();
+        InputStream resourceAsStream = FestiveUtil.class.getClassLoader().getResourceAsStream(FESTIVE_RESOURCE);
+
+        if (Objects.isNull(resourceAsStream)) {
+            // Do not report success when nothing was loaded.
+            log.warn("festive.txt not found on classpath, festive map not loaded");
+            return;
+        }
+
+        // Decode explicitly instead of relying on the platform default charset: the
+        // festival names are non-ASCII and would be garbled on a JVM whose default
+        // charset differs. NOTE(review): assumes festive.txt is stored as UTF-8.
+        try (BufferedReader reader = new BufferedReader(
+                new InputStreamReader(resourceAsStream, java.nio.charset.StandardCharsets.UTF_8))) {
+            Map<String, String> tmpMap = new HashMap<>();
+            String line;
+            while ((line = reader.readLine()) != null) {
+                String[] split = line.split(",");
+                if (split.length >= 2) {
+                    // Later lines for the same date overwrite earlier ones.
+                    tmpMap.put(split[0], split[1]);
+                }
+            }
+
+            // Publish the fully built map in one assignment so readers never see a
+            // partially filled table.
+            FestiveUtil.festiveMap = tmpMap;
+        } catch (IOException e) {
+            log.error("read festive.txt error: ", e);
+            return;
+        }
+
+        long end = System.currentTimeMillis();
+        log.info("festive.txt loaded successfully cost {}", end - start);
+    }
+
+    /**
+     * Looks up the festival recorded for a date.
+     *
+     * @param date date key exactly as it appears in the first column of
+     *             {@code festive.txt}
+     * @return the festival name, or {@code null} if the date has no entry or the
+     *         table has not been loaded
+     */
+    public static String getFestiveByDate(String date) {
+        return festiveMap.get(date);
+    }
+}

+ 249 - 0
src/main/resources/festive.txt

@@ -0,0 +1,249 @@
+2024-01-01,元旦
+2024-01-18,腊八节
+2024-02-02,小年
+2024-02-03,小年
+2024-02-09,除夕
+2024-02-10,春节
+2024-02-10,初一
+2024-02-11,初二
+2024-02-12,初三
+2024-02-13,初四
+2024-02-14,初五
+2024-02-15,初六
+2024-02-16,初七
+2024-02-17,初八
+2024-02-18,初九
+2024-02-19,初十
+2024-02-14,情人节
+2024-02-24,元宵节
+2024-03-11,龙抬头
+2024-03-08,妇女节
+2024-03-12,植树节
+2024-05-01,劳动节
+2024-05-12,母亲节
+2024-06-01,儿童节
+2024-06-10,端午节
+2024-06-16,父亲节
+2024-07-01,建党节
+2024-08-01,建军节
+2024-08-10,七夕节
+2024-08-18,中元节
+2024-09-17,中秋节
+2024-10-01,国庆节
+2024-10-11,重阳节
+2024-11-28,感恩节
+2024-12-13,公祭日
+2024-12-24,平安夜
+2024-12-25,圣诞节
+2024-01-06,小寒
+2024-01-20,大寒
+2024-02-04,立春
+2024-02-19,雨水
+2024-03-05,惊蛰
+2024-03-20,春分
+2024-04-04,清明
+2024-04-19,谷雨
+2024-05-05,立夏
+2024-05-20,小满
+2024-06-05,芒种
+2024-06-21,夏至
+2024-07-06,小暑
+2024-07-22,大暑
+2024-08-07,立秋
+2024-08-22,处暑
+2024-09-07,白露
+2024-09-22,秋分
+2024-10-08,寒露
+2024-10-23,霜降
+2024-11-07,立冬
+2024-11-22,小雪
+2024-12-06,大雪
+2024-12-21,冬至
+2024-11-12,孙中山诞辰
+2024-03-12,孙中山逝世
+2024-12-26,毛主席诞辰
+2024-09-09,毛主席逝世
+2024-03-05,周恩来诞辰
+2024-01-08,周恩来逝世
+2024-08-22,邓小平诞辰
+2024-02-19,邓小平逝世
+2024-07-03,李克强诞辰
+2024-10-27,李克强逝世
+2024-09-18,九一八
+2024-07-07,七七事变
+2024-09-07,袁隆平诞辰
+2024-05-22,袁隆平逝世
+2024-10-24,彭德怀诞辰
+2024-11-29,彭德怀逝世
+2024-12-01,朱德诞辰
+2024-07-06,朱德逝世
+2024-10-27,吴尊友逝世
+2024-03-05,学雷锋
+2024-03-05,两会
+2024-03-15,315国际消费者权益日
+2025-01-01,元旦
+2025-01-07,腊八节
+2025-01-22,小年
+2025-01-23,小年
+2025-01-28,除夕
+2025-01-29,春节
+2025-01-29,初一
+2025-01-30,初二
+2025-01-31,初三
+2025-02-01,初四
+2025-02-02,初五
+2025-02-03,初六
+2025-02-04,初七
+2025-02-05,初八
+2025-02-06,初九
+2025-02-07,初十
+2025-02-14,情人节
+2025-02-22,元宵节
+2025-03-01,龙抬头
+2025-03-08,妇女节
+2025-03-12,植树节
+2025-05-01,劳动节
+2025-05-11,母亲节
+2025-06-01,儿童节
+2025-05-31,端午节
+2025-06-15,父亲节
+2025-07-01,建党节
+2025-08-01,建军节
+2025-08-29,七夕节
+2025-09-06,中元节
+2025-10-06,中秋节
+2025-10-01,国庆节
+2025-10-29,重阳节
+2025-11-27,感恩节
+2025-12-13,公祭日
+2025-12-24,平安夜
+2025-12-25,圣诞节
+2025-01-05,小寒
+2025-01-20,大寒
+2025-02-03,立春
+2025-02-18,雨水
+2025-03-05,惊蛰
+2025-03-20,春分
+2025-04-04,清明
+2025-04-20,谷雨
+2025-05-05,立夏
+2025-05-21,小满
+2025-06-05,芒种
+2025-06-21,夏至
+2025-07-07,小暑
+2025-07-22,大暑
+2025-08-07,立秋
+2025-08-23,处暑
+2025-09-07,白露
+2025-09-23,秋分
+2025-10-08,寒露
+2025-10-23,霜降
+2025-11-07,立冬
+2025-11-22,小雪
+2025-12-07,大雪
+2025-12-21,冬至
+2025-11-12,孙中山诞辰
+2025-03-12,孙中山逝世
+2025-12-26,毛主席诞辰
+2025-09-09,毛主席逝世
+2025-03-05,周恩来诞辰
+2025-01-08,周恩来逝世
+2025-08-22,邓小平诞辰
+2025-02-19,邓小平逝世
+2025-07-03,李克强诞辰
+2025-10-27,李克强逝世
+2025-09-18,九一八
+2025-07-07,七七事变
+2025-09-07,袁隆平诞辰
+2025-05-22,袁隆平逝世
+2025-10-24,彭德怀诞辰
+2025-11-29,彭德怀逝世
+2025-12-01,朱德诞辰
+2025-07-06,朱德逝世
+2025-10-27,吴尊友逝世
+2025-03-05,学雷锋
+2025-03-05,两会
+2025-03-15,315国际消费者权益日
+2026-01-01,元旦
+2026-01-26,腊八节
+2026-02-10,小年
+2026-02-11,小年
+2026-02-16,除夕
+2026-02-17,春节
+2026-02-17,初一
+2026-02-18,初二
+2026-02-19,初三
+2026-02-20,初四
+2026-02-21,初五
+2026-02-22,初六
+2026-02-23,初七
+2026-02-24,初八
+2026-02-25,初九
+2026-02-26,初十
+2026-02-14,情人节
+2026-03-03,元宵节
+2026-03-20,龙抬头
+2026-03-08,妇女节
+2026-03-12,植树节
+2026-05-01,劳动节
+2026-05-10,母亲节
+2026-06-01,儿童节
+2026-06-19,端午节
+2026-06-21,父亲节
+2026-07-01,建党节
+2026-08-01,建军节
+2026-08-19,七夕节
+2026-08-27,中元节
+2026-09-25,中秋节
+2026-10-01,国庆节
+2026-10-18,重阳节
+2026-11-26,感恩节
+2026-12-13,公祭日
+2026-12-24,平安夜
+2026-12-25,圣诞节
+2026-01-05,小寒
+2026-01-20,大寒
+2026-02-04,立春
+2026-02-18,雨水
+2026-03-05,惊蛰
+2026-03-20,春分
+2026-04-05,清明
+2026-04-20,谷雨
+2026-05-05,立夏
+2026-05-21,小满
+2026-06-05,芒种
+2026-06-21,夏至
+2026-07-07,小暑
+2026-07-23,大暑
+2026-08-07,立秋
+2026-08-23,处暑
+2026-09-07,白露
+2026-09-23,秋分
+2026-10-08,寒露
+2026-10-23,霜降
+2026-11-07,立冬
+2026-11-22,小雪
+2026-12-07,大雪
+2026-12-22,冬至
+2026-11-12,孙中山诞辰
+2026-03-12,孙中山逝世
+2026-12-26,毛主席诞辰
+2026-09-09,毛主席逝世
+2026-03-05,周恩来诞辰
+2026-01-08,周恩来逝世
+2026-08-22,邓小平诞辰
+2026-02-19,邓小平逝世
+2026-07-03,李克强诞辰
+2026-10-27,李克强逝世
+2026-09-18,九一八
+2026-07-07,七七事变
+2026-09-07,袁隆平诞辰
+2026-05-22,袁隆平逝世
+2026-10-24,彭德怀诞辰
+2026-11-29,彭德怀逝世
+2026-12-01,朱德诞辰
+2026-07-06,朱德逝世
+2026-10-27,吴尊友逝世
+2026-03-05,学雷锋
+2026-03-05,两会
+2026-03-15,315国际消费者权益日

+ 3 - 2
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys/makedata_recsys_41_originData_20250218.scala

@@ -7,6 +7,7 @@ import com.aliyun.odps.spark.examples.myUtils.{MyDateUtils, MyHdfsUtils, ParamUt
 import examples.extractor.v20250218.ExtractFeature20250218
 import examples.extractor.ExtractorUtils
 import examples.utils.SimilarityUtils
+import examples.utils.FestiveUtil
 import org.apache.hadoop.io.compress.GzipCodec
 import org.apache.spark.sql.SparkSession
 import org.xm.Similarity
@@ -55,6 +56,7 @@ object makedata_recsys_41_originData_20250218 {
           numPartition = tablePart)
         .mapPartitions(p => {
           SimilarityUtils.init()
+          FestiveUtil.init()
           p.map(record => {
 
             val featureMap = new JSONObject()
@@ -125,7 +127,6 @@ object makedata_recsys_41_originData_20250218 {
             ExtractFeature20250218.handleVideoBasicFeature(v1, ts, featureMap)
             ExtractFeature20250218.handleVideoSimilarity(v1, v2, featureMap)
 
-
             //4 处理label信息。
             val labels = new JSONObject
             for (labelKey <- List(
@@ -147,8 +148,8 @@ object makedata_recsys_41_originData_20250218 {
                 logs.put(key, record.getString(key))
               }
             }
-
             logs.put("hour", ExtractorUtils.getHourByTimestamp(ts))
+            logs.put("day_of_week", ExtractorUtils.getDayOrWeekByTimestamp(ts))
 
             val logKey = logs.toString()
             val labelKey = labels.toString()