Browse Source

add time feature

jch 5 months ago
parent
commit
92a0c48b47

+ 22 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_72_originData_20250109.scala

@@ -9,6 +9,7 @@ import examples.utils.SimilarityUtils
 import org.apache.hadoop.io.compress.GzipCodec
 import org.apache.spark.sql.SparkSession
 
+import java.util.Calendar
 import scala.collection.JavaConversions._
 import scala.collection.mutable.ArrayBuffer
 import scala.util.Random
@@ -277,6 +278,16 @@ object makedata_recsys_72_originData_20250109 {
               }
             }
 
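+            // time features: day of week (Calendar.DAY_OF_WEEK, 1 = Sunday .. 7 = Saturday) and hour of day shifted from 0..23 to 1..24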
+            val ts = record.getString("ts")
+            val calendar = tsToCalendar(ts)
+            if (null != calendar) {
+              val week = calendar.get(Calendar.DAY_OF_WEEK)
+              val hour = calendar.get(Calendar.HOUR_OF_DAY) + 1
+              featureMap.put("week", week)
+              featureMap.put("hour", hour)
+            }
+
             /*
             视频特征: 5*6*5 = 240个
                       曝光使用pv 分享使用pv 回流使用uv --> 1h 2h 3h 4h 12h 1d 3d 7d
@@ -316,7 +327,6 @@ object makedata_recsys_72_originData_20250109 {
             val abcode = record.getString("abcode")
             val mid = record.getString("mid")
             val level = getStringValue(record, "level", "0")
-            val ts = record.getString("ts")
             val logKey = (apptype, page, pagesource, recommendpagetype, flowpool, abcode, mid, vid, level, ts).productIterator.mkString(",")
             val labelKey = labels.toString()
             val featureKey = truncateDecimal(featureMap).toString()
@@ -467,4 +477,15 @@ object makedata_recsys_72_originData_20250109 {
     }
     default
   }
+
+  /**
+   * Converts a Unix timestamp given in seconds (as a decimal string) to a
+   * [[java.util.Calendar]].
+   *
+   * The calendar comes from `Calendar.getInstance`, i.e. it uses the JVM's default
+   * time zone and locale, so fields read from it (day of week, hour of day) depend
+   * on that setting.
+   *
+   * @param ts epoch seconds as a string; may be null, blank, padded or non-numeric
+   * @return a calendar positioned at `ts`, or null when `ts` is missing, cannot be
+   *         parsed as a Long, or lies outside the accepted window
+   *         (exclusive bounds 1107658247 .. 2685495047 seconds)
+   */
+  def tsToCalendar(ts: String): Calendar = {
+    // Parse defensively: a null or malformed ts previously threw NumberFormatException
+    // instead of yielding the null result that the caller already handles.
+    val seconds = Option(ts).flatMap(s => scala.util.Try(s.trim.toLong).toOption)
+    seconds match {
+      // Range-check in seconds so the multiplication below cannot overflow and
+      // wrap a huge value back into the accepted window.
+      case Some(s) if s > 1107658247L && s < 2685495047L =>
+        val calendar = Calendar.getInstance
+        calendar.setTimeInMillis(s * 1000L)
+        calendar
+      case _ =>
+        // null (rather than Option) is kept on purpose: the existing call site
+        // guards with `null != calendar`.
+        null
+    }
+  }
 }