瀏覽代碼

add time feature

jch 4 月之前
父節點
當前提交
0dde44e89d

+ 22 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_61_originData_20241209.scala

@@ -9,6 +9,7 @@ import examples.utils.SimilarityUtils
 import org.apache.hadoop.io.compress.GzipCodec
 import org.apache.spark.sql.SparkSession
 
+import java.util.Calendar
 import scala.collection.JavaConversions._
 import scala.collection.mutable.ArrayBuffer
 import scala.util.Random
@@ -270,6 +271,16 @@ object makedata_recsys_61_originData_20241209 {
               }
             }
 
+            // time
+            val ts = record.getString("ts")
+            val calendar = tsToCalendar(ts)
+            if (null != calendar) {
+              val week = calendar.get(Calendar.DAY_OF_WEEK)
+              val hour = calendar.get(Calendar.HOUR_OF_DAY) + 1
+              featureMap.put("week", week)
+              featureMap.put("hour", hour)
+            }
+
             /*
             视频特征: 5*6*5 = 240个
                       曝光使用pv 分享使用pv 回流使用uv --> 1h 2h 3h 4h 12h 1d 3d 7d
@@ -311,7 +322,6 @@ object makedata_recsys_61_originData_20241209 {
             val pagesource = record.getString("pagesource")
             val mid = record.getString("mid")
             // vid 已经提取了
-            val ts = record.getString("ts")
             val abcode = record.getString("abcode")
             val level = if (record.isNull("level")) "0" else record.getString("level")
             val logKey = (apptype, pagesource, mid, vid, ts, abcode, level).productIterator.mkString(",")
@@ -420,4 +430,15 @@ object makedata_recsys_61_originData_20241209 {
     }
     new JSONObject()
   }
+
+  def tsToCalendar(ts: String): Calendar = { // Parses ts as epoch seconds and returns a Calendar set to that instant, or null when it falls outside the accepted window.
+    val ms = ts.toLong * 1000L // seconds -> milliseconds. NOTE(review): toLong throws NumberFormatException when ts is not a valid integer string; nothing here catches it.
+    if (ms > 1107658247000L && ms < 2685495047000L) { // sanity window with exclusive bounds, approx. 2005-02-06 to 2055-02-06 (UTC)
+      val calendar = Calendar.getInstance // Calendar in the JVM's default time zone and locale, so derived fields depend on the runtime's zone
+      calendar.setTimeInMillis(ms) // move the calendar to the parsed instant
+      calendar
+    } else {
+      null // out-of-window timestamp: signalled with null rather than an exception, so callers must null-check
+    }
+  }
 }