瀏覽代碼

头部视频与排序视频相似特征

jch 4 月之前
父節點
當前提交
69d6350efc

+ 10 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_61_str2ros_originData_20241209.scala → src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_61_originData_20241209.scala

@@ -17,7 +17,7 @@ import scala.util.Random
    20241211 提取特征
  */
 
-object makedata_recsys_61_str2ros_originData_20241209 {
+object makedata_recsys_61_originData_20241209 {
   def main(args: Array[String]): Unit = {
     val spark = SparkSession
       .builder()
@@ -258,6 +258,15 @@ object makedata_recsys_61_str2ros_originData_20241209 {
             val headVideo = getJsonObject(record, "v2_feature")
             val rankVideo = getJsonObject(record, "v1_feature")
             if (headVideo.nonEmpty && rankVideo.nonEmpty) {
+              val videoAttrs = List("title", "topic", "keywords", "cate1_list", "cate2", "cate2_list", "style", "theme", "user_value")
+              for (attr <- videoAttrs) {
+                val headAttr = if (headVideo.containsKey(attr)) headVideo.getString(attr) else ""
+                val rankAttr = if (rankVideo.containsKey(attr)) rankVideo.getString(attr) else ""
+                if (!headAttr.equals("") && !rankAttr.equals("")) {
+                  val simScore = SimilarityUtils.word2VecSimilarity(headAttr, rankAttr)
+                  featureMap.put("video_sim_" + attr, simScore)
+                }
+              }
             }
 
             /*