|
@@ -17,7 +17,7 @@ import scala.util.Random
|
|
|
20241211 提取特征
|
|
|
*/
|
|
|
|
|
|
-object makedata_recsys_61_str2ros_originData_20241209 {
|
|
|
+object makedata_recsys_61_originData_20241209 {
|
|
|
def main(args: Array[String]): Unit = {
|
|
|
val spark = SparkSession
|
|
|
.builder()
|
|
@@ -258,6 +258,15 @@ object makedata_recsys_61_str2ros_originData_20241209 {
|
|
|
val headVideo = getJsonObject(record, "v2_feature") // "head" video features read from the record's "v2_feature" field (getJsonObject is defined elsewhere; presumably parses a JSON string -- confirm)
|
|
|
val rankVideo = getJsonObject(record, "v1_feature") // "rank" video features read from the record's "v1_feature" field
|
|
|
if (headVideo.nonEmpty && rankVideo.nonEmpty) { // similarity features are only produced when both feature objects are non-empty
|
|
|
+ val videoAttrs = List("title", "topic", "keywords", "cate1_list", "cate2", "cate2_list", "style", "theme", "user_value") // attribute names compared between the head and rank video
|
|
|
+ for (attr <- videoAttrs) { // one optional "video_sim_<attr>" feature per attribute
|
|
|
+ val headAttr = if (headVideo.containsKey(attr)) headVideo.getString(attr) else "" // missing key falls back to ""; NOTE(review): if getString can return null for a present key, the equals call below would throw -- confirm
|
|
|
+ val rankAttr = if (rankVideo.containsKey(attr)) rankVideo.getString(attr) else "" // same lookup on the rank-video side
|
|
|
+ if (!headAttr.equals("") && !rankAttr.equals("")) { // skip the attribute unless both sides have a non-empty string
|
|
|
+ val simScore = SimilarityUtils.word2VecSimilarity(headAttr, rankAttr) // score from the project's SimilarityUtils helper (presumably word2vec-based, per its name -- confirm range and semantics)
|
|
|
+ featureMap.put("video_sim_" + attr, simScore) // stored under the key "video_sim_" + attribute name
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/*
|