| 
					
				 | 
			
			
				@@ -0,0 +1,217 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+package com.aliyun.odps.spark.examples.makedata_recsys.v20250218 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import com.alibaba.fastjson.{JSON, JSONObject} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import com.aliyun.odps.TableSchema 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import com.aliyun.odps.data.Record 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import com.aliyun.odps.spark.examples.myUtils.{MyDateUtils, MyHdfsUtils, ParamUtils, env} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import examples.extractor.ExtractorUtils 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import examples.extractor.v20250218.ExtractFeature20250218 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import examples.utils.{FestiveUtil, SimilarityUtils, StatisticsUtil} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import org.apache.hadoop.io.compress.GzipCodec 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import org.apache.spark.sql.SparkSession 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import org.xm.Similarity 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import java.util 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import scala.collection.JavaConversions._ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import scala.collection.mutable.ArrayBuffer 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import scala.util.Random 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+/* 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   20250218 提取特征 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Spark job that reads hourly partitions of an ODPS sample table, keeps recommend-scene
				 * rows of the selected app types, down-samples rows whose label is not "1", extracts
				 * features with [[ExtractFeature20250218]] and writes one gzip-compressed text line per
				 * sample to HDFS.
				 *
				 * Each output line has the form `logJson \t labelJson \t featureJson`.
				 */
				object makedata_recsys_41_str_train_data_sample_20250319 {

				  /** Label columns copied (when non-null) into the label JSON of every output line. */
				  private val LabelColumns: List[String] = List(
				    "is_share", "share_cnt",
				    "is_return_1", "return_1_uv",
				    "is_return_n", "return_n_uv",
				    "is_return_noself", "return_1_uv_noself",
				    "is_return_n_noself", "return_n_uv_noself"
				  )

				  /** Log/header columns copied (when non-null) into the log JSON of every output line. */
				  private val LogColumns: List[String] = List(
				    "apptype", "abcode", "mid", "vid", "page", "recommendpagetype", "level", "ts",
				    "headvideoid", "flowpool", "hotsencetype"
				  )

				  /**
				   * Job entry point.
				   *
				   * Recognised arguments (parsed by `ParamUtils.parseArgs`, all optional):
				   * `tablePart`, `beginStr`, `endStr`, `project`, `table`, `repartition`, `savePath`,
				   * `fuSampleRate`, `whatLabel`, `whatApps`.
				   *
				   * @param args raw command-line arguments
				   * @throws IllegalArgumentException if the requested hour range contains no partition
				   */
				  def main(args: Array[String]): Unit = {
				    val spark = SparkSession.builder().appName(this.getClass.getName).getOrCreate()
				    val sc = spark.sparkContext

				    // 1. Read job parameters.
				    val param = ParamUtils.parseArgs(args)
				    val tablePart = param.getOrElse("tablePart", "64").toInt
				    val beginStr = param.getOrElse("beginStr", "2025021812")
				    val endStr = param.getOrElse("endStr", "2025021812")
				    val project = param.getOrElse("project", "loghubods")
				    val table = param.getOrElse("table", "dwd_recsys_alg_sample_all_20250212")
				    val repartition = param.getOrElse("repartition", "32").toInt
				    val savePath = param.getOrElse("savePath", "/dw/recommend/model/41_sample_data/")
				    // Probability of keeping a row whose label column is not "1".
				    val fuSampleRate = param.getOrElse("fuSampleRate", "0.05").toDouble
				    val whatLabel = param.getOrElse("whatLabel", "is_share")
				    // A Set gives constant-time membership tests in the per-record filter below.
				    val whatApps = param
				      .getOrElse("whatApps", "0,4,2,32,17,18,21,22,24,25,26,27,28,29,3,30,31,33,34,35,36")
				      .split(",")
				      .filter(_.nonEmpty)
				      .toSet

				    // 2. ODPS accessor.
				    val odpsOps = env.getODPS(sc)

				    // 3. Build one RDD per hourly partition (dt=yyyyMMdd,hh=HH).
				    val timeRange = MyDateUtils.getDateHourRange(beginStr, endStr)
				    val partitions = timeRange.map { dtHh =>
				      s"dt=${dtHh.substring(0, 8)},hh=${dtHh.substring(8, 10)}"
				    }

				    val perPartition = partitions.map { partition =>
				      println(s"开始读取分区: $partition")
				      odpsOps.readTable(
				          project = project,
				          table = table,
				          partition = partition,
				          transfer = func,
				          numPartition = tablePart)
				        .filter { record =>
				          val page = record.getString("page")
				          val recommendPageType = record.getString("recommendpagetype")
				          val apptype = record.getString("apptype")
				          whatApps.contains(apptype) && StatisticsUtil.isRecommendScene(page, recommendPageType)
				        }
				        .mapPartitions { records =>
				          SimilarityUtils.init()
				          FestiveUtil.init()
				          // One generator per partition instead of one allocation per record.
				          val rng = new Random()
				          records
				            .filter(record => "1".equals(record.getString(whatLabel)) || rng.nextDouble() <= fuSampleRate)
				            .map(buildSampleLine)
				        }
				    }.toSeq

				    // Fail with a clear message instead of an opaque "empty.reduceLeft".
				    require(perPartition.nonEmpty, s"No partitions to read for range [$beginStr, $endStr]")
				    // A single flat union avoids the nested lineage produced by chaining pairwise unions.
				    val odpsData = sc.union(perPartition)

				    // 4. Save to HDFS; only paths under the model directory may be deleted and rewritten.
				    val hdfsPath = savePath
				    if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
				      println("删除路径并开始数据写入:" + hdfsPath)
				      MyHdfsUtils.delete_hdfs_path(hdfsPath)
				      odpsData.coalesce(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
				    } else {
				      println("路径不合法,无法写入:" + hdfsPath)
				    }
				  }

				  /** Parses a JSON feature column, yielding an empty object when the column is null. */
				  private def parseFeature(record: Record, column: String): JSONObject =
				    if (record.isNull(column)) new JSONObject() else JSON.parseObject(record.getString(column))

				  /** Copies every non-null column listed in `keys` from `record` into `target` as a string. */
				  private def copyNonNull(record: Record, keys: Seq[String], target: JSONObject): Unit =
				    keys.foreach { key =>
				      if (!record.isNull(key)) {
				        target.put(key, record.getString(key))
				      }
				    }

				  /** Turns one sample record into its `logJson \t labelJson \t featureJson` output line. */
				  private def buildSampleLine(record: Record): String = {
				    def feature(prefix: String): JSONObject = parseFeature(record, s"${prefix}_feature")

				    val featureMap = new JSONObject()
				    val vid = if (record.isNull("vid")) "" else record.getString("vid")
				    val ts = if (record.isNull("ts")) 0L else record.getString("ts").toLong

				    // Video basic info: v1 = candidate video, v2 = head video.
				    val v1 = feature("v1")
				    val v2 = feature("v2")

				    // b2..b11 and b13 are handled together; b1 and b12 have dedicated handlers.
				    val bFeatureMap = new util.HashMap[String, util.Map[String, Object]]()
				    for (idx <- (2 to 11) :+ 13) {
				      bFeatureMap.put(s"b$idx", feature(s"b$idx"))
				    }

				    // Video (b*) features.
				    ExtractFeature20250218.handleB1(feature("b1"), featureMap)
				    ExtractFeature20250218.handleB2ToB11AndB13(bFeatureMap, featureMap)
				    ExtractFeature20250218.handleB12(feature("b12"), featureMap)

				    // User (c*) features.
				    ExtractFeature20250218.handleC1(feature("c1"), featureMap)
				    ExtractFeature20250218.handleC2ToC3(feature("c2"), feature("c3"), featureMap)
				    ExtractFeature20250218.handleC4(feature("c4"), featureMap)
				    ExtractFeature20250218.handleC5ToC6(feature("c5"), feature("c6"), v1, featureMap)

				    val c78Map = ExtractFeature20250218.handleC7ToC8(feature("c7"), feature("c8"))
				    ExtractFeature20250218.useC7ToC8(c78Map, vid, featureMap)

				    // CF (d*) features, then video basic info and similarity.
				    ExtractFeature20250218.handleD1(feature("d1"), featureMap)
				    ExtractFeature20250218.handleD2(feature("d2"), featureMap)
				    ExtractFeature20250218.handleD3(feature("d3"), featureMap)
				    ExtractFeature20250218.handleVideoBasicFeature(v1, ts, featureMap)
				    ExtractFeature20250218.handleVideoSimilarity(v1, v2, featureMap)

				    // Label information.
				    val labels = new JSONObject()
				    copyNonNull(record, LabelColumns, labels)

				    // Log key header.
				    val logs = new JSONObject()
				    copyNonNull(record, LogColumns, logs)
				    logs.put("hour", ExtractorUtils.getHourByTimestamp(ts))
				    logs.put("day_of_week", ExtractorUtils.getDayOfWeekByTimestamp(ts))

				    // Join the three JSON documents with tabs.
				    s"${logs.toString()}\t${labels.toString()}\t${featureMap.toString()}"
				  }

				  /**
				   * Identity transfer function handed to `readTable`.
				   *
				   * @param record the row read from the table
				   * @param schema the table schema (unused)
				   * @return `record` unchanged
				   */
				  def func(record: Record, schema: TableSchema): Record = record

				  /**
				   * Compares a comma-separated tag list against a title.
				   *
				   * Empty tags are ignored, so an empty tag list is not counted as a match and does not
				   * skew the average. A null `tags` or `title` yields an all-zero result.
				   *
				   * @param tags  comma-separated tags
				   * @param title title to compare the tags with
				   * @return (number of tags contained in the title, those tags joined by ",",
				   *         highest `Similarity.conceptSimilarity` score, mean score over all tags)
				   */
				  def funcC34567ForTags(tags: String, title: String): Tuple4[Double, String, Double, Double] = {
				    if (tags == null || title == null) {
				      (0.0, "", 0.0, 0.0)
				    } else {
				      val tagList = tags.split(",").filter(_.nonEmpty)
				      val matched = tagList.filter(tag => title.contains(tag))
				      val scores = tagList.map(tag => Similarity.conceptSimilarity(tag, title))
				      // Keeps the original "score > best" semantics, starting from 0.0.
				      val maxScore = scores.foldLeft(0.0)((best, score) => if (score > best) score else best)
				      val avgScore = if (scores.nonEmpty) scores.sum / scores.length else 0.0
				      (matched.length.toDouble, matched.mkString(","), maxScore, avgScore)
				    }
				  }
				}
			 |