| 
														
															@@ -90,7 +90,9 @@ object video_dssm_sampler { 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     (record.getString("category1"), record.getString("feature"))  // 或 category2 取决于表 
														 | 
														
														 | 
														
															     (record.getString("category1"), record.getString("feature"))  // 或 category2 取决于表 
														 | 
													
												
											
												
													
														| 
														 | 
														
															   } 
														 | 
														
														 | 
														
															   } 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															- 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+  def funcL2CatStatFeatures(record: Record, schema: TableSchema): (String, String) = { 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    (record.getString("category2"), record.getString("feature"))  // 或 category2 取决于表 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+  } 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															   def generateNegativeSamples(spark: SparkSession, dt: String, outputPath: String): Unit = { 
														 | 
														
														 | 
														
															   def generateNegativeSamples(spark: SparkSession, dt: String, outputPath: String): Unit = { 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     val stats = ProcessingStats() 
														 | 
														
														 | 
														
															     val stats = ProcessingStats() 
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -209,7 +211,7 @@ object video_dssm_sampler { 
														 | 
													
												
											
												
													
														| 
														 | 
														
															           project = "loghubods", 
														 | 
														
														 | 
														
															           project = "loghubods", 
														 | 
													
												
											
												
													
														| 
														 | 
														
															           table = "t_vid_l2_cat_stat_feature", 
														 | 
														
														 | 
														
															           table = "t_vid_l2_cat_stat_feature", 
														 | 
													
												
											
												
													
														| 
														 | 
														
															           partition = s"dt='$dt'", 
														 | 
														
														 | 
														
															           partition = s"dt='$dt'", 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-          transfer = funcCatStatFeatures, 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+          transfer = funcL2CatStatFeatures, 
														 | 
													
												
											
												
													
														| 
														 | 
														
															           numPartition = CONFIG("shuffle.partitions").toInt 
														 | 
														
														 | 
														
															           numPartition = CONFIG("shuffle.partitions").toInt 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         ) 
														 | 
														
														 | 
														
															         ) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         val schema = StructType(Array( 
														 | 
														
														 | 
														
															         val schema = StructType(Array( 
														 |