
feat: add negative sample sampling

zhaohaipeng, 9 months ago
commit 73116a1126

+ 16 - 16
src/main/scala/com/aliyun/odps/spark/examples/SparkPi.scala

@@ -1,20 +1,20 @@
 /**
-  * Licensed to the Apache Software Foundation (ASF) under one
-  * or more contributor license agreements.  See the NOTICE file
-  * distributed with this work for additional information
-  * regarding copyright ownership.  The ASF licenses this file
-  * to you under the Apache License, Version 2.0 (the
-  * "License"); you may not use this file except in compliance
-  * with the License.  You may obtain a copy of the License at
-  * <p>
-  * http://www.apache.org/licenses/LICENSE-2.0
-  * <p>
-  * Unless required by applicable law or agreed to in writing, software
-  * distributed under the License is distributed on an "AS IS" BASIS,
-  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  * See the License for the specific language governing permissions and
-  * limitations under the License.
-  */
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 package com.aliyun.odps.spark.examples
 

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/makedata_ad_33_bucketData_20240622.scala

@@ -51,7 +51,7 @@ object makedata_ad_33_bucketData_20240622 {
     val beginStr = param.getOrElse("beginStr", "20240620")
     val endStr = param.getOrElse("endStr", "20240620")
     val repartition = param.getOrElse("repartition", "100").toInt
-    val filterNames = param.getOrElse("filterNames", "").split(",").toSet
+    val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
     val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")
 
     val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
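Note: the same one-line change is repeated across the bucketData jobs below. In Scala, "".split(",") returns Array(""), so without the added filter an empty filterNames (or retainNames / modifyName) parameter leaves an empty string in the resulting set, which downstream name checks may not expect. A minimal REPL-style sketch of the difference (illustration only, not part of the commit):

    "".split(",").toSet                        // Set("")  -> contains an empty string
    "".split(",").filter(_.nonEmpty).toSet     // Set()    -> empty, as intended
    "a,b".split(",").filter(_.nonEmpty).toSet  // Set(a, b)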

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/makedata_ad_33_bucketData_20240718.scala

@@ -51,7 +51,7 @@ object makedata_ad_33_bucketData_20240718 {
     val beginStr = param.getOrElse("beginStr", "20240620")
     val endStr = param.getOrElse("endStr", "20240620")
     val repartition = param.getOrElse("repartition", "100").toInt
-    val filterNames = param.getOrElse("filterNames", "").split(",").toSet
+    val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
     val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")
 
     val dateRange = MyDateUtils.getDateRange(beginStr, endStr)

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/makedata_ad_33_bucketData_20240726.scala

@@ -31,7 +31,7 @@ object makedata_ad_33_bucketData_20240726 {
     val beginStr = param.getOrElse("beginStr", "20240620")
     val endStr = param.getOrElse("endStr", "20240620")
     val repartition = param.getOrElse("repartition", "100").toInt
-    val filterNames = param.getOrElse("filterNames", "").split(",").toSet
+    val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
     val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")
     val featureNameFile = param.getOrElse("featureNameFile", "20240718_ad_feature_name_517.txt");
 

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/makedata_ad_33_bucketData_20240729.scala

@@ -31,7 +31,7 @@ object makedata_ad_33_bucketData_20240729 {
     val beginStr = param.getOrElse("beginStr", "20240620")
     val endStr = param.getOrElse("endStr", "20240620")
     val repartition = param.getOrElse("repartition", "100").toInt
-    val filterNames = param.getOrElse("filterNames", "").split(",").toSet
+    val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
     val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")
 
     val loader = getClass.getClassLoader

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/makedata_ad_33_bucketData_20240729_copy_zheng.scala

@@ -31,7 +31,7 @@ object makedata_ad_33_bucketData_20240729_copy_zheng {
     val beginStr = param.getOrElse("beginStr", "20240620")
     val endStr = param.getOrElse("endStr", "20240620")
     val repartition = param.getOrElse("repartition", "100").toInt
-    val filterNames = param.getOrElse("filterNames", "").split(",").toSet
+    val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
     val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")
 
     val loader = getClass.getClassLoader

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/makedata_ad_33_bucketData_20240729_reduce_feature.scala

@@ -30,7 +30,7 @@ object makedata_ad_33_bucketData_20240729_reduce_feature {
     val beginStr = param.getOrElse("beginStr", "20240620")
     val endStr = param.getOrElse("endStr", "20240620")
     val repartition = param.getOrElse("repartition", "100").toInt
-    val retainNames = param.getOrElse("retainNames", "").split(",").toSet
+    val retainNames = param.getOrElse("retainNames", "").split(",").filter(_.nonEmpty).toSet
     val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")
 
     val loader = getClass.getClassLoader

+ 2 - 2
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/makedata_ad_33_bucketData_default_value_20240718.scala

@@ -51,9 +51,9 @@ object makedata_ad_33_bucketData_default_value_20240718 {
     val beginStr = param.getOrElse("beginStr", "20240620")
     val endStr = param.getOrElse("endStr", "20240620")
     val repartition = param.getOrElse("repartition", "100").toInt
-    val filterNames = param.getOrElse("filterNames", "").split(",").toSet
+    val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
     val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")
-    val modifyFeatureName= param.getOrElse("modifyName", "").split(",").toSet
+    val modifyFeatureName= param.getOrElse("modifyName", "").split(",").filter(_.nonEmpty).toSet
     val defaultValue= param.getOrElse("defaultValue", "0.01")
 
     val dateRange = MyDateUtils.getDateRange(beginStr, endStr)

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys/makedata_recsys_43_bucketData_fu_sample_20240709.scala

@@ -24,7 +24,7 @@ object makedata_recsys_43_bucketData_fu_sample_20240709 {
     val beginStr = param.getOrElse("beginStr", "20240703")
     val endStr = param.getOrElse("endStr", "20240703")
     val repartition = param.getOrElse("repartition", "100").toInt
-    val filterNames = param.getOrElse("filterNames", "").split(",").toSet
+    val filterNames = param.getOrElse("filterNames", "XXXXXXXXXX").split(",").filter(_.nonEmpty).toSet
     val whatLabel = param.getOrElse("whatLabel", "is_return")
     val whatApps = param.getOrElse("whatApps", "0,4,5,21,3,6").split(",").toSet
     val fuSampleRate= param.getOrElse("fuSampleRate", "0.1").toDouble
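Note: the commit title, "add negative sample sampling", points at this fu_sample job, but the hunk itself only changes the filterNames parsing (its default also becomes a placeholder string); fuSampleRate (default 0.1) already exists as a parameter, and how it is applied is not visible here. A minimal, hypothetical sketch of negative down-sampling with such a keep rate (all names and types below are illustrative, not the commit's code):

    import scala.util.Random

    object FuSampleSketch {
      // Keep every positive row; keep each negative row with probability keepRate.
      def downSampleNegatives(rows: Seq[(Int, String)], keepRate: Double): Seq[(Int, String)] =
        rows.filter { case (label, _) =>
          label == 1 || Random.nextDouble() < keepRate
        }

      def main(args: Array[String]): Unit = {
        val rows = Seq((1, "pos"), (0, "neg1"), (0, "neg2"), (0, "neg3"))
        println(downSampleNegatives(rows, keepRate = 0.1))
      }
    }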