6 months ago · 022f98a513
--- a/widedeep_v12_2.py
+++ b/widedeep_v12_2.py
@@ -0,0 +1,234 @@
 
															+#! /usr/bin/env python
														
 
															+# -*- coding: utf-8 -*-
														
 
															+# vim:fenc=utf-8
														
 
															+#
														
 
															+# Copyright © 2025 StrayWarrior <i@straywarrior.com>
														
 
															+#
														
 
															+# Distributed under terms of the MIT license.
														
 
															+
														
 
															+
														
 
															+raw_input = open("data_fields_v3.config").readlines()
														
 
															+input_fields = dict(
														
 
															+    map(lambda x: (x[0], x[1]),
														
 
															+        map(lambda x: x.strip().split(' '), raw_input)))
														
 
															+dense_features = open("features_top300.config").readlines()
														
 
															+dense_features = [name.strip().lower() for name in dense_features]
														
 
															+top_dense_features = open('features_top100.config').readlines()
														
 
															+top_dense_features = [name.strip().lower() for name in top_dense_features]
														
 
															+
														
 
															+sparse_features = [
														
 
															+    "cid", "adid", "adverid",
														
 
															+    "region", "city", "brand",
														
 
															+    "vid", "cate1", "cate2",
														
 
															+]
														
 
															+tag_features = [
														
 
															+    "user_vid_return_tags_2h", "user_vid_return_tags_1d", "user_vid_return_tags_3d",
														
 
															+    "user_vid_return_tags_7d", "user_vid_return_tags_14d"
														
 
															+]
														
 
															+seq_features = [
														
 
															+    "user_cid_click_list", "user_cid_conver_list"
														
 
															+]
														
 
															+
														
 
															+input_type_map = {
														
 
															+    'BIGINT': 'INT64',
														
 
															+    'DOUBLE': 'DOUBLE',
														
 
															+    'STRING': 'STRING'
														
 
															+}
														
 
															+
														
 
															+print("""train_config {
														
 
															+  optimizer_config {
														
 
															+    adam_optimizer {
														
 
															+      learning_rate {
														
 
															+        constant_learning_rate {
														
 
															+          learning_rate: 0.0010
														
 
															+        }
														
 
															+      }
														
 
															+    }
														
 
															+    use_moving_average: false
														
 
															+  }
														
 
															+  optimizer_config {
														
 
															+    adam_optimizer {
														
 
															+      learning_rate {
														
 
															+        constant_learning_rate {
														
 
															+          learning_rate: 0.0006
														
 
															+        }
														
 
															+      }
														
 
															+    }
														
 
															+    use_moving_average: false
														
 
															+  }
														
 
															+  optimizer_config {
														
 
															+    adam_optimizer {
														
 
															+      learning_rate {
														
 
															+        constant_learning_rate {
														
 
															+          learning_rate: 0.002
														
 
															+        }
														
 
															+      }
														
 
															+    }
														
 
															+    use_moving_average: false
														
 
															+  }
														
 
															+  num_steps: 200000
														
 
															+  sync_replicas: true
														
 
															+  save_checkpoints_steps: 1100
														
 
															+  log_step_count_steps: 100
														
 
															+  save_summary_steps: 100
														
 
															+}
														
 
															+eval_config {
														
 
															+  metrics_set {
														
 
															+    auc {
														
 
															+    }
														
 
															+  }
														
 
															+  eval_online: true
														
 
															+  eval_interval_secs: 120
														
 
															+}
														
 
															+data_config {
														
 
															+  batch_size: 512
														
 
															+  num_epochs: 1
														
 
															+  shuffle: false
														
 
															+""")
														
 
															+
														
 
															+for name in input_fields:
														
 
															+    input_type = input_type_map[input_fields[name]]
														
 
															+    default_spec = ''
														
 
															+    if name in dense_features:
														
 
															+        default_spec = '\n    default_val: "0"'
														
 
															+    print(f"""  input_fields {{
														
 
															+    input_name: "{name}"
														
 
															+    input_type: {input_type}{default_spec}
														
 
															+  }}""")
														
 
															+    # default_val: "0"
														
 
															+
														
 
															+print("""  label_fields: "has_conversion"
														
 
															+  prefetch_size: 32
														
 
															+  input_type: OdpsInputV2
														
 
															+}
														
 
															+""")
														
 
															+
														
 
															+for name in dense_features:
														
 
															+    print(f"""feature_configs {{
														
 
															+  input_names: "{name}"
														
 
															+  feature_type: RawFeature
														
 
															+  boundaries: [0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.0]
														
 
															+  embedding_dim: 6
														
 
															+}}""")
														
 
															+
														
 
															+for name in sparse_features:
														
 
															+    print(f"""feature_configs {{
														
 
															+  input_names: "{name}"
														
 
															+  feature_type: IdFeature
														
 
															+  hash_bucket_size: 1000000
														
 
															+  embedding_dim: 6
														
 
															+}}""")
														
 
															+
														
 
															+for name in tag_features + seq_features:
														
 
															+    print(f"""feature_configs {{
														
 
															+  input_names: "{name}"
														
 
															+  feature_type: TagFeature
														
 
															+  hash_bucket_size: 1000000
														
 
															+  embedding_dim: 6
														
 
															+  separator: ','
														
 
															+}}""")
														
 
															+
														
 
															+def wide_and_deep():
														
 
															+    print("""
														
 
															+model_config {
														
 
															+  model_class: "WideAndDeep"
														
 
															+   feature_groups: {
														
 
															+    group_name: 'wide'""")
														
 
															+
														
 
															+    for name in dense_features + sparse_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: WIDE
														
 
															+  }
														
 
															+  feature_groups: {
														
 
															+    group_name: 'deep'""")
														
 
															+
														
 
															+    for name in dense_features + sparse_features + tag_features + seq_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: DEEP
														
 
															+  }
														
 
															+  wide_and_deep {
														
 
															+    wide_output_dim: 8
														
 
															+
														
 
															+    dnn {
														
 
															+      hidden_units: [256, 128, 64]
														
 
															+    }
														
 
															+    final_dnn {
														
 
															+      hidden_units: [64, 32]
														
 
															+    }
														
 
															+    l2_regularization: 1e-5
														
 
															+  }
														
 
															+  embedding_regularization: 1e-6
														
 
															+}""")
														
 
															+
														
 
															+def deep_fm():
														
 
															+    print("""
														
 
															+model_config {
														
 
															+  model_class: "DeepFM"
														
 
															+   feature_groups: {
														
 
															+    group_name: 'wide'""")
														
 
															+
														
 
															+    for name in dense_features + sparse_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: WIDE
														
 
															+  }
														
 
															+  feature_groups: {
														
 
															+    group_name: 'deep'""")
														
 
															+
														
 
															+    for name in top_dense_features + sparse_features + tag_features + seq_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: DEEP
														
 
															+  }
														
 
															+  deepfm {
														
 
															+    wide_output_dim: 8
														
 
															+
														
 
															+    dnn {
														
 
															+      hidden_units: [256, 128, 64]
														
 
															+    }
														
 
															+
														
 
															+    final_dnn {
														
 
															+      hidden_units: [64, 32]
														
 
															+    }
														
 
															+    l2_regularization: 1e-5
														
 
															+  }
														
 
															+  embedding_regularization: 1e-6
														
 
															+}""")
														
 
															+
														
 
															+def fm():
														
 
															+    print("""
														
 
															+model_config {
														
 
															+  model_class: "FM"
														
 
															+   feature_groups: {
														
 
															+    group_name: 'wide'""")
														
 
															+
														
 
															+    for name in dense_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: WIDE
														
 
															+  }
														
 
															+  feature_groups: {
														
 
															+    group_name: 'deep'""")
														
 
															+
														
 
															+    for name in dense_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: DEEP
														
 
															+  }
														
 
															+  fm {
														
 
															+  }
														
 
															+  embedding_regularization: 1e-5
														
 
															+}""")
														
 
															+
														
 
															+def config_export():
														
 
															+    print("""
														
 
															+export_config {
														
 
															+  exporter_type: "final"
														
 
															+}
														
 
															+""")
														
 
															+
														
 
															+
														
 
															+deep_fm()
														
 
															+config_export()
														
--- a/widedeep_v12_4.py
+++ b/widedeep_v12_4.py
@@ -0,0 +1,253 @@
 
															+#! /usr/bin/env python
														
 
															+# -*- coding: utf-8 -*-
														
 
															+# vim:fenc=utf-8
														
 
															+#
														
 
															+# Copyright © 2025 StrayWarrior <i@straywarrior.com>
														
 
															+#
														
 
															+# Distributed under terms of the MIT license.
														
 
															+
														
 
															+"""
														
 
Remove the viewall feature, which tends to introduce bias.
														
 
															+"""
														
 
															+
														
 
															+
														
 
															+raw_input = open("data_fields_v3.config").readlines()
														
 
															+input_fields = dict(
														
 
															+    map(lambda x: (x[0], x[1]),
														
 
															+        map(lambda x: x.strip().split(' '), raw_input)))
														
 
															+
														
 
															+def read_features(filename, excludes=None):
														
 
															+    features = open(filename).readlines()
														
 
															+    features = [name.strip().lower() for name in features]
														
 
															+    if excludes:
														
 
															+        for x in excludes:
														
 
															+            if x in features:
														
 
															+                features.remove(x)
														
 
															+    return features
														
 
															+
														
 
															+exclude_features = ['viewall',]
														
 
															+
														
 
															+dense_features = read_features("features_top300.config", exclude_features)
														
 
															+top_dense_features = read_features('features_top100.config', exclude_features)
														
 
															+
														
 
															+sparse_features = [
														
 
															+    "cid", "adid", "adverid",
														
 
															+    "region", "city", "brand",
														
 
															+    "vid", "cate1", "cate2",
														
 
															+    "apptype", "hour", "hour_quarter", "root_source_scene", "root_source_channel", "is_first_layer", "title_split",
														
 
															+    "profession"
														
 
															+]
														
 
															+tag_features = [
														
 
															+    "user_vid_return_tags_2h", "user_vid_return_tags_1d", "user_vid_return_tags_3d",
														
 
															+    "user_vid_return_tags_7d", "user_vid_return_tags_14d"
														
 
															+]
														
 
															+seq_features = [
														
 
															+    "user_cid_click_list", "user_cid_conver_list"
														
 
															+]
														
 
															+
														
 
															+input_type_map = {
														
 
															+    'BIGINT': 'INT64',
														
 
															+    'DOUBLE': 'DOUBLE',
														
 
															+    'STRING': 'STRING'
														
 
															+}
														
 
															+
														
 
															+print("""train_config {
														
 
															+  optimizer_config {
														
 
															+    adam_optimizer {
														
 
															+      learning_rate {
														
 
															+        constant_learning_rate {
														
 
															+          learning_rate: 0.0010
														
 
															+        }
														
 
															+      }
														
 
															+    }
														
 
															+    use_moving_average: false
														
 
															+  }
														
 
															+  optimizer_config {
														
 
															+    adam_optimizer {
														
 
															+      learning_rate {
														
 
															+        constant_learning_rate {
														
 
															+          learning_rate: 0.0006
														
 
															+        }
														
 
															+      }
														
 
															+    }
														
 
															+    use_moving_average: false
														
 
															+  }
														
 
															+  optimizer_config {
														
 
															+    adam_optimizer {
														
 
															+      learning_rate {
														
 
															+        constant_learning_rate {
														
 
															+          learning_rate: 0.002
														
 
															+        }
														
 
															+      }
														
 
															+    }
														
 
															+    use_moving_average: false
														
 
															+  }
														
 
															+  num_steps: 200000
														
 
															+  sync_replicas: true
														
 
															+  save_checkpoints_steps: 1100
														
 
															+  log_step_count_steps: 100
														
 
															+  save_summary_steps: 100
														
 
															+}
														
 
															+eval_config {
														
 
															+  metrics_set {
														
 
															+    auc {
														
 
															+    }
														
 
															+  }
														
 
															+  eval_online: true
														
 
															+  eval_interval_secs: 120
														
 
															+}
														
 
															+data_config {
														
 
															+  batch_size: 512
														
 
															+  num_epochs: 1
														
 
															+""")
														
 
															+
														
 
															+for name in input_fields:
														
 
															+    input_type = input_type_map[input_fields[name]]
														
 
															+    default_spec = ''
														
 
															+    if name in dense_features:
														
 
															+        default_spec = '\n    default_val: "0"'
														
 
															+    print(f"""  input_fields {{
														
 
															+    input_name: "{name}"
														
 
															+    input_type: {input_type}{default_spec}
														
 
															+  }}""")
														
 
															+    # default_val: "0"
														
 
															+
														
 
															+print("""  label_fields: "has_conversion"
														
 
															+  prefetch_size: 32
														
 
															+  input_type: OdpsInputV2
														
 
															+}
														
 
															+""")
														
 
															+
														
 
															+for name in dense_features:
														
 
															+    print(f"""feature_configs {{
														
 
															+  input_names: "{name}"
														
 
															+  feature_type: RawFeature
														
 
															+  boundaries: [0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.0]
														
 
															+  embedding_dim: 6
														
 
															+}}""")
														
 
															+
														
 
															+for name in sparse_features:
														
 
															+    print(f"""feature_configs {{
														
 
															+  input_names: "{name}"
														
 
															+  feature_type: IdFeature
														
 
															+  hash_bucket_size: 1000000
														
 
															+  embedding_dim: 6
														
 
															+}}""")
														
 
															+
														
 
															+for name in tag_features + seq_features:
														
 
															+    print(f"""feature_configs {{
														
 
															+  input_names: "{name}"
														
 
															+  feature_type: TagFeature
														
 
															+  hash_bucket_size: 1000000
														
 
															+  embedding_dim: 6
														
 
															+  separator: ','
														
 
															+}}""")
														
 
															+
														
 
															+
														
 
															+def wide_and_deep():
														
 
															+    print("""
														
 
															+model_config {
														
 
															+  model_class: "WideAndDeep"
														
 
															+   feature_groups: {
														
 
															+    group_name: 'wide'""")
														
 
															+
														
 
															+    for name in dense_features + sparse_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: WIDE
														
 
															+  }
														
 
															+  feature_groups: {
														
 
															+    group_name: 'deep'""")
														
 
															+
														
 
															+    for name in dense_features + sparse_features + tag_features + seq_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: DEEP
														
 
															+  }
														
 
															+  wide_and_deep {
														
 
															+    wide_output_dim: 8
														
 
															+
														
 
															+    dnn {
														
 
															+      hidden_units: [256, 128, 64]
														
 
															+    }
														
 
															+    final_dnn {
														
 
															+      hidden_units: [64, 32]
														
 
															+    }
														
 
															+    l2_regularization: 1e-5
														
 
															+  }
														
 
															+  embedding_regularization: 1e-6
														
 
															+}""")
														
 
															+
														
 
															+
														
 
															+def deep_fm():
														
 
															+    print("""
														
 
															+model_config {
														
 
															+  model_class: "DeepFM"
														
 
															+   feature_groups: {
														
 
															+    group_name: 'wide'""")
														
 
															+
														
 
															+    for name in dense_features + sparse_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: WIDE
														
 
															+  }
														
 
															+  feature_groups: {
														
 
															+    group_name: 'deep'""")
														
 
															+
														
 
															+    for name in top_dense_features + sparse_features + tag_features + seq_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: DEEP
														
 
															+  }
														
 
															+  deepfm {
														
 
															+    wide_output_dim: 8
														
 
															+
														
 
															+    dnn {
														
 
															+      hidden_units: [256, 128, 64]
														
 
															+    }
														
 
															+
														
 
															+    final_dnn {
														
 
															+      hidden_units: [64, 32]
														
 
															+    }
														
 
															+    l2_regularization: 1e-5
														
 
															+  }
														
 
															+  embedding_regularization: 1e-6
														
 
															+}""")
														
 
															+
														
 
															+
														
 
															+def fm():
														
 
															+    print("""
														
 
															+model_config {
														
 
															+  model_class: "FM"
														
 
															+   feature_groups: {
														
 
															+    group_name: 'wide'""")
														
 
															+
														
 
															+    for name in dense_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: WIDE
														
 
															+  }
														
 
															+  feature_groups: {
														
 
															+    group_name: 'deep'""")
														
 
															+
														
 
															+    for name in dense_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: DEEP
														
 
															+  }
														
 
															+  fm {
														
 
															+  }
														
 
															+  embedding_regularization: 1e-5
														
 
															+}""")
														
 
															+
														
 
															+
														
 
															+def config_export():
														
 
															+    print("""
														
 
															+export_config {
														
 
															+  exporter_type: "final"
														
 
															+}
														
 
															+""")
														
 
															+
														
 
															+
														
 
															+deep_fm()
														
 
															+config_export()
														
--- a/widedeep_v12_5.py
+++ b/widedeep_v12_5.py
@@ -0,0 +1,254 @@
 
															+#! /usr/bin/env python
														
 
															+# -*- coding: utf-8 -*-
														
 
															+# vim:fenc=utf-8
														
 
															+#
														
 
															+# Copyright © 2025 StrayWarrior <i@straywarrior.com>
														
 
															+#
														
 
															+# Distributed under terms of the MIT license.
														
 
															+
														
 
															+"""
														
 
															+删除容易导致偏差的viewall特征
														
 
															+"""
														
 
															+
														
 
															+
														
 
															+raw_input = open("data_fields_v3.config").readlines()
														
 
															+input_fields = dict(
														
 
															+    map(lambda x: (x[0], x[1]),
														
 
															+        map(lambda x: x.strip().split(' '), raw_input)))
														
 
															+
														
 
															+def read_features(filename, excludes=None):
														
 
															+    features = open(filename).readlines()
														
 
															+    features = [name.strip().lower() for name in features]
														
 
															+    if excludes:
														
 
															+        for x in excludes:
														
 
															+            if x in features:
														
 
															+                features.remove(x)
														
 
															+    return features
														
 
															+
														
 
															+exclude_features = ['viewall', "e1_tags_14d_maxscore","e2_tags_14d_avgscore","e2_tags_14d_maxscore","e1_tags_14d_avgscore","e2_tags_7d_maxscore","e2_tags_7d_avgscore","e2_tags_3d_avgscore","e1_tags_3d_maxscore","e1_tags_7d_maxscore","e2_tags_3d_maxscore","e1_tags_3d_avgscore","e1_tags_7d_avgscore"
														
 
															+]
														
 
															+
														
 
															+dense_features = read_features("features_top300.config", exclude_features)
														
 
															+top_dense_features = read_features('features_top100.config', exclude_features)
														
 
															+
														
 
															+sparse_features = [
														
 
															+    "cid", "adid", "adverid",
														
 
															+    "region", "city", "brand",
														
 
															+    "vid", "cate1", "cate2",
														
 
															+    "apptype", "hour", "hour_quarter", "root_source_scene", "root_source_channel", "is_first_layer", "title_split",
														
 
															+    "profession"
														
 
															+]
														
 
															+tag_features = [
														
 
															+    "user_vid_return_tags_2h", "user_vid_return_tags_1d", "user_vid_return_tags_3d",
														
 
															+    "user_vid_return_tags_7d", "user_vid_return_tags_14d"
														
 
															+]
														
 
															+seq_features = [
														
 
															+    "user_cid_click_list", "user_cid_conver_list"
														
 
															+]
														
 
															+
														
 
															+input_type_map = {
														
 
															+    'BIGINT': 'INT64',
														
 
															+    'DOUBLE': 'DOUBLE',
														
 
															+    'STRING': 'STRING'
														
 
															+}
														
 
															+
														
 
															+print("""train_config {
														
 
															+  optimizer_config {
														
 
															+    adam_optimizer {
														
 
															+      learning_rate {
														
 
															+        constant_learning_rate {
														
 
															+          learning_rate: 0.0010
														
 
															+        }
														
 
															+      }
														
 
															+    }
														
 
															+    use_moving_average: false
														
 
															+  }
														
 
															+  optimizer_config {
														
 
															+    adam_optimizer {
														
 
															+      learning_rate {
														
 
															+        constant_learning_rate {
														
 
															+          learning_rate: 0.0006
														
 
															+        }
														
 
															+      }
														
 
															+    }
														
 
															+    use_moving_average: false
														
 
															+  }
														
 
															+  optimizer_config {
														
 
															+    adam_optimizer {
														
 
															+      learning_rate {
														
 
															+        constant_learning_rate {
														
 
															+          learning_rate: 0.002
														
 
															+        }
														
 
															+      }
														
 
															+    }
														
 
															+    use_moving_average: false
														
 
															+  }
														
 
															+  num_steps: 200000
														
 
															+  sync_replicas: true
														
 
															+  save_checkpoints_steps: 1100
														
 
															+  log_step_count_steps: 100
														
 
															+  save_summary_steps: 100
														
 
															+}
														
 
															+eval_config {
														
 
															+  metrics_set {
														
 
															+    auc {
														
 
															+    }
														
 
															+  }
														
 
															+  eval_online: true
														
 
															+  eval_interval_secs: 120
														
 
															+}
														
 
															+data_config {
														
 
															+  batch_size: 512
														
 
															+  num_epochs: 1
														
 
															+""")
														
 
															+
														
 
															+for name in input_fields:
														
 
															+    input_type = input_type_map[input_fields[name]]
														
 
															+    default_spec = ''
														
 
															+    if name in dense_features:
														
 
															+        default_spec = '\n    default_val: "0"'
														
 
															+    print(f"""  input_fields {{
														
 
															+    input_name: "{name}"
														
 
															+    input_type: {input_type}{default_spec}
														
 
															+  }}""")
														
 
															+    # default_val: "0"
														
 
															+
														
 
															+print("""  label_fields: "has_conversion"
														
 
															+  prefetch_size: 32
														
 
															+  input_type: OdpsInputV2
														
 
															+}
														
 
															+""")
														
 
															+
														
 
															+for name in dense_features:
														
 
															+    print(f"""feature_configs {{
														
 
															+  input_names: "{name}"
														
 
															+  feature_type: RawFeature
														
 
															+  boundaries: [0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.0]
														
 
															+  embedding_dim: 6
														
 
															+}}""")
														
 
															+
														
 
															+for name in sparse_features:
														
 
															+    print(f"""feature_configs {{
														
 
															+  input_names: "{name}"
														
 
															+  feature_type: IdFeature
														
 
															+  hash_bucket_size: 1000000
														
 
															+  embedding_dim: 6
														
 
															+}}""")
														
 
															+
														
 
															+for name in tag_features + seq_features:
														
 
															+    print(f"""feature_configs {{
														
 
															+  input_names: "{name}"
														
 
															+  feature_type: TagFeature
														
 
															+  hash_bucket_size: 1000000
														
 
															+  embedding_dim: 6
														
 
															+  separator: ','
														
 
															+}}""")
														
 
															+
														
 
															+
														
 
															+def wide_and_deep():
														
 
															+    print("""
														
 
															+model_config {
														
 
															+  model_class: "WideAndDeep"
														
 
															+   feature_groups: {
														
 
															+    group_name: 'wide'""")
														
 
															+
														
 
															+    for name in dense_features + sparse_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: WIDE
														
 
															+  }
														
 
															+  feature_groups: {
														
 
															+    group_name: 'deep'""")
														
 
															+
														
 
															+    for name in dense_features + sparse_features + tag_features + seq_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: DEEP
														
 
															+  }
														
 
															+  wide_and_deep {
														
 
															+    wide_output_dim: 8
														
 
															+
														
 
															+    dnn {
														
 
															+      hidden_units: [256, 128, 64]
														
 
															+    }
														
 
															+    final_dnn {
														
 
															+      hidden_units: [64, 32]
														
 
															+    }
														
 
															+    l2_regularization: 1e-5
														
 
															+  }
														
 
															+  embedding_regularization: 1e-6
														
 
															+}""")
														
 
															+
														
 
															+
														
 
															+def deep_fm():
														
 
															+    print("""
														
 
															+model_config {
														
 
															+  model_class: "DeepFM"
														
 
															+   feature_groups: {
														
 
															+    group_name: 'wide'""")
														
 
															+
														
 
															+    for name in dense_features + sparse_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: WIDE
														
 
															+  }
														
 
															+  feature_groups: {
														
 
															+    group_name: 'deep'""")
														
 
															+
														
 
															+    for name in top_dense_features + sparse_features + tag_features + seq_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: DEEP
														
 
															+  }
														
 
															+  deepfm {
														
 
															+    wide_output_dim: 8
														
 
															+
														
 
															+    dnn {
														
 
															+      hidden_units: [256, 128, 64]
														
 
															+    }
														
 
															+
														
 
															+    final_dnn {
														
 
															+      hidden_units: [64, 32]
														
 
															+    }
														
 
															+    l2_regularization: 1e-5
														
 
															+  }
														
 
															+  embedding_regularization: 1e-6
														
 
															+}""")
														
 
															+
														
 
															+
														
 
															+def fm():
														
 
															+    print("""
														
 
															+model_config {
														
 
															+  model_class: "FM"
														
 
															+   feature_groups: {
														
 
															+    group_name: 'wide'""")
														
 
															+
														
 
															+    for name in dense_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: WIDE
														
 
															+  }
														
 
															+  feature_groups: {
														
 
															+    group_name: 'deep'""")
														
 
															+
														
 
															+    for name in dense_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: DEEP
														
 
															+  }
														
 
															+  fm {
														
 
															+  }
														
 
															+  embedding_regularization: 1e-5
														
 
															+}""")
														
 
															+
														
 
															+
														
 
															+def config_export():
														
 
															+    print("""
														
 
															+export_config {
														
 
															+  exporter_type: "final"
														
 
															+}
														
 
															+""")
														
 
															+
														
 
															+
														
 
# Script tail: print the DeepFM model_config section, then export_config.
# wide_and_deep() and fm() are defined above but not called here.
deep_fm()
config_export()
														
--- a/widedeep_v12_6.py
+++ b/widedeep_v12_6.py
@@ -0,0 +1,252 @@
 
															+#! /usr/bin/env python
														
 
															+# -*- coding: utf-8 -*-
														
 
															+# vim:fenc=utf-8
														
 
															+#
														
 
															+# Copyright © 2025 StrayWarrior <i@straywarrior.com>
														
 
															+#
														
 
															+# Distributed under terms of the MIT license.
														
 
															+
														
 
															+"""
														
 
															+删除容易导致偏差的viewall特征
														
 
															+"""
														
 
															+
														
 
															+
														
 
															+raw_input = open("data_fields_v3.config").readlines()
														
 
															+input_fields = dict(
														
 
															+    map(lambda x: (x[0], x[1]),
														
 
															+        map(lambda x: x.strip().split(' '), raw_input)))
														
 
															+
														
 
															+def read_features(filename, excludes=None):
														
 
															+    features = open(filename).readlines()
														
 
															+    features = [name.strip().lower() for name in features]
														
 
															+    if excludes:
														
 
															+        for x in excludes:
														
 
															+            if x in features:
														
 
															+                features.remove(x)
														
 
															+    return features
														
 
															+
														
 
															+exclude_features = ['viewall',]
														
 
															+
														
 
															+dense_features = read_features("features_top300.config", exclude_features)
														
 
															+top_dense_features = read_features('features_top100.config', exclude_features)
														
 
															+
														
 
															+sparse_features = [
														
 
															+    "cid", "adid", "adverid",
														
 
															+    "region", "city", "brand",
														
 
															+    "vid", "cate1", "cate2",
														
 
															+    "apptype", "hour", "hour_quarter", "root_source_scene", "root_source_channel", "is_first_layer", "title_split",
														
 
															+]
														
 
															+tag_features = [
														
 
															+    "user_vid_return_tags_2h", "user_vid_return_tags_1d", "user_vid_return_tags_3d",
														
 
															+    "user_vid_return_tags_7d", "user_vid_return_tags_14d"
														
 
															+]
														
 
															+seq_features = [
														
 
															+    "user_cid_click_list", "user_cid_conver_list"
														
 
															+]
														
 
															+
														
 
															+input_type_map = {
														
 
															+    'BIGINT': 'INT64',
														
 
															+    'DOUBLE': 'DOUBLE',
														
 
															+    'STRING': 'STRING'
														
 
															+}
														
 
															+
														
 
															+print("""train_config {
														
 
															+  optimizer_config {
														
 
															+    adam_optimizer {
														
 
															+      learning_rate {
														
 
															+        constant_learning_rate {
														
 
															+          learning_rate: 0.0010
														
 
															+        }
														
 
															+      }
														
 
															+    }
														
 
															+    use_moving_average: false
														
 
															+  }
														
 
															+  optimizer_config {
														
 
															+    adam_optimizer {
														
 
															+      learning_rate {
														
 
															+        constant_learning_rate {
														
 
															+          learning_rate: 0.0006
														
 
															+        }
														
 
															+      }
														
 
															+    }
														
 
															+    use_moving_average: false
														
 
															+  }
														
 
															+  optimizer_config {
														
 
															+    adam_optimizer {
														
 
															+      learning_rate {
														
 
															+        constant_learning_rate {
														
 
															+          learning_rate: 0.002
														
 
															+        }
														
 
															+      }
														
 
															+    }
														
 
															+    use_moving_average: false
														
 
															+  }
														
 
															+  num_steps: 200000
														
 
															+  sync_replicas: true
														
 
															+  save_checkpoints_steps: 1100
														
 
															+  log_step_count_steps: 100
														
 
															+  save_summary_steps: 100
														
 
															+}
														
 
															+eval_config {
														
 
															+  metrics_set {
														
 
															+    auc {
														
 
															+    }
														
 
															+  }
														
 
															+  eval_online: true
														
 
															+  eval_interval_secs: 120
														
 
															+}
														
 
															+data_config {
														
 
															+  batch_size: 512
														
 
															+  num_epochs: 1
														
 
															+""")
														
 
															+
														
 
															+for name in input_fields:
														
 
															+    input_type = input_type_map[input_fields[name]]
														
 
															+    default_spec = ''
														
 
															+    if name in dense_features:
														
 
															+        default_spec = '\n    default_val: "0"'
														
 
															+    print(f"""  input_fields {{
														
 
															+    input_name: "{name}"
														
 
															+    input_type: {input_type}{default_spec}
														
 
															+  }}""")
														
 
															+    # default_val: "0"
														
 
															+
														
 
															+print("""  label_fields: "has_conversion"
														
 
															+  prefetch_size: 32
														
 
															+  input_type: OdpsInputV2
														
 
															+}
														
 
															+""")
														
 
															+
														
 
															+for name in dense_features:
														
 
															+    print(f"""feature_configs {{
														
 
															+  input_names: "{name}"
														
 
															+  feature_type: RawFeature
														
 
															+  boundaries: [0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.0]
														
 
															+  embedding_dim: 6
														
 
															+}}""")
														
 
															+
														
 
															+for name in sparse_features:
														
 
															+    print(f"""feature_configs {{
														
 
															+  input_names: "{name}"
														
 
															+  feature_type: IdFeature
														
 
															+  hash_bucket_size: 1000000
														
 
															+  embedding_dim: 6
														
 
															+}}""")
														
 
															+
														
 
															+for name in tag_features + seq_features:
														
 
															+    print(f"""feature_configs {{
														
 
															+  input_names: "{name}"
														
 
															+  feature_type: TagFeature
														
 
															+  hash_bucket_size: 1000000
														
 
															+  embedding_dim: 6
														
 
															+  separator: ','
														
 
															+}}""")
														
 
															+
														
 
															+
														
 
															+def wide_and_deep():
														
 
															+    print("""
														
 
															+model_config {
														
 
															+  model_class: "WideAndDeep"
														
 
															+   feature_groups: {
														
 
															+    group_name: 'wide'""")
														
 
															+
														
 
															+    for name in dense_features + sparse_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: WIDE
														
 
															+  }
														
 
															+  feature_groups: {
														
 
															+    group_name: 'deep'""")
														
 
															+
														
 
															+    for name in dense_features + sparse_features + tag_features + seq_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: DEEP
														
 
															+  }
														
 
															+  wide_and_deep {
														
 
															+    wide_output_dim: 8
														
 
															+
														
 
															+    dnn {
														
 
															+      hidden_units: [256, 128, 64]
														
 
															+    }
														
 
															+    final_dnn {
														
 
															+      hidden_units: [64, 32]
														
 
															+    }
														
 
															+    l2_regularization: 1e-5
														
 
															+  }
														
 
															+  embedding_regularization: 1e-6
														
 
															+}""")
														
 
															+
														
 
															+
														
 
															+def deep_fm():
														
 
															+    print("""
														
 
															+model_config {
														
 
															+  model_class: "DeepFM"
														
 
															+   feature_groups: {
														
 
															+    group_name: 'wide'""")
														
 
															+
														
 
															+    for name in dense_features + sparse_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: WIDE
														
 
															+  }
														
 
															+  feature_groups: {
														
 
															+    group_name: 'deep'""")
														
 
															+
														
 
															+    for name in top_dense_features + sparse_features + tag_features + seq_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: DEEP
														
 
															+  }
														
 
															+  deepfm {
														
 
															+    wide_output_dim: 8
														
 
															+
														
 
															+    dnn {
														
 
															+      hidden_units: [256, 128, 64]
														
 
															+    }
														
 
															+
														
 
															+    final_dnn {
														
 
															+      hidden_units: [64, 32]
														
 
															+    }
														
 
															+    l2_regularization: 1e-5
														
 
															+  }
														
 
															+  embedding_regularization: 1e-6
														
 
															+}""")
														
 
															+
														
 
															+
														
 
															+def fm():
														
 
															+    print("""
														
 
															+model_config {
														
 
															+  model_class: "FM"
														
 
															+   feature_groups: {
														
 
															+    group_name: 'wide'""")
														
 
															+
														
 
															+    for name in dense_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: WIDE
														
 
															+  }
														
 
															+  feature_groups: {
														
 
															+    group_name: 'deep'""")
														
 
															+
														
 
															+    for name in dense_features:
														
 
															+        print(f"""    feature_names: '{name}'""")
														
 
															+
														
 
															+    print("""    wide_deep: DEEP
														
 
															+  }
														
 
															+  fm {
														
 
															+  }
														
 
															+  embedding_regularization: 1e-5
														
 
															+}""")
														
 
															+
														
 
															+
														
 
															+def config_export():
														
 
															+    print("""
														
 
															+export_config {
														
 
															+  exporter_type: "final"
														
 
															+}
														
 
															+""")
														
 
															+
														
 
															+
														
 
# Script tail: print the DeepFM model_config section, then export_config.
# wide_and_deep() and fm() are defined above but not called here.
deep_fm()
config_export()