|  | @@ -0,0 +1,260 @@
 | 
	
		
			
				|  |  | +#! /usr/bin/env python
 | 
	
		
			
				|  |  | +# -*- coding: utf-8 -*-
 | 
	
		
			
				|  |  | +# vim:fenc=utf-8
 | 
	
		
			
				|  |  | +#
 | 
	
		
			
				|  |  | +# Copyright © 2025 StrayWarrior <i@straywarrior.com>
 | 
	
		
			
				|  |  | +#
 | 
	
		
			
				|  |  | +# Distributed under terms of the MIT license.
 | 
	
		
			
				|  |  | +
 | 
	
		
			
"""
1. Remove the viewall features, which tend to introduce bias.
2. Remove the cpa features, whose bucketing is uneven.
3. Reduce the number of dense features.
"""
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +raw_input = open("data_fields_v3.config").readlines()
 | 
	
		
			
				|  |  | +input_fields = dict(
 | 
	
		
			
				|  |  | +    map(lambda x: (x[0], x[1]),
 | 
	
		
			
				|  |  | +        map(lambda x: x.strip().split(' '), raw_input)))
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def read_features(filename, excludes=None):
 | 
	
		
			
				|  |  | +    features = open(filename).readlines()
 | 
	
		
			
				|  |  | +    features = [name.strip().lower() for name in features]
 | 
	
		
			
				|  |  | +    if excludes:
 | 
	
		
			
				|  |  | +        for x in excludes:
 | 
	
		
			
				|  |  | +            if x in features:
 | 
	
		
			
				|  |  | +                features.remove(x)
 | 
	
		
			
				|  |  | +    return features
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +exclude_features = ['viewall', 'cpa']
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +dense_features = read_features("features_top300.config", exclude_features)[:150]
 | 
	
		
			
				|  |  | +top_dense_features = read_features('features_top50.config', exclude_features)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +sparse_features = [
 | 
	
		
			
				|  |  | +    "cid", "adid", "adverid",
 | 
	
		
			
				|  |  | +    "region", "city", "brand",
 | 
	
		
			
				|  |  | +    "vid", "cate1", "cate2",
 | 
	
		
			
				|  |  | +    "apptype", "hour", "hour_quarter", "root_source_scene", "root_source_channel", "is_first_layer", "title_split", "user_has_conver_1y",
 | 
	
		
			
				|  |  | +    "user_adverid_view_3d", "user_adverid_view_7d", "user_adverid_view_30d",
 | 
	
		
			
				|  |  | +    "user_adverid_click_3d", "user_adverid_click_7d", "user_adverid_click_30d",
 | 
	
		
			
				|  |  | +    "user_adverid_conver_3d", "user_adverid_conver_7d", "user_adverid_conver_30d",
 | 
	
		
			
				|  |  | +    "user_skuid_view_3d", "user_skuid_view_7d", "user_skuid_view_30d",
 | 
	
		
			
				|  |  | +    "user_skuid_click_3d", "user_skuid_click_7d", "user_skuid_click_30d",
 | 
	
		
			
				|  |  | +    "user_skuid_conver_3d", "user_skuid_conver_7d", "user_skuid_conver_30d"
 | 
	
		
			
				|  |  | +]
 | 
	
		
			
				|  |  | +tag_features = [
 | 
	
		
			
				|  |  | +    "user_vid_return_tags_2h", "user_vid_return_tags_1d", "user_vid_return_tags_3d",
 | 
	
		
			
				|  |  | +    "user_vid_return_tags_7d", "user_vid_return_tags_14d"
 | 
	
		
			
				|  |  | +]
 | 
	
		
			
				|  |  | +seq_features = [
 | 
	
		
			
				|  |  | +    "user_cid_click_list", "user_cid_conver_list"
 | 
	
		
			
				|  |  | +]
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +input_type_map = {
 | 
	
		
			
				|  |  | +    'BIGINT': 'INT64',
 | 
	
		
			
				|  |  | +    'DOUBLE': 'DOUBLE',
 | 
	
		
			
				|  |  | +    'STRING': 'STRING'
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +print("""train_config {
 | 
	
		
			
				|  |  | +  optimizer_config {
 | 
	
		
			
				|  |  | +    adam_optimizer {
 | 
	
		
			
				|  |  | +      learning_rate {
 | 
	
		
			
				|  |  | +        constant_learning_rate {
 | 
	
		
			
				|  |  | +          learning_rate: 0.0010
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +    use_moving_average: false
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +  optimizer_config {
 | 
	
		
			
				|  |  | +    adam_optimizer {
 | 
	
		
			
				|  |  | +      learning_rate {
 | 
	
		
			
				|  |  | +        constant_learning_rate {
 | 
	
		
			
				|  |  | +          learning_rate: 0.0006
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +    use_moving_average: false
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +  optimizer_config {
 | 
	
		
			
				|  |  | +    adam_optimizer {
 | 
	
		
			
				|  |  | +      learning_rate {
 | 
	
		
			
				|  |  | +        constant_learning_rate {
 | 
	
		
			
				|  |  | +          learning_rate: 0.002
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +    use_moving_average: false
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +  num_steps: 200000
 | 
	
		
			
				|  |  | +  sync_replicas: true
 | 
	
		
			
				|  |  | +  save_checkpoints_steps: 1100
 | 
	
		
			
				|  |  | +  log_step_count_steps: 100
 | 
	
		
			
				|  |  | +  save_summary_steps: 100
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +eval_config {
 | 
	
		
			
				|  |  | +  metrics_set {
 | 
	
		
			
				|  |  | +    auc {
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +  eval_online: true
 | 
	
		
			
				|  |  | +  eval_interval_secs: 120
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +data_config {
 | 
	
		
			
				|  |  | +  batch_size: 512
 | 
	
		
			
				|  |  | +  num_epochs: 1
 | 
	
		
			
				|  |  | +""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +for name in input_fields:
 | 
	
		
			
				|  |  | +    input_type = input_type_map[input_fields[name]]
 | 
	
		
			
				|  |  | +    default_spec = ''
 | 
	
		
			
				|  |  | +    if name in dense_features:
 | 
	
		
			
				|  |  | +        default_spec = '\n    default_val: "0"'
 | 
	
		
			
				|  |  | +    print(f"""  input_fields {{
 | 
	
		
			
				|  |  | +    input_name: "{name}"
 | 
	
		
			
				|  |  | +    input_type: {input_type}{default_spec}
 | 
	
		
			
				|  |  | +  }}""")
 | 
	
		
			
				|  |  | +    # default_val: "0"
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +print("""  label_fields: "has_conversion"
 | 
	
		
			
				|  |  | +  prefetch_size: 32
 | 
	
		
			
				|  |  | +  input_type: OdpsInputV2
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +for name in dense_features:
 | 
	
		
			
				|  |  | +    print(f"""feature_configs {{
 | 
	
		
			
				|  |  | +  input_names: "{name}"
 | 
	
		
			
				|  |  | +  feature_type: RawFeature
 | 
	
		
			
				|  |  | +  boundaries: [0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.0]
 | 
	
		
			
				|  |  | +  embedding_dim: 6
 | 
	
		
			
				|  |  | +}}""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +for name in sparse_features:
 | 
	
		
			
				|  |  | +    print(f"""feature_configs {{
 | 
	
		
			
				|  |  | +  input_names: "{name}"
 | 
	
		
			
				|  |  | +  feature_type: IdFeature
 | 
	
		
			
				|  |  | +  hash_bucket_size: 1000000
 | 
	
		
			
				|  |  | +  embedding_dim: 6
 | 
	
		
			
				|  |  | +}}""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +for name in tag_features + seq_features:
 | 
	
		
			
				|  |  | +    print(f"""feature_configs {{
 | 
	
		
			
				|  |  | +  input_names: "{name}"
 | 
	
		
			
				|  |  | +  feature_type: TagFeature
 | 
	
		
			
				|  |  | +  hash_bucket_size: 1000000
 | 
	
		
			
				|  |  | +  embedding_dim: 6
 | 
	
		
			
				|  |  | +  separator: ','
 | 
	
		
			
				|  |  | +}}""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def wide_and_deep():
 | 
	
		
			
				|  |  | +    print("""
 | 
	
		
			
				|  |  | +model_config {
 | 
	
		
			
				|  |  | +  model_class: "WideAndDeep"
 | 
	
		
			
				|  |  | +   feature_groups: {
 | 
	
		
			
				|  |  | +    group_name: 'wide'""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    for name in dense_features + sparse_features:
 | 
	
		
			
				|  |  | +        print(f"""    feature_names: '{name}'""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    print("""    wide_deep: WIDE
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +  feature_groups: {
 | 
	
		
			
				|  |  | +    group_name: 'deep'""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    for name in dense_features + sparse_features + tag_features + seq_features:
 | 
	
		
			
				|  |  | +        print(f"""    feature_names: '{name}'""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    print("""    wide_deep: DEEP
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +  wide_and_deep {
 | 
	
		
			
				|  |  | +    wide_output_dim: 8
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    dnn {
 | 
	
		
			
				|  |  | +      hidden_units: [256, 128, 64]
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +    final_dnn {
 | 
	
		
			
				|  |  | +      hidden_units: [64, 32]
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +    l2_regularization: 1e-5
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +  embedding_regularization: 1e-6
 | 
	
		
			
				|  |  | +}""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def deep_fm():
 | 
	
		
			
				|  |  | +    print("""
 | 
	
		
			
				|  |  | +model_config {
 | 
	
		
			
				|  |  | +  model_class: "DeepFM"
 | 
	
		
			
				|  |  | +   feature_groups: {
 | 
	
		
			
				|  |  | +    group_name: 'wide'""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    for name in dense_features + sparse_features:
 | 
	
		
			
				|  |  | +        print(f"""    feature_names: '{name}'""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    print("""    wide_deep: WIDE
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +  feature_groups: {
 | 
	
		
			
				|  |  | +    group_name: 'deep'""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    for name in top_dense_features + sparse_features + tag_features + seq_features:
 | 
	
		
			
				|  |  | +        print(f"""    feature_names: '{name}'""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    print("""    wide_deep: DEEP
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +  deepfm {
 | 
	
		
			
				|  |  | +    wide_output_dim: 8
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    dnn {
 | 
	
		
			
				|  |  | +      hidden_units: [256, 128, 64]
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    final_dnn {
 | 
	
		
			
				|  |  | +      hidden_units: [64, 32]
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +    l2_regularization: 1e-5
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +  embedding_regularization: 1e-6
 | 
	
		
			
				|  |  | +}""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def fm():
 | 
	
		
			
				|  |  | +    print("""
 | 
	
		
			
				|  |  | +model_config {
 | 
	
		
			
				|  |  | +  model_class: "FM"
 | 
	
		
			
				|  |  | +   feature_groups: {
 | 
	
		
			
				|  |  | +    group_name: 'wide'""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    for name in dense_features:
 | 
	
		
			
				|  |  | +        print(f"""    feature_names: '{name}'""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    print("""    wide_deep: WIDE
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +  feature_groups: {
 | 
	
		
			
				|  |  | +    group_name: 'deep'""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    for name in dense_features:
 | 
	
		
			
				|  |  | +        print(f"""    feature_names: '{name}'""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    print("""    wide_deep: DEEP
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +  fm {
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +  embedding_regularization: 1e-5
 | 
	
		
			
				|  |  | +}""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def config_export():
 | 
	
		
			
				|  |  | +    print("""
 | 
	
		
			
				|  |  | +export_config {
 | 
	
		
			
				|  |  | +  exporter_type: "final"
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +""")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +deep_fm()
 | 
	
		
			
				|  |  | +config_export()
 |