#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2025 StrayWarrior
#
# Distributed under terms of the MIT license.

"""Print a training pipeline config to stdout.

The script reads three text files from the current working directory:

* ``data_fields_v3.config`` -- one ``<name> <TYPE>`` pair per line, where
  ``TYPE`` must be a key of ``input_type_map``.
* ``features_top300.config`` -- one dense feature name per line.
* ``features_top100.config`` -- one dense feature name per line; used only
  for the 'deep' group of the DeepFM model.

It then prints ``train_config``, ``eval_config``, ``data_config``, one
``feature_configs`` entry per feature, a DeepFM ``model_config`` and an
``export_config``.

NOTE(review): the emitted text looks like an EasyRec pipeline config in
protobuf text format -- confirm with the consumer. Line breaks and
indentation inside the templates below only separate tokens.
"""


def _read_config_lines(path):
    """Return the stripped, non-blank lines of the text file at *path*.

    The file is closed before returning. Blank lines are dropped so that a
    trailing newline or empty line never turns into an empty feature name.
    """
    with open(path, encoding="utf-8") as handle:
        stripped = (line.strip() for line in handle)
        return [line for line in stripped if line]


def _load_input_fields(path):
    """Parse ``<name> <TYPE>`` lines from *path* into a ``{name: TYPE}`` dict.

    Columns may be separated by any run of whitespace; tokens after the
    second one are ignored. If a name occurs more than once, the last
    occurrence wins.

    Raises:
        ValueError: if a non-blank line has fewer than two columns.
    """
    fields = {}
    for line in _read_config_lines(path):
        columns = line.split()
        if len(columns) < 2:
            raise ValueError(
                f"{path}: expected '<name> <type>', got {line!r}")
        fields[columns[0]] = columns[1]
    return fields


input_fields = _load_input_fields("data_fields_v3.config")

dense_features = [
    name.lower() for name in _read_config_lines("features_top300.config")
]
top_dense_features = [
    name.lower() for name in _read_config_lines("features_top100.config")
]

sparse_features = [
    "cid",
    "adid",
    "adverid",
    "region",
    "city",
    "brand",
    "vid",
    "cate1",
    "cate2",
]
tag_features = [
    "user_vid_return_tags_2h",
    "user_vid_return_tags_1d",
    "user_vid_return_tags_3d",
    "user_vid_return_tags_7d",
    "user_vid_return_tags_14d",
]
seq_features = [
    "user_cid_click_list",
    "user_cid_conver_list",
]

# Maps the type column of data_fields_v3.config to the emitted input_type.
input_type_map = {
    'BIGINT': 'INT64',
    'DOUBLE': 'DOUBLE',
    'STRING': 'STRING',
}

# Bucket boundaries for dense features: 0.0, 0.01, ..., 0.99, 1.0.
_BOUNDARIES = ", ".join(str(step / 100) for step in range(101))

print("""train_config {
  optimizer_config {
    adam_optimizer {
      learning_rate {
        constant_learning_rate {
          learning_rate: 0.0010
        }
      }
    }
    use_moving_average: false
  }
  optimizer_config {
    adam_optimizer {
      learning_rate {
        constant_learning_rate {
          learning_rate: 0.0006
        }
      }
    }
    use_moving_average: false
  }
  optimizer_config {
    adam_optimizer {
      learning_rate {
        constant_learning_rate {
          learning_rate: 0.002
        }
      }
    }
    use_moving_average: false
  }
  num_steps: 200000
  sync_replicas: true
  save_checkpoints_steps: 1100
  log_step_count_steps: 100
  save_summary_steps: 100
}

eval_config {
  metrics_set {
    auc {
    }
  }
  eval_online: true
  eval_interval_secs: 120
}

data_config {
  batch_size: 512
  num_epochs: 1
""")

# Set lookup instead of scanning the dense feature list once per field.
# NOTE(review): dense feature names are lower-cased on load but input field
# names are not, so a field only receives default_val when its name is
# already lower-case -- confirm that data_fields_v3.config is lower-case.
_dense_feature_set = set(dense_features)

for name, declared_type in input_fields.items():
    # An unknown type raises KeyError here.
    input_type = input_type_map[declared_type]
    default_spec = ''
    if name in _dense_feature_set:
        default_spec = '\n    default_val: "0"'
    print(f"""  input_fields {{
    input_name: "{name}"
    input_type: {input_type}{default_spec}
  }}""")

print("""  label_fields: "has_conversion"
  prefetch_size: 32
  input_type: OdpsInputV2
}
""")

for name in dense_features:
    print(f"""feature_configs {{
  input_names: "{name}"
  feature_type: RawFeature
  boundaries: [{_BOUNDARIES}]
  embedding_dim: 6
}}""")

for name in sparse_features:
    print(f"""feature_configs {{
  input_names: "{name}"
  feature_type: IdFeature
  hash_bucket_size: 1000000
  embedding_dim: 6
}}""")

# Tag and sequence features are both emitted as comma-separated TagFeature.
for name in tag_features + seq_features:
    print(f"""feature_configs {{
  input_names: "{name}"
  feature_type: TagFeature
  hash_bucket_size: 1000000
  embedding_dim: 6
  separator: ','
}}""")


def _print_model_config(model_class, wide_names, deep_names, model_block):
    """Print a ``model_config`` with a 'wide' and a 'deep' feature group.

    Args:
        model_class: value written to ``model_class``.
        wide_names: feature names listed in the 'wide' group.
        deep_names: feature names listed in the 'deep' group.
        model_block: model-specific text placed after the two groups and
            before the closing brace of ``model_config``.
    """
    print(f"""
model_config {{
  model_class: "{model_class}"
  feature_groups: {{
    group_name: 'wide'""")
    for name in wide_names:
        print(f"""    feature_names: '{name}'""")
    print("""    wide_deep: WIDE
  }
  feature_groups: {
    group_name: 'deep'""")
    for name in deep_names:
        print(f"""    feature_names: '{name}'""")
    print(f"""    wide_deep: DEEP
  }}
{model_block}
}}""")


def _dnn_model_block(block_name):
    """Return the model section shared by Wide&Deep and DeepFM.

    Only the name of the enclosing message (*block_name*) differs between
    the two models.
    """
    return f"""  {block_name} {{
    wide_output_dim: 8
    dnn {{
      hidden_units: [256, 128, 64]
    }}
    final_dnn {{
      hidden_units: [64, 32]
    }}
    l2_regularization: 1e-5
  }}
  embedding_regularization: 1e-6"""


def wide_and_deep():
    """Print a WideAndDeep ``model_config``.

    Wide group: dense + sparse features. Deep group: dense, sparse, tag and
    sequence features.
    """
    _print_model_config(
        "WideAndDeep",
        dense_features + sparse_features,
        dense_features + sparse_features + tag_features + seq_features,
        _dnn_model_block("wide_and_deep"),
    )


def deep_fm():
    """Print a DeepFM ``model_config``.

    Wide group: dense + sparse features. Deep group: the features from
    ``features_top100.config`` plus sparse, tag and sequence features.
    """
    _print_model_config(
        "DeepFM",
        dense_features + sparse_features,
        top_dense_features + sparse_features + tag_features + seq_features,
        _dnn_model_block("deepfm"),
    )


def fm():
    """Print an FM ``model_config``; both groups list only dense features."""
    _print_model_config(
        "FM",
        dense_features,
        dense_features,
        """  fm {
  }
  embedding_regularization: 1e-5""",
    )


def config_export():
    """Print the ``export_config`` section."""
    print("""
export_config {
  exporter_type: "final"
}
""")


deep_fm()
config_export()