xielixun vor 4 Jahren
Ursprung
Commit
f73a474546
2 geänderte Dateien mit 2 neuen und 19 gelöschten Zeilen
  1. 1 11
      match_recall/Dssm_tzld_match.py
  2. 1 8
      match_recall/FM_tzld_match.py

+ 1 - 11
match_recall/Dssm_tzld_match.py

@@ -4,9 +4,6 @@ import os
 import gc
 import time
 from deepctr.feature_column import SparseFeat, VarLenSparseFeat, get_feature_names
-# from preprocess_tzld210223 import gen_data_set, gen_model_input, gen_model_input_user_emb
-# from preprocess_tzld210303 import gen_data_set, gen_model_input, gen_model_input_user_emb
-# from preprocess_tzld210315 import gen_data_set, gen_model_input, gen_model_input_user_emb
 # from preprocess_tzld210322_gen import gen_data_set, gen_model_input, gen_model_input_user_emb
 # from preprocess_tzld210327_gen import gen_data_set, gen_model_input, gen_model_input_user_emb
 from preprocess_tzld210423_gen import gen_data_set, gen_model_input, gen_model_input_user_emb
@@ -44,7 +41,6 @@ def generate_arrays_from_train(train_set, user_profile, SEQ_LEN):
 
 def generate_arrays_from_train_bak(train_set, user_profile, SEQ_LEN):
     global count_train
-    # batch_size = 8
     while True:
         try:
             train_batch = train_set[(count_train - 1) * batch_size: count_train * batch_size]
@@ -64,8 +60,6 @@ def generate_arrays_from_train_bak(train_set, user_profile, SEQ_LEN):
 def generate_arrays_from_test(train_set, user_profile, SEQ_LEN):
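     # NOTE(review): original comment reads "x_y is our training set including labels; the first item of each row is our image path, the rest are the one-hot labels" - this function takes train_set, not x_y, so the comment looks stale; confirm and update.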
 
-    # global count
-    # batch_size = 8
     while 1:
         for i in range(0, len(train_set), batch_size):
             try:
@@ -83,7 +77,6 @@ def generate_arrays_from_test(train_set, user_profile, SEQ_LEN):
 
 def generate_arrays_from_test_bak(train_set, user_profile, SEQ_LEN):
     global count_test
-    # batch_size = 8
     while True:
         try:
             train_batch = train_set[(count_test - 1) * batch_size: count_test * batch_size]
@@ -202,7 +195,6 @@ if __name__ == "__main__":
                                                         embedding_name="videoid"), SEQ_LEN, 'mean', 'hist_len'),
                             ]
 
-    # item_feature_columns = [SparseFeat('movie_id', feature_max_idx['movie_id'], embedding_dim)]
     item_feature_columns = [SparseFeat('videoid', feature_max_idx['videoid'], embedding_dim),
                             SparseFeat("videoGenre1", feature_max_idx["videoGenre1"], embedding_dim),
                             SparseFeat("videoGenre2", feature_max_idx["videoGenre2"], embedding_dim),
@@ -223,7 +215,7 @@ if __name__ == "__main__":
         tf.compat.v1.disable_eager_execution()
     # 3.Define Model and train
 
-    model = FM(user_feature_columns, item_feature_columns)
+    model = DSSM(user_feature_columns, item_feature_columns)
     logdir = os.path.join("log_callbacks_dssm")  # Tensorboard需要一个文件夹
     if not os.path.exists(logdir):
         os.mkdir(logdir)
@@ -250,7 +242,6 @@ if __name__ == "__main__":
         tf.keras.metrics.AUC(name='auc-PRC', curve='PR')
     ]
 
-    # model.compile(optimizer='adagrad', loss="binary_crossentropy")
     # compile the model, set loss function, optimizer and evaluation metrics
     model.compile(
         loss='binary_crossentropy',
@@ -264,7 +255,6 @@ if __name__ == "__main__":
 
     model.save("./tensorflow_DSSM-027-tzld-1.h5")
 
-    # all_item_model_input = {"videoid": item_profile['videoid'].values}
     all_item_model_input = {"videoid": item_profile['videoid'].values,
                             "videoGenre1": item_profile['videoGenre1'].values,
                             "videoGenre2": item_profile['videoGenre2'].values,

+ 1 - 8
match_recall/FM_tzld_match.py

@@ -4,9 +4,6 @@ import os
 import gc
 import time
 from deepctr.feature_column import SparseFeat, VarLenSparseFeat, get_feature_names
-# from preprocess_tzld210223 import gen_data_set, gen_model_input, gen_model_input_user_emb
-# from preprocess_tzld210303 import gen_data_set, gen_model_input, gen_model_input_user_emb
-# from preprocess_tzld210315 import gen_data_set, gen_model_input, gen_model_input_user_emb
 # from preprocess_tzld210322_gen import gen_data_set, gen_model_input, gen_model_input_user_emb
 # from preprocess_tzld210327_gen import gen_data_set, gen_model_input, gen_model_input_user_emb
 from preprocess_tzld210423_gen import gen_data_set, gen_model_input, gen_model_input_user_emb
@@ -44,7 +41,6 @@ def generate_arrays_from_train(train_set, user_profile, SEQ_LEN):
 
 def generate_arrays_from_train_bak(train_set, user_profile, SEQ_LEN):
     global count_train
-    # batch_size = 8
     while True:
         try:
             train_batch = train_set[(count_train - 1) * batch_size: count_train * batch_size]
@@ -64,8 +60,6 @@ def generate_arrays_from_train_bak(train_set, user_profile, SEQ_LEN):
 def generate_arrays_from_test(train_set, user_profile, SEQ_LEN):
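     # NOTE(review): original comment reads "x_y is our training set including labels; the first item of each row is our image path, the rest are the one-hot labels" - this function takes train_set, not x_y, so the comment looks stale; confirm and update.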
 
-    # global count
-    # batch_size = 8
     while 1:
         for i in range(0, len(train_set), batch_size):
             try:
@@ -203,7 +197,6 @@ if __name__ == "__main__":
                                                         embedding_name="videoid"), SEQ_LEN, 'mean', 'hist_len'),
                             ]
 
-    # item_feature_columns = [SparseFeat('movie_id', feature_max_idx['movie_id'], embedding_dim)]
     item_feature_columns = [SparseFeat('videoid', feature_max_idx['videoid'], embedding_dim),
                             SparseFeat("videoGenre1", feature_max_idx["videoGenre1"], embedding_dim),
                             SparseFeat("videoGenre2", feature_max_idx["videoGenre2"], embedding_dim),
@@ -224,7 +217,7 @@ if __name__ == "__main__":
         tf.compat.v1.disable_eager_execution()
     # 3.Define Model and train
 
-    model = DSSM(user_feature_columns, item_feature_columns)
+    model = FM(user_feature_columns, item_feature_columns)
     logdir = os.path.join("log_callbacks_fm")  # Tensorboard需要一个文件夹
     if not os.path.exists(logdir):
         os.mkdir(logdir)