|
@@ -4,9 +4,6 @@ import os
|
|
|
import gc
|
|
|
import time
|
|
|
from deepctr.feature_column import SparseFeat, VarLenSparseFeat, get_feature_names
|
|
|
-# from preprocess_tzld210223 import gen_data_set, gen_model_input, gen_model_input_user_emb
|
|
|
-# from preprocess_tzld210303 import gen_data_set, gen_model_input, gen_model_input_user_emb
|
|
|
-# from preprocess_tzld210315 import gen_data_set, gen_model_input, gen_model_input_user_emb
|
|
|
# from preprocess_tzld210322_gen import gen_data_set, gen_model_input, gen_model_input_user_emb
|
|
|
# from preprocess_tzld210327_gen import gen_data_set, gen_model_input, gen_model_input_user_emb
|
|
|
from preprocess_tzld210423_gen import gen_data_set, gen_model_input, gen_model_input_user_emb
|
|
@@ -44,7 +41,6 @@ def generate_arrays_from_train(train_set, user_profile, SEQ_LEN):
|
|
|
|
|
|
def generate_arrays_from_train_bak(train_set, user_profile, SEQ_LEN):
|
|
|
global count_train
|
|
|
- # batch_size = 8
|
|
|
while True:
|
|
|
try:
|
|
|
train_batch = train_set[(count_train - 1) * batch_size: count_train * batch_size]
|
|
@@ -64,8 +60,6 @@ def generate_arrays_from_train_bak(train_set, user_profile, SEQ_LEN):
|
|
|
def generate_arrays_from_test(train_set, user_profile, SEQ_LEN):
|
|
|
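# x_y is our training set including labels: the first item of each row is the image path, followed by the one-hot-encoded labels. NOTE(review): this wording mentions x_y and image paths, which do not match this function's parameters (train_set, user_profile, SEQ_LEN) — presumably carried over from another generator; confirm and update.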
|
|
|
|
|
|
- # global count
|
|
|
- # batch_size = 8
|
|
|
while 1:
|
|
|
for i in range(0, len(train_set), batch_size):
|
|
|
try:
|
|
@@ -83,7 +77,6 @@ def generate_arrays_from_test(train_set, user_profile, SEQ_LEN):
|
|
|
|
|
|
def generate_arrays_from_test_bak(train_set, user_profile, SEQ_LEN):
|
|
|
global count_test
|
|
|
- # batch_size = 8
|
|
|
while True:
|
|
|
try:
|
|
|
train_batch = train_set[(count_test - 1) * batch_size: count_test * batch_size]
|
|
@@ -202,7 +195,6 @@ if __name__ == "__main__":
|
|
|
embedding_name="videoid"), SEQ_LEN, 'mean', 'hist_len'),
|
|
|
]
|
|
|
|
|
|
- # item_feature_columns = [SparseFeat('movie_id', feature_max_idx['movie_id'], embedding_dim)]
|
|
|
item_feature_columns = [SparseFeat('videoid', feature_max_idx['videoid'], embedding_dim),
|
|
|
SparseFeat("videoGenre1", feature_max_idx["videoGenre1"], embedding_dim),
|
|
|
SparseFeat("videoGenre2", feature_max_idx["videoGenre2"], embedding_dim),
|
|
@@ -223,7 +215,7 @@ if __name__ == "__main__":
|
|
|
tf.compat.v1.disable_eager_execution()
|
|
|
# 3.Define Model and train
|
|
|
|
|
|
- model = FM(user_feature_columns, item_feature_columns)
|
|
|
+ model = DSSM(user_feature_columns, item_feature_columns)
|
|
|
logdir = os.path.join("log_callbacks_dssm") # Tensorboard需要一个文件夹
|
|
|
if not os.path.exists(logdir):
|
|
|
os.mkdir(logdir)
|
|
@@ -250,7 +242,6 @@ if __name__ == "__main__":
|
|
|
tf.keras.metrics.AUC(name='auc-PRC', curve='PR')
|
|
|
]
|
|
|
|
|
|
- # model.compile(optimizer='adagrad', loss="binary_crossentropy")
|
|
|
# compile the model, set loss function, optimizer and evaluation metrics
|
|
|
model.compile(
|
|
|
loss='binary_crossentropy',
|
|
@@ -264,7 +255,6 @@ if __name__ == "__main__":
|
|
|
|
|
|
model.save("./tensorflow_DSSM-027-tzld-1.h5")
|
|
|
|
|
|
- # all_item_model_input = {"videoid": item_profile['videoid'].values}
|
|
|
all_item_model_input = {"videoid": item_profile['videoid'].values,
|
|
|
"videoGenre1": item_profile['videoGenre1'].values,
|
|
|
"videoGenre2": item_profile['videoGenre2'].values,
|