xielixun vor 4 Jahren
Ursprung
Commit
f2ce233c7c

+ 0 - 1
match_recall/Dssm_tzld_match.py

@@ -213,7 +213,6 @@ if __name__ == "__main__":
 
     if tf.__version__ >= '2.0.0':
         tf.compat.v1.disable_eager_execution()
-    # 3.Define Model and train
 
     model = DSSM(user_feature_columns, item_feature_columns)
     logdir = os.path.join("log_callbacks_dssm")  # Tensorboard需要一个文件夹

+ 0 - 2
match_recall/FM_tzld_match.py

@@ -168,7 +168,6 @@ if __name__ == "__main__":
     user_profile.set_index("mid", inplace=True)
     print(data)
     print("\n\n after group by mid videoid")
-    # print(data)
     del data
     gc.collect()
     # 按序列采样,没有加负采样,会在训练时的batch里进行batch负采样
@@ -215,7 +214,6 @@ if __name__ == "__main__":
 
     if tf.__version__ >= '2.0.0':
         tf.compat.v1.disable_eager_execution()
-    # 3.Define Model and train
 
     model = FM(user_feature_columns, item_feature_columns)
     logdir = os.path.join("log_callbacks_fm")  # Tensorboard需要一个文件夹

+ 0 - 3
match_recall/item2Vec-userEmbedding-tzld.py

@@ -1,12 +1,9 @@
-#!/usr/bin/env python
 # coding: utf-8
 import pandas as pd
 import time
 
 begin_time = time.time()
 
-# In[31]:
-
 
 #df = pd.read_csv("./datas/item2vecTzld1106.csv")
 #df = pd.read_csv("/root/xielixun/item2vec_app_20201126.csv")

+ 1 - 2
match_recall/spark-item2Vec-tzld.py

@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # coding: utf-8
 
 import pandas as pd
@@ -64,7 +63,7 @@ sc = spark.sparkContext
 #df = spark.read.csv("./datas/tzld_uid_videoids_app_20210406.csv", header=True)
 df = spark.read.csv("./datas/tzld_uid_videoids_app_20210419.csv", header=True)
 df.show(15)
-# In[9]:
+
 from pyspark.sql import functions as F
 from pyspark.sql import types as T
 

+ 2 - 4
match_recall/youtube_tzld_match.py

@@ -265,8 +265,7 @@ if __name__ == "__main__":
 
     item_embs = item_embedding_model.predict(all_item_model_input, batch_size=2 ** 12)
 
-    # #### 得到user embedding
-
+    # 得到user embedding
     user_layer_model = tf.keras.models.Model(
         inputs=[model.user_input],
         # outputs=model.get_layer("user_embedding").output
@@ -275,7 +274,7 @@ if __name__ == "__main__":
 
     user_embeddings = []
 
-    # #### 得到video embedding
+    # 得到video embedding
     video_layer_model = tf.keras.models.Model(
         inputs=[model.item_input],
         # outputs=model.get_layer("item_embedding").output
@@ -292,7 +291,6 @@ if __name__ == "__main__":
             np.reshape(row["videoGenre1"], [1, 1]),
             # np.reshape(row["videoGenre2"], [1, 1]),
             np.reshape(row["authorid"], [1, 1]),
-            #
             np.reshape(row["videoRealPlayCount"], [1, 1]),
             np.reshape(row["videoDuration"], [1, 1])
         ]