liqian 2 年之前
父節點
當前提交
7496f61632
共有 1 個文件被更改,包括 4 次插入和 1 次删除
  1. 4 1
      videos_similarity.py

+ 4 - 1
videos_similarity.py

@@ -83,7 +83,8 @@ def get_movie_video_top_list():
     data_df = get_data_from_odps(project='videoods', sql=sql)
     movie_videos = dict()
     for index, row in data_df.iterrows():
-        movie_videos[int(row['videoid'])] = row['title']
+        if index < 20:
+            movie_videos[int(row['videoid'])] = row['title']
     return movie_videos
 
 
@@ -112,6 +113,8 @@ def similarity_rank(movie_videos, sim_videos):
     for video_id, title in movie_videos.items():
         # item_sim = dict()
         for vid, title1 in sim_videos.items():
+            if vid == video_id:
+                continue
             vec1, vec2 = get_word_vector(title, title1)
             dist = cos_dist(vec1, vec2)
             if dist > 0: