liqian 2 years ago
parent
commit
16a8ae87d4
1 changed file with 5 additions and 2 deletions
  1. 5 2
      videos_similarity.py

+ 5 - 2
videos_similarity.py

@@ -116,7 +116,7 @@ def get_sim_videos():
 def similarity_rank(movie_videos, sim_videos):
     sim_result = []
     for video_id, title in movie_videos.items():
-        # item_sim = dict()
+        item_sim_list = []
         for vid, title1 in sim_videos.items():
             if vid == video_id:
                 continue
@@ -125,7 +125,10 @@ def similarity_rank(movie_videos, sim_videos):
             if dist > 0:
                 # item_sim[vid] = dist
                 item_sim = {'top_video_id': video_id, 'title': title, 'vid': vid, 'title1': title1, 'dist': dist}
-                sim_result.append(item_sim)
+                item_sim_list.append(item_sim)
+                item_sim_list.sort(key=lambda x: x['dist'], reverse=True)
+                sim_result.extend(item_sim_list[:4])
+                # sim_result.append(item_sim)
     dist_df = pd.DataFrame(sim_result, columns=['top_video_id', 'title', 'vid', 'title1', 'dist'])
     dist_df.to_csv('./data/videos_dist.csv', index=False)
         # sim_result[video_id] = item_sim