|
@@ -83,7 +83,8 @@ def get_movie_video_top_list():
|
|
|
data_df = get_data_from_odps(project='videoods', sql=sql)
|
|
|
movie_videos = dict()
|
|
|
for index, row in data_df.iterrows():
|
|
|
- movie_videos[int(row['videoid'])] = row['title']
|
|
|
+ if index < 20:
|
|
|
+ movie_videos[int(row['videoid'])] = row['title']
|
|
|
return movie_videos
|
|
|
|
|
|
|
|
@@ -112,6 +113,8 @@ def similarity_rank(movie_videos, sim_videos):
|
|
|
for video_id, title in movie_videos.items():
|
|
|
|
|
|
for vid, title1 in sim_videos.items():
|
|
|
+ if vid == video_id:
|
|
|
+ continue
|
|
|
vec1, vec2 = get_word_vector(title, title1)
|
|
|
dist = cos_dist(vec1, vec2)
|
|
|
if dist > 0:
|