|
@@ -81,9 +81,13 @@ def cos_dist(vec1, vec2):
|
|
|
def get_movie_video_top_list(top_n=20):
    """Return a ``{videoid: title}`` mapping for the head of the movie top list.

    Queries ``videoods.movie_store_video_top_list`` through
    ``get_data_from_odps`` and keeps the rows whose DataFrame index label is
    below ``top_n``.  Rows without a ``videoid`` are dropped, so the result
    may hold fewer than ``top_n`` entries.

    Args:
        top_n: Exclusive upper bound on the index labels to keep.  Defaults
            to 20, the limit that used to be hard-coded.

    Returns:
        dict: ``int`` video id -> title.  If a video id occurs more than
        once, the last occurrence wins.
    """
    sql = "select videoid, title from videoods.movie_store_video_top_list;"
    # NOTE(review): the result is presumably a pandas DataFrame (it is used
    # with fillna/astype/index below) -- confirm against get_data_from_odps.
    data_df = get_data_from_odps(project='videoods', sql=sql)

    # Missing values become 0 so the id column can be cast to int; 0 then
    # acts as the "no video id" sentinel.  This also turns a missing title
    # into 0.
    data_df = data_df.fillna(0)
    data_df['videoid'] = data_df['videoid'].astype(int)

    # Pick the wanted rows in one vectorised step instead of walking the
    # whole table with iterrows() only to keep a handful of rows.
    keep = (data_df.index < top_n) & (data_df['videoid'] != 0).values
    selected = data_df[keep]
    return dict(zip(selected['videoid'].tolist(), selected['title'].tolist()))
|
|
|
|
|
@@ -135,7 +139,7 @@ if __name__ == '__main__':
|
|
|
# dist1 = cos_dist(vec1, vec2)
|
|
|
# print(dist1)
|
|
|
movie_videos = get_movie_video_top_list()
|
|
|
- sim_videos = get_sim_videos()
|
|
|
- print(len(movie_videos), len(sim_videos))
|
|
|
- similarity_rank(movie_videos=movie_videos, sim_videos=sim_videos)
|
|
|
+ # sim_videos = get_sim_videos()
|
|
|
+ # print(len(movie_videos), len(sim_videos))
|
|
|
+ # similarity_rank(movie_videos=movie_videos, sim_videos=sim_videos)
|
|
|
|