liqian 2 years ago
parent
commit
0a130422f3
1 changed file with 10 additions and 3 deletions
  1. 10 3
      whole_movies_update.py

+ 10 - 3
whole_movies_update.py

@@ -70,24 +70,31 @@ def video_rank(app_type, df, now_date, now_h, return_count):
     :param return_count: 小时级数据回流限制数
     :param return_count: 小时级数据回流限制数
     :return:
     :return:
     """
     """
+    df = df.fillna(0)
     # 视频状态过滤
     # 视频状态过滤
     log_.info(f'initial_df count = {len(df)}')
     log_.info(f'initial_df count = {len(df)}')
     video_ids = [int(video_id) for video_id in df['videoid']]
     video_ids = [int(video_id) for video_id in df['videoid']]
     df['videoid'] = df['videoid'].astype(int)
     df['videoid'] = df['videoid'].astype(int)
+    df = df.drop_duplicates(['videoid'], keep=False)
+    log_.info(f'df length = {len(df)}')
 
 
     # 获取待推荐
     # 获取待推荐
     filtered_result_6 = filter_video_status_with_applet_rec(video_ids=video_ids, applet_rec_status=-6)
     filtered_result_6 = filter_video_status_with_applet_rec(video_ids=video_ids, applet_rec_status=-6)
     filtered_df_6 = df[df['videoid'].isin(filtered_result_6)]
     filtered_df_6 = df[df['videoid'].isin(filtered_result_6)]
-    filtered_df_6 = filtered_df_6.drop_duplicates(['videoid'], keep=False)
+    filtered_df_6 = filtered_df_6.sort_values(by=['站外播放量'], ascending=False)
     log_.info(f'filtered_df_6 count = {len(filtered_df_6)}')
     log_.info(f'filtered_df_6 count = {len(filtered_df_6)}')
 
 
     # 获取普通推荐
     # 获取普通推荐
     filtered_result_1 = filter_video_status_with_applet_rec(video_ids=video_ids, applet_rec_status=1)
     filtered_result_1 = filter_video_status_with_applet_rec(video_ids=video_ids, applet_rec_status=1)
     filtered_df_1 = df[df['videoid'].isin(filtered_result_1)]
     filtered_df_1 = df[df['videoid'].isin(filtered_result_1)]
-    filtered_df_1 = filtered_df_1.drop_duplicates(['videoid'], keep=False)
+    filtered_df_1 = filtered_df_1.sort_values(by=['站外播放量'], ascending=False)
     log_.info(f'filtered_df_1 count = {len(filtered_df_1)}')
     log_.info(f'filtered_df_1 count = {len(filtered_df_1)}')
 
 
-    log_.info(f'df length = {len(df)}')
+    # 排序合并
+
+
+
+
     # 获取符合进入召回源条件的视频,进入条件:小时级回流>=20 && score>=0.005
     # 获取符合进入召回源条件的视频,进入条件:小时级回流>=20 && score>=0.005
     h_recall_df = df[(df['lastonehour_return'] >= return_count) & (df['score'] >= 0.005)]
     h_recall_df = df[(df['lastonehour_return'] >= return_count) & (df['score'] >= 0.005)]
     h_recall_videos = h_recall_df['videoid'].to_list()
     h_recall_videos = h_recall_df['videoid'].to_list()