liqian 2 年之前
父节点
当前提交
0add0b485f
共有 2 个文件被更改,包括 12 次插入和 4 次删除
  1. +2 -1
      rule_rank_h_18_19.py
  2. +10 -3
      whole_movies_update.py

+ 2 - 1
rule_rank_h_18_19.py

@@ -83,8 +83,9 @@ def cal_score(df):
     log_.info(f'initial_df count = {len(df)}')
     video_ids = [int(video_id) for video_id in df['videoid']]
     filtered_result = filter_video_status(video_ids=video_ids)
+    filter_result = set(video_ids) - set(filtered_result)
     df['videoid'] = df['videoid'].astype(int)
-    filter_df = df[df['videoid'].isin(filtered_result)]
+    filter_df = df[df['videoid'].isin(filter_result)]
     df = df.append(filter_df)
     df = df.drop_duplicates(['videoid'], keep=False)
     log_.info(f'filtered_df count = {len(df)}')

+ 10 - 3
whole_movies_update.py

@@ -70,24 +70,31 @@ def video_rank(app_type, df, now_date, now_h, return_count):
     :param return_count: 小时级数据回流限制数
     :return:
     """
+    df = df.fillna(0)
     # 视频状态过滤
     log_.info(f'initial_df count = {len(df)}')
     video_ids = [int(video_id) for video_id in df['videoid']]
     df['videoid'] = df['videoid'].astype(int)
+    df = df.drop_duplicates(['videoid'], keep=False)
+    log_.info(f'df length = {len(df)}')
 
     # 获取待推荐
     filtered_result_6 = filter_video_status_with_applet_rec(video_ids=video_ids, applet_rec_status=-6)
     filtered_df_6 = df[df['videoid'].isin(filtered_result_6)]
-    filtered_df_6 = filtered_df_6.drop_duplicates(['videoid'], keep=False)
+    filtered_df_6 = filtered_df_6.sort_values(by=['站外播放量'], ascending=False)
     log_.info(f'filtered_df_6 count = {len(filtered_df_6)}')
 
     # 获取普通推荐
     filtered_result_1 = filter_video_status_with_applet_rec(video_ids=video_ids, applet_rec_status=1)
     filtered_df_1 = df[df['videoid'].isin(filtered_result_1)]
-    filtered_df_1 = filtered_df_1.drop_duplicates(['videoid'], keep=False)
+    filtered_df_1 = filtered_df_1.sort_values(by=['站外播放量'], ascending=False)
     log_.info(f'filtered_df_1 count = {len(filtered_df_1)}')
 
-    log_.info(f'df length = {len(df)}')
+    # 排序合并
+
+
+
+
     # 获取符合进入召回源条件的视频,进入条件:小时级回流>=20 && score>=0.005
     h_recall_df = df[(df['lastonehour_return'] >= return_count) & (df['score'] >= 0.005)]
     h_recall_videos = h_recall_df['videoid'].to_list()