|
@@ -70,24 +70,31 @@ def video_rank(app_type, df, now_date, now_h, return_count):
|
|
|
:param return_count: 小时级数据回流限制数
|
|
|
:return:
|
|
|
"""
|
|
|
+ df = df.fillna(0)
|
|
|
# 视频状态过滤
|
|
|
log_.info(f'initial_df count = {len(df)}')
|
|
|
video_ids = [int(video_id) for video_id in df['videoid']]
|
|
|
df['videoid'] = df['videoid'].astype(int)
|
|
|
+ df = df.drop_duplicates(['videoid'], keep=False)
|
|
|
+ log_.info(f'df length = {len(df)}')
|
|
|
|
|
|
# 获取待推荐
|
|
|
filtered_result_6 = filter_video_status_with_applet_rec(video_ids=video_ids, applet_rec_status=-6)
|
|
|
filtered_df_6 = df[df['videoid'].isin(filtered_result_6)]
|
|
|
- filtered_df_6 = filtered_df_6.drop_duplicates(['videoid'], keep=False)
|
|
|
+ filtered_df_6 = filtered_df_6.sort_values(by=['站外播放量'], ascending=False)
|
|
|
log_.info(f'filtered_df_6 count = {len(filtered_df_6)}')
|
|
|
|
|
|
# 获取普通推荐
|
|
|
filtered_result_1 = filter_video_status_with_applet_rec(video_ids=video_ids, applet_rec_status=1)
|
|
|
filtered_df_1 = df[df['videoid'].isin(filtered_result_1)]
|
|
|
- filtered_df_1 = filtered_df_1.drop_duplicates(['videoid'], keep=False)
|
|
|
+ filtered_df_1 = filtered_df_1.sort_values(by=['站外播放量'], ascending=False)
|
|
|
log_.info(f'filtered_df_1 count = {len(filtered_df_1)}')
|
|
|
|
|
|
- log_.info(f'df length = {len(df)}')
|
|
|
+ # 排序合并
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Select the videos eligible for the recall source: lastonehour_return >= return_count and score >= 0.005
|
|
|
h_recall_df = df[(df['lastonehour_return'] >= return_count) & (df['score'] >= 0.005)]
|
|
|
h_recall_videos = h_recall_df['videoid'].to_list()
|