|
@@ -5,6 +5,7 @@ from threading import Timer
|
|
|
from datetime import datetime, timedelta
|
|
|
from get_data import get_data_from_odps
|
|
|
from db_helper import RedisHelper
|
|
|
+from utils import filter_video_status
|
|
|
from config import set_config
|
|
|
from log import Log
|
|
|
|
|
@@ -114,14 +115,22 @@ def video_rank_h(df, now_date, now_h, rule_key, param):
|
|
|
day_recall_df = df[df['回流人数'] > return_count]
|
|
|
else:
|
|
|
day_recall_df = df
|
|
|
+ # videoid重复时,保留分值高
|
|
|
+ day_recall_df = day_recall_df.sort_values(by=['score'], ascending=False)
|
|
|
+ day_recall_df = day_recall_df.drop_duplicates(subset=['videoid'], keep='first')
|
|
|
+ day_recall_df['videoid'] = day_recall_df['videoid'].astype(int)
|
|
|
day_recall_videos = day_recall_df['videoid'].to_list()
|
|
|
log_.info(f'h_by24h_recall videos count = {len(day_recall_videos)}')
|
|
|
|
|
|
+ # 视频状态过滤
|
|
|
+ filtered_videos = filter_video_status(day_recall_videos)
|
|
|
+ log_.info('filtered_videos count = {}'.format(len(filtered_videos)))
|
|
|
+
|
|
|
# 写入对应的redis
|
|
|
now_dt = datetime.strftime(now_date, '%Y%m%d')
|
|
|
day_video_ids = []
|
|
|
day_recall_result = {}
|
|
|
- for video_id in day_recall_videos:
|
|
|
+ for video_id in filtered_videos:
|
|
|
score = day_recall_df[day_recall_df['videoid'] == video_id]['score']
|
|
|
day_recall_result[int(video_id)] = float(score)
|
|
|
day_video_ids.append(int(video_id))
|