|
@@ -110,26 +110,24 @@ def video_rank_h(df, now_date, rule_key, param, data_key):
|
|
|
df = df.drop_duplicates(subset=['videoid'], keep='first')
|
|
|
df['videoid'] = df['videoid'].astype(int)
|
|
|
|
|
|
- # 获取符合进入召回源条件的视频
|
|
|
- return_count = param.get('return_count')
|
|
|
- if return_count:
|
|
|
- day_recall_df = df[df['回流人数'] > return_count]
|
|
|
- else:
|
|
|
- day_recall_df = df
|
|
|
- platform_return_rate = param.get('platform_return_rate', 0)
|
|
|
- day_recall_df = day_recall_df[day_recall_df['platform_return_rate'] > platform_return_rate]
|
|
|
- day_recall_videos = day_recall_df['videoid'].to_list()
|
|
|
+ day_recall_videos = df['videoid'].to_list()
|
|
|
log_.info(f'day_by30day_recall videos count = {len(day_recall_videos)}')
|
|
|
|
|
|
# 视频状态过滤
|
|
|
filtered_videos = filter_video_status(day_recall_videos)
|
|
|
log_.info('filtered_videos count = {}'.format(len(filtered_videos)))
|
|
|
|
|
|
+ # 获取top视频
|
|
|
+ top = param.get('top')
|
|
|
+ day_recall_df = df[df['videoid'].isin(filtered_videos)]
|
|
|
+ day_recall_df = day_recall_df.sort_values(by=['score'], ascending=False)
|
|
|
+ day_recall_df = day_recall_df[:top]
|
|
|
+
|
|
|
# 写入对应的redis
|
|
|
now_dt = datetime.strftime(now_date, '%Y%m%d')
|
|
|
day_video_ids = []
|
|
|
day_recall_result = {}
|
|
|
- for video_id in filtered_videos:
|
|
|
+ for video_id in day_recall_df['videoid'].to_list():
|
|
|
score = day_recall_df[day_recall_df['videoid'] == video_id]['score']
|
|
|
day_recall_result[int(video_id)] = float(score)
|
|
|
day_video_ids.append(int(video_id))
|
|
@@ -243,7 +241,7 @@ def timer_check():
|
|
|
table = config_.TABLE_30DAY_APP_TYPE
|
|
|
rule_params = config_.RULE_PARAMS_30DAY_APP_TYPE
|
|
|
now_date = datetime.today()
|
|
|
- log_.info(f"now_date: {datetime.strftime(now_date, '%Y%m%d%H')}")
|
|
|
+ log_.info(f"now_date: {datetime.strftime(now_date, '%Y%m%d')}")
|
|
|
now_h = datetime.now().hour
|
|
|
# 查看当前天级更新的数据是否已准备好
|
|
|
data_count = data_check(project=project, table=table, now_date=now_date)
|
|
@@ -251,7 +249,7 @@ def timer_check():
|
|
|
log_.info(f'day_by30day_data_count = {data_count}')
|
|
|
# 数据准备好,进行更新
|
|
|
rank(now_date=now_date, rule_params=rule_params, project=project, table=table)
|
|
|
- elif now_h > 22:
|
|
|
+ elif now_h > 2:
|
|
|
log_.info('day_by30day_recall data is None!')
|
|
|
rank_bottom(now_date=now_date, rule_params=rule_params)
|
|
|
else:
|