|
@@ -166,7 +166,41 @@ def cal_score(df, param):
|
|
|
return df
|
|
|
|
|
|
|
|
|
-def add_videos(initial_df, now_date, rule_key, region, data_key, hour_count, top):
|
|
|
def add_func1(initial_df, pre_h_df):
    """Merge the current-hour rows with rows collected from previous hours.

    Previous-hour rows are kept only when their score is strictly greater
    than the lowest score in ``initial_df``. The two frames are then
    concatenated, ``videoid`` is cast to ``int``, rows are sorted by score
    in descending order, and duplicate ``videoid`` values are dropped so
    that the highest-scoring row of each video survives.

    :param initial_df: current-hour result; must have ``videoid`` and
        ``score`` columns.
    :param pre_h_df: candidate rows from previous hours; must have
        ``videoid`` and ``score`` columns.
    :return: a new DataFrame; neither input frame is modified.
    """
    # Use the smallest *valid* score as the admission floor. The builtin
    # min() over a plain list is order-dependent when a NaN is present, and a
    # NaN floor makes `score > floor` False for every candidate.
    valid_scores = initial_df['score'].dropna()
    min_score = valid_scores.min() if len(valid_scores) > 0 else 0

    candidates = pre_h_df[pre_h_df['score'] > min_score]
    df = pd.concat([initial_df, candidates], ignore_index=True)

    # Deduplicate by videoid, keeping the row with the higher score.
    df['videoid'] = df['videoid'].astype(int)
    df = df.sort_values(by=['score'], ascending=False)
    df = df.drop_duplicates(subset=['videoid'], keep="first")
    return df
|
|
|
+
|
|
|
+
|
|
|
def add_func2(initial_df, pre_h_df):
    """Merge the current-hour rows with previous-hour rows for new videos only.

    A previous-hour row is kept only when its score is strictly greater than
    the lowest score in ``initial_df`` AND its ``videoid`` does not already
    appear in ``initial_df``. Videos present in the current hour therefore
    always keep their current-hour score. The frames are then concatenated,
    ``videoid`` is cast to ``int``, rows are sorted by score in descending
    order, and remaining duplicates are dropped, keeping the higher score.

    :param initial_df: current-hour result; must have ``videoid`` and
        ``score`` columns.
    :param pre_h_df: candidate rows from previous hours; must have
        ``videoid`` and ``score`` columns.
    :return: a new DataFrame; neither input frame is modified.
    """
    # Use the smallest *valid* score as the admission floor. The builtin
    # min() over a plain list is order-dependent when a NaN is present, and a
    # NaN floor makes `score > floor` False for every candidate.
    valid_scores = initial_df['score'].dropna()
    min_score = valid_scores.min() if len(valid_scores) > 0 else 0

    candidates = pre_h_df[pre_h_df['score'] > min_score]

    # Compare ids on a common int representation. Without this, a str/int
    # mismatch between the two frames makes isin() match nothing, and the
    # exclusion is silently skipped.
    initial_ids = initial_df['videoid'].astype(int).to_list()
    candidate_ids = candidates['videoid'].astype(int)
    candidates = candidates[~candidate_ids.isin(initial_ids)]

    df = pd.concat([initial_df, candidates], ignore_index=True)

    # Deduplicate by videoid, keeping the row with the higher score
    # (candidates from several previous hours may repeat a video).
    df['videoid'] = df['videoid'].astype(int)
    df = df.sort_values(by=['score'], ascending=False)
    df = df.drop_duplicates(subset=['videoid'], keep="first")
    return df
|
|
|
+
|
|
|
+
|
|
|
+def add_videos(initial_df, now_date, rule_key, region, data_key, hour_count, top, add_func):
|
|
|
"""
|
|
|
地域小时级数据列表中增加前6h优质视频
|
|
|
:param initial_df: 地域小时级筛选结果
|
|
@@ -192,17 +226,10 @@ def add_videos(initial_df, now_date, rule_key, region, data_key, hour_count, top
|
|
|
continue
|
|
|
pre_h_data.extend(pre_h_top_data)
|
|
|
pre_h_df = pd.DataFrame(data=pre_h_data, columns=['videoid', 'score'])
|
|
|
- score_list = initial_df['score'].to_list()
|
|
|
- if len(score_list) > 0:
|
|
|
- min_score = min(score_list)
|
|
|
+ if add_func == 'func2':
|
|
|
+ df = add_func2(initial_df=initial_df, pre_h_df=pre_h_df)
|
|
|
else:
|
|
|
- min_score = 0
|
|
|
- pre_h_df = pre_h_df[pre_h_df['score'] > min_score]
|
|
|
- df = pd.concat([initial_df, pre_h_df], ignore_index=True)
|
|
|
- # videoid去重,保留分值高
|
|
|
- df['videoid'] = df['videoid'].astype(int)
|
|
|
- df = df.sort_values(by=['score'], ascending=False)
|
|
|
- df = df.drop_duplicates(subset=['videoid'], keep="first")
|
|
|
+ df = add_func1(initial_df=initial_df, pre_h_df=pre_h_df)
|
|
|
return df
|
|
|
|
|
|
|
|
@@ -234,8 +261,9 @@ def video_rank(df, now_date, now_h, rule_key, param, region, data_key, rule_rank
|
|
|
|
|
|
# 增加打捞的优质视频
|
|
|
if add_videos_with_pre_h is True:
|
|
|
+ add_func = param.get('add_func', None)
|
|
|
h_recall_df = add_videos(initial_df=h_recall_df, now_date=now_date, rule_key=rule_key,
|
|
|
- region=region, data_key=data_key, hour_count=hour_count, top=10)
|
|
|
+ region=region, data_key=data_key, hour_count=hour_count, top=10, add_func=add_func)
|
|
|
|
|
|
h_recall_videos = h_recall_df['videoid'].to_list()
|
|
|
# log_.info(f'h_recall videos count = {len(h_recall_videos)}')
|