Browse Source

add RULE_PARAMS_REGION_APP_TYPE rule18

liqian 2 years ago
parent
commit
17a92cd380
2 changed files with 44 additions and 12 deletions
  1. 4 0
      config.py
  2. 40 12
      region_rule_rank_h.py

+ 4 - 0
config.py

@@ -338,6 +338,10 @@ class BaseConfig(object):
             'rule17': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
                        'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3',
                        'add_videos_with_pre_h': True, 'hour_count': 47},
+            # 地域小时级列表中增加 前3小时 地域小时级的优质视频,排序优化1:半小时级列表中有的视频以本小时的分数为准
+            'rule18': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                       'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3',
+                       'add_videos_with_pre_h': True, 'hour_count': 3, 'add_func': 'func2'},
 
         },
         'data_params': DATA_PARAMS,

+ 40 - 12
region_rule_rank_h.py

@@ -166,7 +166,41 @@ def cal_score(df, param):
     return df
 
 
-def add_videos(initial_df, now_date, rule_key, region, data_key, hour_count, top):
+def add_func1(initial_df, pre_h_df):
+    """Merge the current-hour rows with rows collected from previous hours.
+
+    Rows of ``pre_h_df`` are kept only when their score is strictly greater
+    than the lowest score found in ``initial_df`` (a threshold of 0 is used
+    when ``initial_df`` has no rows). The surviving rows are appended to
+    ``initial_df``; when a videoid then appears more than once, only the
+    row with the highest score is kept, whichever frame it came from.
+
+    :param initial_df: DataFrame of current-hour results; read via its
+        'score' column, and 'videoid' after concatenation
+    :param pre_h_df: DataFrame of previous-hour candidates; read via its
+        'score' column, and 'videoid' after concatenation
+    :return: the merged DataFrame, sorted by 'score' descending, with
+        'videoid' cast to int and at most one row per videoid
+    """
+    score_list = initial_df['score'].to_list()
+    if len(score_list) > 0:
+        min_score = min(score_list)
+    else:
+        # No current-hour rows: fall back to a threshold of 0.
+        min_score = 0
+    # Only previous-hour rows that beat the weakest current-hour score survive.
+    pre_h_df = pre_h_df[pre_h_df['score'] > min_score]
+    df = pd.concat([initial_df, pre_h_df], ignore_index=True)
+    # De-duplicate by videoid, keeping the highest-scoring row:
+    # sorting by score descending first makes keep="first" pick that row.
+    df['videoid'] = df['videoid'].astype(int)
+    df = df.sort_values(by=['score'], ascending=False)
+    df = df.drop_duplicates(subset=['videoid'], keep="first")
+    return df
+
+
+def add_func2(initial_df, pre_h_df):
+    """Merge current-hour rows with previous-hour rows, preferring current-hour scores.
+
+    Rows of ``pre_h_df`` are kept only when their score is strictly greater
+    than the lowest score found in ``initial_df`` (a threshold of 0 is used
+    when ``initial_df`` has no rows) AND their videoid is not already
+    present in ``initial_df``. A video that is in the current-hour frame
+    therefore keeps its current-hour row instead of being replaced by a
+    higher-scoring previous-hour row. The surviving rows are appended to
+    ``initial_df`` and the result is sorted by score.
+
+    :param initial_df: DataFrame of current-hour results; read via its
+        'score' and 'videoid' columns
+    :param pre_h_df: DataFrame of previous-hour candidates; read via its
+        'score' and 'videoid' columns
+    :return: the merged DataFrame, sorted by 'score' descending, with
+        'videoid' cast to int and at most one row per videoid
+    """
+    score_list = initial_df['score'].to_list()
+    if len(score_list) > 0:
+        min_score = min(score_list)
+    else:
+        # No current-hour rows: fall back to a threshold of 0.
+        min_score = 0
+    initial_video_id_list = initial_df['videoid'].to_list()
+    # Only previous-hour rows that beat the weakest current-hour score survive.
+    pre_h_df = pre_h_df[pre_h_df['score'] > min_score]
+    # Drop previous-hour rows whose videoid is already in the current-hour frame.
+    # NOTE(review): this membership test runs before the astype(int) cast below,
+    # so it compares the raw videoid values of both frames - confirm that both
+    # frames hold videoid in the same dtype, otherwise matches may be missed.
+    pre_h_df = pre_h_df[~pre_h_df['videoid'].isin(initial_video_id_list)]
+    df = pd.concat([initial_df, pre_h_df], ignore_index=True)
+    # De-duplicate by videoid, keeping the highest-scoring row:
+    # sorting by score descending first makes keep="first" pick that row.
+    df['videoid'] = df['videoid'].astype(int)
+    df = df.sort_values(by=['score'], ascending=False)
+    df = df.drop_duplicates(subset=['videoid'], keep="first")
+    return df
+
+
+def add_videos(initial_df, now_date, rule_key, region, data_key, hour_count, top, add_func):
     """
     地域小时级数据列表中增加前6h优质视频
     :param initial_df: 地域小时级筛选结果
@@ -192,17 +226,10 @@ def add_videos(initial_df, now_date, rule_key, region, data_key, hour_count, top
             continue
         pre_h_data.extend(pre_h_top_data)
     pre_h_df = pd.DataFrame(data=pre_h_data, columns=['videoid', 'score'])
-    score_list = initial_df['score'].to_list()
-    if len(score_list) > 0:
-        min_score = min(score_list)
+    if add_func == 'func2':
+        df = add_func2(initial_df=initial_df, pre_h_df=pre_h_df)
     else:
-        min_score = 0
-    pre_h_df = pre_h_df[pre_h_df['score'] > min_score]
-    df = pd.concat([initial_df, pre_h_df], ignore_index=True)
-    # videoid去重,保留分值高
-    df['videoid'] = df['videoid'].astype(int)
-    df = df.sort_values(by=['score'], ascending=False)
-    df = df.drop_duplicates(subset=['videoid'], keep="first")
+        df = add_func1(initial_df=initial_df, pre_h_df=pre_h_df)
     return df
 
 
@@ -234,8 +261,9 @@ def video_rank(df, now_date, now_h, rule_key, param, region, data_key, rule_rank
 
     # 增加打捞的优质视频
     if add_videos_with_pre_h is True:
+        add_func = param.get('add_func', None)
         h_recall_df = add_videos(initial_df=h_recall_df, now_date=now_date, rule_key=rule_key,
-                                 region=region, data_key=data_key, hour_count=hour_count, top=10)
+                                 region=region, data_key=data_key, hour_count=hour_count, top=10, add_func=add_func)
 
     h_recall_videos = h_recall_df['videoid'].to_list()
     # log_.info(f'h_recall videos count = {len(h_recall_videos)}')