Explorar el Código

merge reset-20221207

liqian hace 2 años
padre
commit
d1af9d07c4
Se han modificado 2 ficheros con 37 adiciones y 19 borrados
  1. 25 9
      config.py
  2. 12 10
      region_rule_rank_h.py

+ 25 - 9
config.py

@@ -324,7 +324,17 @@ class BaseConfig(object):
 
             # 地域小时级列表中增加 前6小时 地域小时级的优质视频
             'rule15': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
-                       'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3', 'add_videos_with_pre6h': True},
+                       'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3',
+                       'add_videos_with_pre_h': True, 'hour_count': 6},
+            # 地域小时级列表中增加 前3小时 地域小时级的优质视频
+            'rule16': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                       'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3',
+                       'add_videos_with_pre_h': True, 'hour_count': 3},
+            # 地域小时级列表中增加 前47小时 地域小时级的优质视频
+            'rule17': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                       'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3',
+                       'add_videos_with_pre_h': True, 'hour_count': 47},
+
         },
         'data_params': DATA_PARAMS,
         'params_list': [
@@ -350,6 +360,8 @@ class BaseConfig(object):
             # {'data': 'data1', 'rule': 'rule13'},  # 161
             # {'data': 'data1', 'rule': 'rule14'},  # 162
             {'data': 'data1', 'rule': 'rule15'},  # 200 vlog
+            {'data': 'data1', 'rule': 'rule16'},  # 214 vlog
+            {'data': 'data1', 'rule': 'rule17'},  # 215 vlog
         ],
     }
 
@@ -701,6 +713,7 @@ class BaseConfig(object):
             {'data': 'data1', 'rule': 'rule2'},
             {'data': 'data4', 'rule': 'rule1'},
             {'data': 'data1', 'rule': 'rule3'},
+            {'data': 'data4', 'rule': 'rule3'},
         ]
     }
 
@@ -709,19 +722,19 @@ class BaseConfig(object):
         # 票圈vlog
         '173-a': {'video': {'data': 'data1'},
                   'user': {'data': 'data1', 'rule': 'rule1'},
-                  'threshold': {'group': 7 / 12, 'mean_group': 7 / 12}},
+                  'threshold': {'group': 11 / 24, 'mean_group': 11 / 24}},
         '173-b': {'video': {'data': 'data1'},
                   'user': {'data': 'data1', 'rule': 'rule2'},
-                  'threshold': {'group': 7 / 12, 'mean_group': 7 / 12}},
+                  'threshold': {'group': 11 / 24, 'mean_group': 11 / 24}},
         '173-c': {'video': {'data': 'data1'},
                   'user': {'data': 'data1', 'rule': 'rule3'},
-                  'threshold': {'group': 7 / 12, 'mean_group': 7 / 12}},
+                  'threshold': {'group': 11 / 24, 'mean_group': 11 / 24}},
         # 票圈视频+
         '190-a': {'video': {'data': 'data1'},
                   'user': {'data': 'data1', 'rule': 'rule1'},
-                  'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
+                  'threshold': {'group': 13 / 24, 'mean_group': 13 / 24}},
         '190-b': {'video': {'data': 'data1'},
-                  'user': {'data': 'data1', 'rule': 'rule1'},
+                  'user': {'data': 'data1', 'rule': 'rule2'},
                   'threshold': {'group': 13 / 24, 'mean_group': 13 / 24}},
         # 票圈视频
         '194-a': {'video': {'data': 'data1'},
@@ -736,14 +749,17 @@ class BaseConfig(object):
                   'threshold': {'group': 11 / 24, 'mean_group': 11 / 24}},
         '195-b': {'video': {'data': 'data1'},
                   'user': {'data': 'data1', 'rule': 'rule2'},
-                  'threshold': {'group': 11 / 24, 'mean_group': 11 / 24}},
+                  'threshold': {'group': 23 / 48, 'mean_group': 23 / 48}},
         # 票圈短视频
         '196-a': {'video': {'data': 'data1'},
                   'user': {'data': 'data1', 'rule': 'rule1'},
                   'threshold': {'group': 1 / 2, 'mean_group': 1 / 2}},
         '196-b': {'video': {'data': 'data4'},
                   'user': {'data': 'data4', 'rule': 'rule1'},
-                  'threshold': {'group': 1 / 2, 'mean_group': 1 / 2}},
+                  'threshold': {'group': 23 / 48, 'mean_group': 23 / 48}},
+        '196-c': {'video': {'data': 'data4'},
+                  'user': {'data': 'data4', 'rule': 'rule3'},
+                  'threshold': {'group': 23 / 48, 'mean_group': 23 / 48}},
         # 老好看视频
         '197-a': {'video': {'data': 'data1'},
                   'user': {'data': 'data1', 'rule': 'rule1'},
@@ -757,7 +773,7 @@ class BaseConfig(object):
                   'threshold': {'group': 49 / 96, 'mean_group': 49 / 96}},
         '198-b': {'video': {'data': 'data1'},
                   'user': {'data': 'data1', 'rule': 'rule1'},
-                  'threshold': {'group': 5 / 18, 'mean_group': 5 / 18}},
+                  'threshold': {'group': 49 / 96, 'mean_group': 49 / 96}},
     }
 
     # 用户组有广告时的分享率预测结果存放 redis key 前缀,完整格式:ad:users:group:predict:share:rate:{user_data_key}:{user_rule_key}:{date}

+ 12 - 10
region_rule_rank_h.py

@@ -206,7 +206,8 @@ def add_videos(initial_df, now_date, rule_key, region, data_key, hour_count, top
     return df
 
 
-def video_rank(df, now_date, now_h, rule_key, param, region, data_key, rule_rank_h_flag, add_videos_with_pre6h=False):
+def video_rank(df, now_date, now_h, rule_key, param, region, data_key, rule_rank_h_flag,
+               add_videos_with_pre_h=False, hour_count=0):
     """
     获取符合进入召回源条件的视频,与每日更新的rov模型结果视频列表进行合并
     :param df:
@@ -232,9 +233,9 @@ def video_rank(df, now_date, now_h, rule_key, param, region, data_key, rule_rank
     h_recall_df['videoid'] = h_recall_df['videoid'].astype(int)
 
     # 增加打捞的优质视频
-    if add_videos_with_pre6h is True:
+    if add_videos_with_pre_h is True:
         h_recall_df = add_videos(initial_df=h_recall_df, now_date=now_date, rule_key=rule_key,
-                                 region=region, data_key=data_key, hour_count=6, top=10)
+                                 region=region, data_key=data_key, hour_count=hour_count, top=10)
 
     h_recall_videos = h_recall_df['videoid'].to_list()
     # log_.info(f'h_recall videos count = {len(h_recall_videos)}')
@@ -421,7 +422,7 @@ def merge_df_with_score(df_left, df_right):
 
 
 def process_with_region(region, df_merged, data_key, rule_key, rule_param, now_date, now_h,
-                        rule_rank_h_flag, add_videos_with_pre6h):
+                        rule_rank_h_flag, add_videos_with_pre_h, hour_count):
     log_.info(f"region = {region} start...")
     # 计算score
     region_df = df_merged[df_merged['code'] == region]
@@ -429,18 +430,18 @@ def process_with_region(region, df_merged, data_key, rule_key, rule_param, now_d
     score_df = cal_score(df=region_df, param=rule_param)
     video_rank(df=score_df, now_date=now_date, now_h=now_h, rule_key=rule_key, param=rule_param,
                region=region, data_key=data_key, rule_rank_h_flag=rule_rank_h_flag,
-               add_videos_with_pre6h=add_videos_with_pre6h)
+               add_videos_with_pre_h=add_videos_with_pre_h, hour_count=hour_count)
     log_.info(f"region = {region} end!")
 
 
 def process_with_region2(region, df_merged, data_key, rule_key, rule_param, now_date, now_h,
-                         rule_rank_h_flag, add_videos_with_pre6h):
+                         rule_rank_h_flag, add_videos_with_pre_h, hour_count):
     log_.info(f"region = {region} start...")
     region_score_df = df_merged[df_merged['code'] == region]
     log_.info(f'region = {region}, region_score_df count = {len(region_score_df)}')
     video_rank(df=region_score_df, now_date=now_date, now_h=now_h, region=region,
                rule_key=rule_key, param=rule_param, data_key=data_key, rule_rank_h_flag=rule_rank_h_flag,
-               add_videos_with_pre6h=add_videos_with_pre6h)
+               add_videos_with_pre_h=add_videos_with_pre_h, hour_count=hour_count)
     log_.info(f"region = {region} end!")
 
 
@@ -537,7 +538,8 @@ def process_with_param(param, data_params_item, rule_params_item, region_code_li
     log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
     merge_func = rule_param.get('merge_func', None)
     # 是否在地域小时级数据中增加打捞的优质视频
-    add_videos_with_pre6h = rule_param.get('add_videos_with_pre6h', False)
+    add_videos_with_pre_h = rule_param.get('add_videos_with_pre_h', False)
+    hour_count = rule_param.get('hour_count', 0)
 
     if merge_func == 2:
         score_df_list = []
@@ -554,7 +556,7 @@ def process_with_param(param, data_params_item, rule_params_item, region_code_li
         task_list = [
             gevent.spawn(process_with_region2,
                          region, df_merged, data_key, rule_key, rule_param, now_date, now_h, rule_rank_h_flag,
-                         add_videos_with_pre6h)
+                         add_videos_with_pre_h, hour_count)
             for region in region_code_list
         ]
     else:
@@ -563,7 +565,7 @@ def process_with_param(param, data_params_item, rule_params_item, region_code_li
         task_list = [
             gevent.spawn(process_with_region,
                          region, df_merged, data_key, rule_key, rule_param, now_date, now_h, rule_rank_h_flag,
-                         add_videos_with_pre6h)
+                         add_videos_with_pre_h, hour_count)
             for region in region_code_list
         ]