Pārlūkot izejas kodu

add abtest config: 316

liqian 2 gadi atpakaļ
vecāks
revīzija
ba63e2b92b
2 mainīti faili ar 29 papildinājumiem un 16 dzēšanām
  1. 4 1
      config.py
  2. 25 15
      region_rule_rank_h.py

+ 4 - 1
config.py

@@ -391,7 +391,9 @@ class BaseConfig(object):
             'rule18': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
                        'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3',
                        'add_videos_with_pre_h': True, 'hour_count': 3, 'add_func': 'func2'},
-
+            # 其余表与地域小时级表,不做去重,召回在线去重
+            'rule19': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                       'region_24h_rule_key': 'rule4', '24h_rule_key': 'rule4', 'merge_func': 2, 'dup_remove': False},
         },
         'data_params': DATA_PARAMS,
         'params_list': [
@@ -421,6 +423,7 @@ class BaseConfig(object):
             # {'data': 'data1', 'rule': 'rule16'},  # 214 vlog
             # {'data': 'data1', 'rule': 'rule17'},  # 215 vlog
             # {'data': 'data1', 'rule': 'rule18'},  # 224 vlog
+            {'data': 'data10', 'rule': 'rule19'},  # 316 票圈视频 + 召回在线去重
         ],
     }
 

+ 25 - 15
region_rule_rank_h.py

@@ -321,14 +321,16 @@ def video_rank(df, now_date, now_h, rule_key, param, region, data_key, rule_rank
     region_24h_rule_key = param.get('region_24h_rule_key', 'rule1')
     by_24h_rule_key = param.get('24h_rule_key', None)
     by_48h_rule_key = param.get('48h_rule_key', None)
+    dup_remove = param.get('dup_remove', True)
     # 与其他召回视频池去重,存入对应的redis
     dup_to_redis(h_video_ids=h_video_ids, now_date=now_date, now_h=now_h, rule_key=rule_key,
-                 region_24h_rule_key=region_24h_rule_key, by_24h_rule_key=by_24h_rule_key, by_48h_rule_key=by_48h_rule_key,
-                 region=region, data_key=data_key, rule_rank_h_flag=rule_rank_h_flag, political_filter=political_filter,
-                 shield_config=shield_config)
+                 region_24h_rule_key=region_24h_rule_key, by_24h_rule_key=by_24h_rule_key,
+                 by_48h_rule_key=by_48h_rule_key, region=region, data_key=data_key,
+                 rule_rank_h_flag=rule_rank_h_flag, political_filter=political_filter,
+                 shield_config=shield_config, dup_remove=dup_remove)
 
 
-def dup_data(h_video_ids, initial_key_name, dup_key_name, region, political_filter, shield_config):
+def dup_data(h_video_ids, initial_key_name, dup_key_name, region, political_filter, shield_config, dup_remove):
     redis_helper = RedisHelper()
     if redis_helper.key_exists(key_name=initial_key_name):
         initial_data = redis_helper.get_all_data_from_zset(key_name=initial_key_name, with_scores=True)
@@ -343,10 +345,16 @@ def dup_data(h_video_ids, initial_key_name, dup_key_name, region, political_filt
             initial_video_ids = filter_political_videos(video_ids=initial_video_ids)
 
         dup_data = {}
-        for video_id, score in initial_data:
-            if int(video_id) not in h_video_ids and int(video_id) in initial_video_ids:
-                dup_data[int(video_id)] = score
-                h_video_ids.append(int(video_id))
+        # 视频去重逻辑
+        if dup_remove is True:
+            for video_id, score in initial_data:
+                if int(video_id) not in h_video_ids and int(video_id) in initial_video_ids:
+                    dup_data[int(video_id)] = score
+                    h_video_ids.append(int(video_id))
+        else:
+            for video_id, score in initial_data:
+                if int(video_id) in initial_video_ids:
+                    dup_data[int(video_id)] = score
 
         if len(dup_data) > 0:
             redis_helper.add_data_with_zset(key_name=dup_key_name, data=dup_data, expire_time=2 * 24 * 3600)
@@ -356,7 +364,7 @@ def dup_data(h_video_ids, initial_key_name, dup_key_name, region, political_filt
 
 
 def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by_24h_rule_key, by_48h_rule_key,
-                 region, data_key, rule_rank_h_flag, political_filter, shield_config):
+                 region, data_key, rule_rank_h_flag, political_filter, shield_config, dup_remove):
     """将地域分组小时级数据与其他召回视频池去重,存入对应的redis"""
     # ##### 去重更新地域分组小时级24h列表,并另存为redis中
     region_24h_key_name = \
@@ -367,7 +375,7 @@ def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by
         f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
     h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=region_24h_key_name,
                            dup_key_name=region_24h_dup_key_name, region=region, political_filter=political_filter,
-                           shield_config=shield_config)
+                           shield_config=shield_config, dup_remove=dup_remove)
 
     if rule_rank_h_flag == '48h':
 
@@ -379,7 +387,7 @@ def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by
             f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
         h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_48h_key_name,
                                dup_key_name=h_48h_dup_key_name, region=region, political_filter=political_filter,
-                               shield_config=shield_config)
+                               shield_config=shield_config, dup_remove=dup_remove)
 
         # ##### 去重小程序相对48h 筛选后剩余数据 更新结果,并另存为redis中
         if by_48h_rule_key == 'rule1':
@@ -390,7 +398,8 @@ def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by
                 f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
             h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=other_h_48h_key_name,
                                    dup_key_name=other_h_48h_dup_key_name, region=region,
-                                   political_filter=political_filter, shield_config=shield_config)
+                                   political_filter=political_filter, shield_config=shield_config,
+                                   dup_remove=dup_remove)
 
     else:
         # ##### 去重小程序相对24h更新结果,并另存为redis中
@@ -401,7 +410,7 @@ def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by
             f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
         h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_24h_key_name,
                                dup_key_name=h_24h_dup_key_name, region=region, political_filter=political_filter,
-                               shield_config=shield_config)
+                               shield_config=shield_config, dup_remove=dup_remove)
 
         # ##### 去重小程序相对24h 筛选后剩余数据 更新结果,并另存为redis中
         # if by_24h_rule_key in ['rule3', 'rule4']:
@@ -412,7 +421,7 @@ def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by
             f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
         h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=other_h_24h_key_name,
                                dup_key_name=other_h_24h_dup_key_name, region=region, political_filter=political_filter,
-                               shield_config=shield_config)
+                               shield_config=shield_config, dup_remove=dup_remove)
 
     # ##### 去重小程序模型更新结果,并另存为redis中
     # model_key_name = get_rov_redis_key(now_date=now_date)
@@ -729,6 +738,7 @@ def h_bottom_process(param, rule_params_item, region_code_list, key_prefix, redi
     political_filter = param.get('political_filter', None)
     # 屏蔽视频过滤
     shield_config = param.get('shield_config', config_.SHIELD_CONFIG)
+    dup_remove = param.get('dup_remove', True)
     for region in region_code_list:
         log_.info(f"region = {region}")
         key_name = f"{key_prefix}{region}:{data_key}:{rule_key}:{redis_dt}:{redis_h}"
@@ -750,7 +760,7 @@ def h_bottom_process(param, rule_params_item, region_code_list, key_prefix, redi
                      region_24h_rule_key=region_24h_rule_key, region=region,
                      data_key=data_key, by_24h_rule_key=by_24h_rule_key,
                      by_48h_rule_key=by_48h_rule_key, rule_rank_h_flag=rule_rank_h_flag,
-                     political_filter=political_filter, shield_config=shield_config)
+                     political_filter=political_filter, shield_config=shield_config, dup_remove=dup_remove)
     # 特殊城市视频数据准备
     for region, city_list in config_.REGION_CITY_MAPPING.items():
         t = [