Browse Source

add region h ab test

liqian 3 years ago
parent
commit
c5b3dd08d3
4 changed files with 73 additions and 20 deletions
  1. 21 0
      config.py
  2. 8 1
      recommend.py
  3. 9 4
      utils.py
  4. 35 15
      video_recall.py

+ 21 - 0
config.py

@@ -31,6 +31,7 @@ class BaseConfig(object):
         'day_rule_rank2': '030',
         'ab_initial': '031',
         'old_video': '034',
+        'region_rule_rank1': '037',
     }
 
     # abTest
@@ -55,12 +56,18 @@ class BaseConfig(object):
             'rule_rank4': 20004,
             'rule_rank5': 20005,
         },  # 小时级别更新rov列表实验
+
         'rank_by_day': {
             'day_rule_rank1': 40001,
             'day_rule_rank2': 40002,
         },  # 天级别规则更新rov列表实验
+
         'ab_initial': 20000,  # ab实验相对实验组(无人工调整)
         'old_video': 50001,  # 固定位置插入老视频
+
+        'region_rank_by_h': {
+            'region_rule_rank1': 60001,
+        },  # 地域分组小时级规则实验
     }
 
     # 小程序小时级列表key不同实验标识
@@ -80,6 +87,12 @@ class BaseConfig(object):
         'day_rule_rank2': 'rule2',
     }
 
+    # 小程序地域分组小时级列表key不同实验标识
+    RULE_KEY_REGION = {
+        'initial': '',
+        'region_rule_rank1': 'rule1',
+    }
+
     # pushFrom
     PUSH_FROM = {
         'rov_recall': 'recall_pool',  # rov召回池
@@ -132,6 +145,14 @@ class BaseConfig(object):
     # 完整格式:com.weiqu.video.recall.hot.item.score.dup.day.pre.{rule_key}.{date}
     RECALL_KEY_NAME_PREFIX_DUP_DAY_PRE = 'com.weiqu.video.recall.hot.item.score.dup.day.pre.'
 
+    # 小程序地域分组小时级更新结果存放 redis key前缀,完整格式:com.weiqu.video.recall.item.score.region.h.{region}.{rule_key}.{date}.{h}
+    RECALL_KEY_NAME_PREFIX_REGION_BY_H = 'com.weiqu.video.recall.item.score.region.h.'
+    # 小程序离线ROV模型结果与小程序地域分组小时级更新结果去重后 存放 redis key前缀,
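+    # 完整格式:com.weiqu.video.recall.hot.item.score.dup.region.h.{region}.{rule_key}.{date}.{h}
+    # NOTE(review): the constant below is '...score.dup.h.' (no 'region.' segment), so the keys actually built are
+    # '...score.dup.h.{region}.{rule_key}.{date}.{h}' - confirm whether the constant or the documented format is intended.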
+    RECALL_KEY_NAME_PREFIX_DUP_REGION_H = 'com.weiqu.video.recall.hot.item.score.dup.h.'
+    # 地域分组小时级视频状态不符合推荐要求的列表 redis key,完整格式:com.weiqu.video.filter.region.h.item.{region}.{rule_key}
+    REGION_H_VIDEO_FILER = 'com.weiqu.video.filter.region.h.item.'
+
     # app应用 小程序离线ROV模型结果存放 redis key前缀,完整格式:com.weiqu.video.recall.hot.item.score.app.{date}
     RECALL_KEY_NAME_PREFIX_APP = 'com.weiqu.video.recall.hot.item.score.app.'
 

+ 8 - 1
recommend.py

@@ -174,7 +174,8 @@ def video_recommend(mid, uid, size, top_K, flow_pool_P, app_type, algo_type, cli
                              client_info=client_info, rule_key=rule_key, no_op_flag=no_op_flag)
     _, last_rov_recall_key, _ = pool_recall.get_video_last_idx()
     # 小时级实验
-    if ab_code in [code for _, code in config_.AB_CODE['rank_by_h'].items()]:
+    if ab_code in [code for _, code in config_.AB_CODE['rank_by_h'].items()] + \
+            [code for _, code in config_.AB_CODE['region_rank_by_h'].items()]:
         t = [gevent.spawn(pool_recall.rov_pool_recall_by_h, size, expire_time),
              gevent.spawn(pool_recall.flow_pool_recall, size)]
     # 最惊奇/老好看实验
@@ -445,6 +446,12 @@ def get_recommend_params(ab_exp_info):
             rule_key = config_.RULE_KEY_DAY['day_rule_rank2']
             no_op_flag = True
 
+        elif config_.AB_EXP_CODE['region_rule_rank1'] in ab_exp_code_list:
+            ab_code = config_.AB_CODE['region_rank_by_h'].get('region_rule_rank1')
+            expire_time = 3600
+            rule_key = config_.RULE_KEY_REGION['region_rule_rank1']
+            no_op_flag = True
+
         else:
             ab_code = config_.AB_CODE['initial']
             expire_time = 24 * 3600

+ 9 - 4
utils.py

@@ -148,19 +148,23 @@ class FilterVideos(object):
         self.uid = uid
         self.video_ids = video_ids
 
-    def filter_video_status_h(self, video_ids, return_count):
+    def filter_video_status_h(self, video_ids, rule_key, ab_code, province_code):
         """召回小时级更新的视频状态过滤"""
         # 根据Redis缓存中的数据过滤
         redis_helper = RedisHelper()
         # 获取不符合推荐状态的视频
-        filter_videos_list = redis_helper.get_data_from_set(key_name=f"{config_.H_VIDEO_FILER}{return_count}")
+        if ab_code in [code for _, code in config_.AB_CODE['region_rank_by_h'].items()]:
+            key_prefix = f"{config_.REGION_H_VIDEO_FILER}{province_code}."
+        else:
+            key_prefix = config_.H_VIDEO_FILER
+        filter_videos_list = redis_helper.get_data_from_set(key_name=f"{key_prefix}{rule_key}")
         if not filter_videos_list:
             return video_ids
         filter_videos = [int(video) for video in filter_videos_list]
         filtered_videos = [video_id for video_id in video_ids if video_id not in filter_videos]
         return filtered_videos
 
-    def filter_videos_h(self, return_count):
+    def filter_videos_h(self, rule_key, ab_code, province_code):
         """召回小时级更新的视频过滤"""
         # 预曝光过滤
         st_pre = time.time()
@@ -173,7 +177,8 @@ class FilterVideos(object):
 
         # 视频状态过滤
         st_status = time.time()
-        filtered_status_result = self.filter_video_status_h(video_ids=filtered_pre_result, return_count=return_count)
+        filtered_status_result = self.filter_video_status_h(video_ids=filtered_pre_result, rule_key=rule_key,
+                                                            ab_code=ab_code, province_code=province_code)
         et_status = time.time()
         log_.info('filter by video status: result = {}, execute time = {}ms'.format(
             filtered_status_result, (et_status - st_status) * 1000))

+ 35 - 15
video_recall.py

@@ -15,7 +15,8 @@ config_ = set_config()
 
 class PoolRecall(object):
     """召回"""
-    def __init__(self, app_type, client_info=None, mid='', uid='', ab_code='', rule_key='', no_op_flag=False):
+    def __init__(self, app_type, client_info=None, mid='', uid='', ab_code='',
+                 rule_key='', no_op_flag=False):
         """
         初始化
         :param app_type: 产品标识 type-int
@@ -48,7 +49,7 @@ class PoolRecall(object):
         else:
             return False
 
-    def update_mid_data(self):
+    def update_mid_data(self, province_code):
         # mid对应小时级视频列表 redis-key
         h_recall_mid_key = f"{config_.H_WITH_MID_RECALL_KEY_NAME_PREFIX}{self.mid}"
         # 判断mid对应小时级视频列表 时间记录
@@ -57,7 +58,11 @@ class PoolRecall(object):
         now_date = datetime.today()
         h = datetime.now().hour
         now_dt = datetime.strftime(now_date, '%Y%m%d')
-        now_h_recall_key = f"{config_.RECALL_KEY_NAME_PREFIX_BY_H}{self.rule_key}.{now_dt}.{h}"
+        if self.ab_code in [code for _, code in config_.AB_CODE['region_rank_by_h'].items()]:
+            key_prefix = f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H}{province_code}."
+        else:
+            key_prefix = config_.RECALL_KEY_NAME_PREFIX_BY_H
+        now_h_recall_key = f"{key_prefix}{self.rule_key}.{now_dt}.{h}"
         if self.redis_helper.key_exists(key_name=now_h_recall_key):
             flag = self.copy_redis_zset_data(from_key_name=now_h_recall_key, to_key_name=h_recall_mid_key)
             if flag:
@@ -70,20 +75,20 @@ class PoolRecall(object):
             else:
                 redis_dt = now_dt
                 redis_h = h - 1
-            now_h_recall_key = f"{config_.RECALL_KEY_NAME_PREFIX_BY_H}{self.rule_key}.{redis_dt}.{redis_h}"
+            now_h_recall_key = f"{key_prefix}{self.rule_key}.{redis_dt}.{redis_h}"
             flag = self.copy_redis_zset_data(from_key_name=now_h_recall_key, to_key_name=h_recall_mid_key)
             if flag:
                 value = {'date': redis_dt, 'h': redis_h}
                 self.redis_helper.set_data_to_redis(key_name=h_record_key, value=str(value), expire_time=2*3600)
 
-    def get_mid_h_key(self):
+    def get_mid_h_key(self, province_code):
         # mid对应小时级视频列表 redis-key
         h_recall_mid_key = f"{config_.H_WITH_MID_RECALL_KEY_NAME_PREFIX}{self.mid}"
         # 判断mid对应小时级视频列表 时间记录
         h_record_key = f"{config_.H_WITH_MID_RECORD_KEY_NAME_PREFIX}{self.mid}"
         if not self.redis_helper.key_exists(key_name=h_record_key):
             # ###### 记录key不存在,copy列表,更新记录
-            self.update_mid_data()
+            self.update_mid_data(province_code=province_code)
             # return h_recall_mid_key
         else:
             # ###### 记录key存在,判断date, h
@@ -100,7 +105,11 @@ class PoolRecall(object):
                 # return h_recall_mid_key
             elif (record_dt == now_dt and h-int(record_h) == 1) or (h == 0 and int(record_h) == 23):
                 # 记录的h - 当前h = 1,判断当前h数据是否已更新
-                now_h_recall_key = f"{config_.RECALL_KEY_NAME_PREFIX_BY_H}{self.rule_key}.{now_dt}.{h}"
+                if self.ab_code in [code for _, code in config_.AB_CODE['region_rank_by_h'].items()]:
+                    key_prefix = f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H}{province_code}."
+                else:
+                    key_prefix = config_.RECALL_KEY_NAME_PREFIX_BY_H
+                now_h_recall_key = f"{key_prefix}{self.rule_key}.{now_dt}.{h}"
                 # if not self.redis_helper.key_exists(key_name=now_h_recall_key):
                     # 未更新
                     # return h_recall_mid_key
@@ -114,7 +123,7 @@ class PoolRecall(object):
                         self.redis_helper.set_data_to_redis(key_name=h_record_key, value=str(new_record), expire_time=2*3600)
                     # return h_recall_mid_key
             else:
-                self.update_mid_data()
+                self.update_mid_data(province_code=province_code)
                 # return h_recall_mid_key
 
         return h_recall_mid_key
@@ -126,8 +135,10 @@ class PoolRecall(object):
         :param expire_time: 末位视频记录redis过期时间
         :return:
         """
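+        # 获取provinceCode
+        # NOTE(review): client_info defaults to None in __init__; if it is stored unchanged, .get() raises AttributeError
+        # here. A missing 'provinceCode' would also be formatted into the redis keys as the text 'None' - confirm callers
+        # always supply it, or fall back to the non-region key when it is absent.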
+        province_code = self.client_info.get('provinceCode')
         # 获取mid对应的小时级列表redis-key
-        h_recall_mid_key = self.get_mid_h_key()
+        h_recall_mid_key = self.get_mid_h_key(province_code=province_code)
         if not self.redis_helper.key_exists(h_recall_mid_key):
             recall_result = self.rov_pool_recall(size=size, expire_time=expire_time)
         else:
@@ -158,7 +169,7 @@ class PoolRecall(object):
                     video_score[video_id] = value[1]
                 # 过滤
                 filter_ = FilterVideos(app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=video_ids)
-                ge = gevent.spawn(filter_.filter_videos_h, self.rule_key)
+                ge = gevent.spawn(filter_.filter_videos_h, self.rule_key, self.ab_code, province_code)
                 ge.join()
                 filtered_result = ge.get()
 
@@ -562,15 +573,18 @@ class PoolRecall(object):
         """获取用户上一次在rov召回池对应的位置"""
         # if self.ab_code in [config_.AB_CODE['rank_by_h']] or self.app_type == config_.APP_TYPE['APP']:
         # abCode = 30001   # 老好看视频 / 票圈最惊奇 首页/相关推荐逻辑更新实验
-        if self.ab_code in [config_.AB_CODE['rank_by_h'],
-                            config_.AB_CODE['rov_rank_appType_18_19'],
-                            config_.AB_CODE['rov_rank_appType_19']] or \
+        if self.ab_code in [code for _, code in config_.AB_CODE['rank_by_h'].items()] + \
+                [code for _, code in config_.AB_CODE['region_rank_by_h'].items()] + \
+                [config_.AB_CODE['rov_rank_appType_18_19'], config_.AB_CODE['rov_rank_appType_19']] or \
                 self.app_type == config_.APP_TYPE['APP']:
             rov_pool_key, redis_date = self.get_pool_redis_key_with_h('rov')
+
         elif self.ab_code in [code for _, code in config_.AB_CODE['rank_by_day'].items()]:
             rov_pool_key, redis_date = self.get_pool_redis_key_with_day('dup')
+
         else:
             rov_pool_key, redis_date = self.get_pool_redis_key('rov')
+
         if not rov_pool_key:
             return None, None, None
         if self.ab_code in [code for _, code in config_.AB_CODE['rank_by_day'].items()]:
@@ -774,7 +788,13 @@ class PoolRecall(object):
 
             else:
                 # 判断热度列表是否更新,未更新则使用前一小时的热度列表
-                key_name = f"{config_.RECALL_KEY_NAME_PREFIX_DUP_H}{self.rule_key}.{now_date}.{h}"
+                if self.ab_code in [code for _, code in config_.AB_CODE['region_rank_by_h'].items()]:
+                    # 获取provinceCode
+                    province_code = self.client_info.get('provinceCode')
+                    key_prefix = f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}{province_code}."
+                else:
+                    key_prefix = config_.RECALL_KEY_NAME_PREFIX_DUP_H
+                key_name = f"{key_prefix}{self.rule_key}.{now_date}.{h}"
                 if self.redis_helper.key_exists(key_name):
                     return key_name, h
                 else:
@@ -784,7 +804,7 @@ class PoolRecall(object):
                     else:
                         redis_h = h - 1
                         redis_date = now_date
-                    key_name = f"{config_.RECALL_KEY_NAME_PREFIX_DUP_H}{self.rule_key}.{redis_date}.{redis_h}"
+                    key_name = f"{key_prefix}{self.rule_key}.{redis_date}.{redis_h}"
                     # 判断当前时间是否晚于数据正常更新时间,发送消息到飞书
                     now_m = datetime.now().minute
                     feishu_text = '{} —— appType = {}, h = {} 数据未按时更新,请及时查看解决。'.format(