Browse Source

add whole_movies ab test

liqian 2 years ago
parent
commit
c20966930e
5 changed files with 111 additions and 11 deletions
  1. 10 1
      config.py
  2. 9 0
      recommend.py
  3. 1 0
      user2new.py
  4. 13 2
      video_rank.py
  5. 78 8
      video_recall.py

+ 10 - 1
config.py

@@ -37,7 +37,8 @@ class BaseConfig(object):
         'region_rule_rank2': '054',
         'region_rule_rank3': '055',
         'top_video_relevant_appType_19': '058',
-        '24h_rule_rank2': '059'
+        '24h_rule_rank2': '059',
+        'whole_movies': '061'
     }
 
     # abTest
@@ -84,6 +85,7 @@ class BaseConfig(object):
         },  # 小时级规则更新过去24h数据实验
 
         'top_video_relevant_appType_19': 80001,  # 最惊奇电影类视频分享页相关推荐相似视频实验
+        'whole_movies': 90001,  # 最惊奇首页/相关推荐 完整影视资源实验
     }
 
     # 小程序小时级列表key不同实验标识
@@ -136,6 +138,8 @@ class BaseConfig(object):
         'rov_recall_region_day': 'recall_pool_region_day',  # 地域分组天级更新列表
         'rov_recall_region_24h': 'recall_pool_region_24h',  # 地域分组小时级更新24h列表
         'top_video_relevant_appType_19': 'relevant_video',  # 相似视频
+        'whole_movies': 'whole_movies',  # 完整影视
+        'talk_videos': 'talk_videos',  # 影视解说
     }
 
     # category id mapping
@@ -262,6 +266,8 @@ class BaseConfig(object):
     LAST_VIDEO_FROM_ROV_POOL_NOW_PREFIX = 'com.weiqu.video.rov.pool.last.now.'
     # 用户上一次在region dup更新列表中对应的位置 redis key前缀,完整key格式:com.weiqu.video.region.dup.last.{dup}.{appType}.{mid}.{date}
     LAST_VIDEO_FROM_REGION_DUP_PREFIX = 'com.weiqu.video.region.dup.last.'
+    # 用户上一次在完整影视资源列表对应的位置 redis key前缀,完整key格式:com.weiqu.video.whole.movies.last.{appType}.{mid}.{date}
+    LAST_VIDEO_FROM_WHOLE_MOVIES_PREFIX = 'com.weiqu.video.whole.movies.last.'
 
     # 本地记录视频的可分发数,控制分发,完整key格式:com.weiqu.video.flowpool.local.distribute.count.{h}
     # LOCAL_DISTRIBUTE_COUNT_PREFIX = 'com.weiqu.video.flowpool.local.distribute.count.'
@@ -310,6 +316,9 @@ class BaseConfig(object):
     # 最惊奇电影类视频相关推荐列表存放 redis key前缀,完整格式: com.weiqu.movie.relevant.list.item.{videoId}
     MOVIE_RELEVANT_LIST_KEY_NAME_PREFIX = 'com.weiqu.movie.relevant.list.item.'
 
+    # 完整影视资源更新结果存放 redis key 前缀,完整格式:'com.weiqu.video.recall.whole.movies.item.{date}.{h}'
+    RECALL_KEY_NAME_PREFIX_WHOLE_MOVIES = 'com.weiqu.video.recall.whole.movies.item.'
+
 
 class DevelopmentConfig(BaseConfig):
     """开发环境配置"""

+ 9 - 0
recommend.py

@@ -190,6 +190,9 @@ def video_recommend(mid, uid, size, top_K, flow_pool_P, app_type, algo_type, cli
     elif ab_code == config_.AB_CODE['top_video_relevant_appType_19']:
         t = [gevent.spawn(pool_recall.relevant_recall_19, video_id, size, expire_time),
              gevent.spawn(pool_recall.flow_pool_recall_18_19, size)]
+    # 最惊奇完整影视实验
+    elif ab_code == config_.AB_CODE['whole_movies']:
+        t = [gevent.spawn(pool_recall.rov_pool_recall_19, size, expire_time)]
     # 最惊奇/老好看实验
     elif app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
         t = [gevent.spawn(pool_recall.rov_pool_recall, size, expire_time),
@@ -528,6 +531,12 @@ def get_recommend_params(ab_exp_info, page_type=0):
             flow_pool_P = config_.P_18_19
             no_op_flag = True
 
+        # 票圈最惊奇完整影视资源实验
+        elif config_.AB_EXP_CODE['whole_movies'] in ab_exp_code_list:
+            ab_code = config_.AB_CODE['whole_movies']
+            expire_time = 24 * 3600
+            no_op_flag = True
+
         # 老视频实验
         if config_.AB_EXP_CODE['old_video'] in ab_exp_code_list:
             ab_code = config_.AB_CODE['old_video']

+ 1 - 0
user2new.py

@@ -48,6 +48,7 @@ def user2new(app_type, mid, uid):
             config_.LAST_VIDEO_FROM_ROV_POOL_NOW_PREFIX,
             f"{config_.LAST_VIDEO_FROM_REGION_DUP_PREFIX}{1}.",
             f"{config_.LAST_VIDEO_FROM_REGION_DUP_PREFIX}{2}.",
+            config_.LAST_VIDEO_FROM_WHOLE_MOVIES_PREFIX,
         ]
         # 天级
         now_date = date.today().strftime('%Y%m%d')

+ 13 - 2
video_rank.py

@@ -27,6 +27,15 @@ def video_rank(data, size, top_K, flow_pool_P):
     relevant_recall = [item for item in data['rov_pool_recall']
                        if item.get('pushFrom') == config_.PUSH_FROM['top_video_relevant_appType_19']]
     relevant_recall_rank = sorted(relevant_recall, key=lambda k: k.get('rovScore', 0), reverse=True)
+    # 最惊奇完整影视视频
+    whole_movies_recall = [item for item in data['rov_pool_recall']
+                           if item.get('pushFrom') == config_.PUSH_FROM['whole_movies']]
+    whole_movies_recall_rank = sorted(whole_movies_recall, key=lambda k: k.get('rovScore', 0), reverse=True)
+    # 最惊奇影视解说视频
+    talk_videos_recall = [item for item in data['rov_pool_recall']
+                           if item.get('pushFrom') == config_.PUSH_FROM['talk_videos']]
+    talk_videos_recall_rank = sorted(talk_videos_recall, key=lambda k: k.get('rovScore', 0), reverse=True)
+
     # 小时级更新数据
     h_recall = [item for item in data['rov_pool_recall'] if item.get('pushFrom') == config_.PUSH_FROM['rov_recall_h']]
     h_recall_rank = sorted(h_recall, key=lambda k: k.get('rovScore', 0), reverse=True)
@@ -60,10 +69,12 @@ def video_rank(data, size, top_K, flow_pool_P):
             config_.PUSH_FROM['rov_recall_region_24h'],
             config_.PUSH_FROM['rov_recall_region_day'],
             config_.PUSH_FROM['rov_recall_24h'],
-            config_.PUSH_FROM['rov_recall_day']]
+            config_.PUSH_FROM['rov_recall_day'],
+            config_.PUSH_FROM['whole_movies'],
+            config_.PUSH_FROM['talk_videos']]
     ]
     rov_initial_recall_rank = sorted(rov_initial_recall, key=lambda k: k.get('rovScore', 0), reverse=True)
-    rov_recall_rank = h_recall_rank + \
+    rov_recall_rank = whole_movies_recall_rank + talk_videos_recall_rank + h_recall_rank + \
                       region_h_recall_rank + region_24h_recall_rank + region_day_recall_rank + \
                       rule_24h_recall_rank + day_recall_rank + rov_initial_recall_rank
     # 流量池

+ 78 - 8
video_recall.py

@@ -301,11 +301,13 @@ class PoolRecall(object):
                             continue
         return recall_result[:size]
 
-    def rov_pool_recall(self, size=10, expire_time=24*3600):
+    def rov_pool_recall(self, size=10, expire_time=24*3600, video_type='', push_from=config_.PUSH_FROM['rov_recall']):
         """
         从ROV召回池中获取视频
         :param size: 获取视频个数
         :param expire_time: 末位视频记录redis过期时间
+        :param video_type: 视频列表类别
+        :param push_from: 视频来源标记
         :return:
         """
         # log_.info('====== rov pool recall')
@@ -334,7 +336,7 @@ class PoolRecall(object):
                 update_rov_dup_result.append(item)
 
         # 获取相关redis key, 用户上一次在rov召回池对应的位置
-        rov_pool_key, last_rov_recall_key, idx = self.get_video_last_idx()
+        rov_pool_key, last_rov_recall_key, idx = self.get_video_last_idx(video_type=video_type)
         if not rov_pool_key:
             log_.info('ROV召回池中无视频')
             if (not update_rov_dup_result) and (not top_video_result):
@@ -382,7 +384,7 @@ class PoolRecall(object):
             if filtered_result:
                 # 添加视频源参数 pushFrom, abCode
                 temp_result = [{'videoId': int(item), 'rovScore': video_score[int(item)],
-                                'pushFrom': config_.PUSH_FROM['rov_recall'], 'abCode': self.ab_code}
+                                'pushFrom': push_from, 'abCode': self.ab_code}
                                for item in filtered_result if video_score.get(int(item)) is not None]
                 rov_pool_recall_result.extend(temp_result)
             else:
@@ -596,7 +598,7 @@ class PoolRecall(object):
             log_.error('pool type error')
             return None, None
 
-    def get_video_last_idx(self):
+    def get_video_last_idx(self, video_type=''):
         """获取用户上一次在rov召回池对应的位置"""
         # if self.ab_code in [config_.AB_CODE['rank_by_h']] or self.app_type == config_.APP_TYPE['APP']:
         # abCode = 30001   # 老好看视频 / 票圈最惊奇 首页/相关推荐逻辑更新实验
@@ -605,7 +607,8 @@ class PoolRecall(object):
                 [config_.AB_CODE['rov_rank_appType_18_19'], config_.AB_CODE['rov_rank_appType_19'],
                  config_.AB_CODE['top_video_relevant_appType_19']] + \
                 [code for _, code in config_.AB_CODE['rank_by_24h'].items()] or \
-                self.app_type == config_.APP_TYPE['APP']:
+                self.app_type == config_.APP_TYPE['APP'] or \
+                video_type == 'whole_movies':
             rov_pool_key, redis_date = self.get_pool_redis_key_with_h('rov')
 
         elif self.ab_code in [code for _, code in config_.AB_CODE['rank_by_day'].items()]:
@@ -622,6 +625,8 @@ class PoolRecall(object):
                 last_key_prefix = config_.LAST_VIDEO_FROM_ROV_POOL_PRE_PREFIX
             else:
                 last_key_prefix = config_.LAST_VIDEO_FROM_ROV_POOL_NOW_PREFIX
+        elif video_type == 'whole_movies':
+            last_key_prefix = config_.LAST_VIDEO_FROM_WHOLE_MOVIES_PREFIX
         else:
             last_key_prefix = config_.LAST_VIDEO_FROM_ROV_POOL_PREFIX
         last_rov_recall_key = f'{last_key_prefix}{self.app_type}.{self.mid}.{redis_date}'
@@ -749,10 +754,11 @@ class PoolRecall(object):
             log_.error(traceback.format_exc())
             return [], []
 
-    def get_pool_redis_key_with_h(self, pool_type):
+    def get_pool_redis_key_with_h(self, pool_type, video_type=''):
         """
         拼接key,获取以小时级别更新的视频列表
         :param pool_type: type-string {'rov': rov召回池, 'flow': 流量池}
+        :param video_type: 视频列表区分 whole_movies - 完整影视资源
         :return: key_name
         """
         if pool_type == 'rov':
@@ -818,6 +824,29 @@ class PoolRecall(object):
                         send_msg_to_feishu(feishu_text)
                     return key_name, redis_h
 
+            # 完整影视资源
+            elif video_type == 'whole_movies':
+                # 判断完整影视资源列表是否更新,未更新则使用前一小时的热度列表
+                key_name_prefix = f"{config_.RECALL_KEY_NAME_PREFIX_WHOLE_MOVIES}"
+                key_name = f"{key_name_prefix}{now_date}.{h}"
+                if self.redis_helper.key_exists(key_name):
+                    return key_name, h
+                else:
+                    if h == 0:
+                        redis_h = 23
+                        redis_date = (date.today() - timedelta(days=1)).strftime('%Y%m%d')
+                    else:
+                        redis_h = h - 1
+                        redis_date = now_date
+                    key_name = f"{key_name_prefix}{redis_date}.{redis_h}"
+                    # 判断当前时间是否晚于数据正常更新时间,发送消息到飞书
+                    now_m = datetime.now().minute
+                    feishu_text = '{} —— appType = {}, h = {} 完整影视资源数据未按时更新,请及时查看解决。'.format(
+                        config_.ENV_TEXT, self.app_type, h)
+                    if now_m > config_.ROV_H_UPDATE_MINUTE:
+                        send_msg_to_feishu(feishu_text)
+                    return key_name, redis_h
+
             else:
                 # 判断热度列表是否更新,未更新则使用前一小时的热度列表
                 if self.ab_code in [code for _, code in config_.AB_CODE['region_rank_by_h'].items()]:
@@ -859,7 +888,7 @@ class PoolRecall(object):
             log_.error('pool type error')
             return None, None
 
-    def flow_pool_recall_18_19(self, size=10):
+    def flow_pool_recall_18_19(self, size=4, push_from=config_.PUSH_FROM['flow_recall']):
         """从流量池中获取视频"""
         # log_.info('====== flow pool recall')
         flow_pool_key = self.get_pool_redis_key('flow')
@@ -900,7 +929,7 @@ class PoolRecall(object):
             if filtered_result:
                 # 添加视频源参数 pushFrom, abCode
                 temp_result = [{'videoId': int(item), 'rovScore': video_score[int(item)],
-                                'pushFrom': config_.PUSH_FROM['flow_recall'], 'abCode': self.ab_code}
+                                'pushFrom': push_from, 'abCode': self.ab_code}
                                for item in filtered_result if video_score.get(int(item)) is not None]
                 flow_pool_recall_result.extend(temp_result)
             idx += get_size
@@ -1399,3 +1428,44 @@ class PoolRecall(object):
                     now_video_ids.append(video_id)
         return recall_result[:size]
 
+    def rov_pool_recall_19(self, size=4, expire_time=24*3600):
+        """
+        Recall videos for Zui Jing Qi: whole movies first, then talk videos, then ROV top-up.
+
+        Two greenlets are spawned and joined: rov_pool_recall with video_type='whole_movies'
+        (items tagged PUSH_FROM['whole_movies']) and flow_pool_recall_18_19 (items tagged
+        PUSH_FROM['talk_videos']). Their results are merged in that order and de-duplicated
+        by videoId, keeping the first occurrence of each id.
+
+        :param size: number of videos to fetch
+        :param expire_time: redis expiry passed to the default rov_pool_recall top-up call
+        :return: list of at most `size` video dicts
+        """
+        # The whole-movies recall is called with a fixed 3600 expiry; `expire_time` only reaches the top-up
+        t = [gevent.spawn(self.rov_pool_recall, size, expire_time=3600, video_type='whole_movies',
+                          push_from=config_.PUSH_FROM['whole_movies']),
+             gevent.spawn(self.flow_pool_recall_18_19, size, push_from=config_.PUSH_FROM['talk_videos'])]
+        gevent.joinall(t)
+        # Collect every greenlet result up front so a failed greenlet still raises here
+        recall_result_list = [i.get() for i in t]
+        recall_result = []
+        seen_video_ids = set()  # set membership is O(1); the ids are only used for lookups
+
+        def merge(videos):
+            """Append not-yet-seen videos in order; return True once `size` videos are held."""
+            for video in videos:
+                if len(recall_result) >= size:
+                    break
+                video_id = video.get('videoId')
+                if video_id not in seen_video_ids:
+                    seen_video_ids.add(video_id)
+                    recall_result.append(video)
+            return len(recall_result) >= size
+
+        # Merge the two sources in priority order, stopping as soon as enough videos are collected
+        if not any(merge(videos) for videos in recall_result_list):
+            # Still short: top up from the default ROV recall pool
+            merge(self.rov_pool_recall(size=size, expire_time=expire_time))
+
+        return recall_result[:size]
+