liqian 2 years ago
parent
commit
e014b68374
5 changed files with 119 additions and 15 deletions
  1. 2 1
      app.py
  2. 7 0
      config.py
  3. 29 6
      recommend.py
  4. 14 6
      video_rank.py
  5. 67 2
      video_recall.py

+ 2 - 1
app.py

@@ -101,12 +101,13 @@ def relevant_recommend():
         app_type = request_data.get('appType')
         client_info = request_data.get('clientInfo')
         ab_exp_info = request_data.get('abExpInfo', None)
+        page_type = request_data.get('pageType')  # 1:详情页;2:分享页
         log_.info({'requestUri': '/applet/video/relevant/recommend', 'requestData': request_data})
         # log_.info('requestUri = "{}", requestData = "{}"'.format('/applet/video/relevant/recommend', request_data))
         # log_.info('relevant_recommend request data: {}'.format(request_data))
 
         videos = video_relevant_recommend(video_id=video_id, mid=mid, uid=uid, size=page_size, app_type=app_type,
-                                          ab_exp_info=ab_exp_info, client_info=client_info)
+                                          ab_exp_info=ab_exp_info, client_info=client_info, page_type=page_type)
 
         result = {'code': 200, 'message': 'success', 'data': {'videos': videos}}
         log_.info({'requestUri': '/applet/video/relevant/recommend',

+ 7 - 0
config.py

@@ -36,6 +36,7 @@ class BaseConfig(object):
         '24h_rule_rank1': '052',
         'region_rule_rank2': '054',
         'region_rule_rank3': '055',
+        'top_video_relevant_appType_19': '058'
     }
 
     # abTest
@@ -79,6 +80,8 @@ class BaseConfig(object):
         'rank_by_24h': {
             '24h_rule_rank1': 70001,
         },  # 小时级规则更新过去24h数据实验
+
+        'top_video_relevant_appType_19': 80001,  # 最惊奇电影类视频分享页相关推荐相似视频实验
     }
 
     # 小程序小时级列表key不同实验标识
@@ -129,6 +132,7 @@ class BaseConfig(object):
         'rov_recall_region_h': 'recall_pool_region_h',  # 地域分组小时级更新列表
         'rov_recall_region_day': 'recall_pool_region_day',  # 地域分组天级更新列表
         'rov_recall_region_24h': 'recall_pool_region_24h',  # 地域分组小时级更新24h列表
+        'top_video_relevant_appType_19': 'relevant_video',  # 相似视频
     }
 
     # category id mapping
@@ -297,6 +301,9 @@ class BaseConfig(object):
     # 头部视频对应运营强插的相关视频 redis 存储 key 前缀, 完整key格式:com.weiqu.video.relevant.videos.item.{videoId}
     RELEVANT_VIDEOS_WITH_OP_KEY_NAME = 'com.weiqu.video.relevant.videos.item.'
 
+    # 最惊奇电影类视频相关推荐列表存放 redis key前缀,完整格式: com.weiqu.movie.relevant.list.item.{videoId}
+    MOVIE_RELEVANT_LIST_KEY_NAME_PREFIX = 'com.weiqu.movie.relevant.list.item.'
+
 
 class DevelopmentConfig(BaseConfig):
     """开发环境配置"""

+ 29 - 6
recommend.py

@@ -136,7 +136,7 @@ def positon_duplicate(pos1_vids, pos2_vids, videos):
 
 
 def video_recommend(mid, uid, size, top_K, flow_pool_P, app_type, algo_type, client_info, expire_time=24*3600,
-                    ab_code=config_.AB_CODE['initial'], rule_key='', no_op_flag=False, old_video_index=-1):
+                    ab_code=config_.AB_CODE['initial'], rule_key='', no_op_flag=False, old_video_index=-1, video_id=None):
     """
     首页线上推荐逻辑
     :param mid: mid type-string
@@ -149,6 +149,7 @@ def video_recommend(mid, uid, size, top_K, flow_pool_P, app_type, algo_type, cli
     :param client_info: 用户位置信息 {"country": "国家",  "province": "省份",  "city": "城市"}
     :param expire_time: 末位视频记录redis过期时间
     :param ab_code: AB实验code
+    :param video_id: 相关推荐头部视频id
     :return:
     """
     # ####### 多进程召回
@@ -185,6 +186,10 @@ def video_recommend(mid, uid, size, top_K, flow_pool_P, app_type, algo_type, cli
     elif ab_code in [code for _, code in config_.AB_CODE['region_rank_by_h'].items()]:
         t = [gevent.spawn(pool_recall.rov_pool_recall_with_region, size, expire_time),
              gevent.spawn(pool_recall.flow_pool_recall, size)]
+    # 最惊奇相关推荐实验
+    elif ab_code == config_.AB_CODE['top_video_relevant_appType_19']:
+        t = [gevent.spawn(pool_recall.relevant_recall_19, video_id, size, expire_time),
+             gevent.spawn(pool_recall.flow_pool_recall_18_19, size)]
     # 最惊奇/老好看实验
     elif app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
         t = [gevent.spawn(pool_recall.rov_pool_recall, size, expire_time),
@@ -212,7 +217,11 @@ def video_recommend(mid, uid, size, top_K, flow_pool_P, app_type, algo_type, cli
     start_rank = time.time()
     # log_.info('====== rank')
     if app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
-        if ab_code in [config_.AB_CODE['rov_rank_appType_18_19'], config_.AB_CODE['rov_rank_appType_19']]:
+        if ab_code in [
+            config_.AB_CODE['rov_rank_appType_18_19'],
+            config_.AB_CODE['rov_rank_appType_19'],
+            config_.AB_CODE['top_video_relevant_appType_19']
+        ]:
             data = {
                 'rov_pool_recall': recall_result_list[0],
                 'flow_pool_recall': recall_result_list[1]
@@ -366,8 +375,13 @@ def update_local_distribute_count(videos):
         log_.error(traceback.format_exc())
 
 
-def get_recommend_params(ab_exp_info):
-    """根据实验分组给定对应的推荐参数"""
+def get_recommend_params(ab_exp_info, page_type=0):
+    """
+    根据实验分组给定对应的推荐参数
+    :param ab_exp_info: AB实验组参数
+    :param page_type: 页面区分参数,默认:0(首页)
+    :return:
+    """
     top_K = config_.K
     flow_pool_P = config_.P
     # 不获取人工干预数据标记
@@ -501,6 +515,13 @@ def get_recommend_params(ab_exp_info):
             flow_pool_P = config_.P_18_19
             no_op_flag = True
 
+        elif config_.AB_EXP_CODE['top_video_relevant_appType_19'] in ab_exp_code_list and page_type == 2:
+            ab_code = config_.AB_CODE['top_video_relevant_appType_19']
+            expire_time = 3600
+            top_K = 1
+            flow_pool_P = config_.P_18_19
+            no_op_flag = True
+
         # 老视频实验
         if config_.AB_EXP_CODE['old_video'] in ab_exp_code_list:
             ab_code = config_.AB_CODE['old_video']
@@ -581,7 +602,7 @@ def video_homepage_recommend(mid, uid, size, app_type, algo_type, client_info, a
     return rank_result
 
 
-def video_relevant_recommend(video_id, mid, uid, size, app_type, ab_exp_info, client_info):
+def video_relevant_recommend(video_id, mid, uid, size, app_type, ab_exp_info, client_info, page_type):
     """
     相关推荐逻辑
     :param video_id: 相关推荐的头部视频id
@@ -590,6 +611,8 @@ def video_relevant_recommend(video_id, mid, uid, size, app_type, ab_exp_info, cl
     :param size: 请求视频数量 type-int
     :param app_type: 产品标识  type-int
     :param ab_exp_info: ab实验分组参数 [{"expItemId":1, "configValue":{"size":4, "K":3, ...}}, ...]
+    :param client_info: 地域参数
+    :param page_type: 页面区分参数  1:详情页;2:分享页
     :return: videos type-list
     """
     top_K, flow_pool_P, ab_code, rule_key, expire_time, no_op_flag, old_video_index = \
@@ -601,7 +624,7 @@ def video_relevant_recommend(video_id, mid, uid, size, app_type, ab_exp_info, cl
                                                        algo_type='', client_info=client_info,
                                                        ab_code=ab_code, expire_time=expire_time,
                                                        rule_key=rule_key, no_op_flag=no_op_flag,
-                                                       old_video_index=old_video_index)
+                                                       old_video_index=old_video_index, video_id=video_id)
     # ab-test
     # result = ab_test_op(rank_result=rank_result,
     #                     ab_code_list=[config_.AB_CODE['position_insert'], config_.AB_CODE['relevant_video_op']],

+ 14 - 6
video_rank.py

@@ -23,6 +23,10 @@ def video_rank(data, size, top_K, flow_pool_P):
     if not data['rov_pool_recall'] and not data['flow_pool_recall']:
         return None
     # 将各路召回的视频按照score从大到小排序
+    # 最惊奇相关推荐相似视频
+    relevant_recall = [item for item in data['rov_pool_recall']
+                       if item.get('pushFrom') == config_.PUSH_FROM['top_video_relevant_appType_19']]
+    relevant_recall_rank = sorted(relevant_recall, key=lambda k: k.get('rovScore', 0), reverse=True)
     # 小时级更新数据
     h_recall = [item for item in data['rov_pool_recall'] if item.get('pushFrom') == config_.PUSH_FROM['rov_recall_h']]
     h_recall_rank = sorted(h_recall, key=lambda k: k.get('rovScore', 0), reverse=True)
@@ -50,7 +54,8 @@ def video_rank(data, size, top_K, flow_pool_P):
     rov_initial_recall = [
         item for item in data['rov_pool_recall']
         if item.get('pushFrom') not in
-           [config_.PUSH_FROM['rov_recall_h'],
+           [config_.PUSH_FROM['top_video_relevant_appType_19'],
+            config_.PUSH_FROM['rov_recall_h'],
             config_.PUSH_FROM['rov_recall_region_h'],
             config_.PUSH_FROM['rov_recall_region_24h'],
             config_.PUSH_FROM['rov_recall_region_day'],
@@ -68,12 +73,15 @@ def video_rank(data, size, top_K, flow_pool_P):
                                                          top_K=top_K)
     # log_.info('remove_duplicate finished! rov_recall_rank = {}, flow_recall_rank = {}'.format(
     #     rov_recall_rank, flow_recall_rank))
+
+    rank_result = relevant_recall_rank
+
     # 从ROV召回池中获取top k
     if len(rov_recall_rank) > 0:
-        rank_result = rov_recall_rank[:top_K]
+        rank_result.extend(rov_recall_rank[:top_K])
         rov_recall_rank = rov_recall_rank[top_K:]
     else:
-        rank_result = flow_recall_rank[:top_K]
+        rank_result.extend(flow_recall_rank[:top_K])
         flow_recall_rank = flow_recall_rank[top_K:]
 
     # 按概率 p 及score排序获取 size - k 个视频
@@ -88,16 +96,16 @@ def video_rank(data, size, top_K, flow_pool_P):
                 flow_recall_rank.remove(flow_recall_rank[0])
             else:
                 rank_result.extend(rov_recall_rank[:size - top_K - i])
-                return rank_result
+                return rank_result[:size]
         else:
             if rov_recall_rank:
                 rank_result.append(rov_recall_rank[0])
                 rov_recall_rank.remove(rov_recall_rank[0])
             else:
                 rank_result.extend(flow_recall_rank[:size - top_K - i])
-                return rank_result
+                return rank_result[:size]
         i += 1
-    return rank_result
+    return rank_result[:size]
 
 
 def remove_duplicate(rov_recall, flow_recall, top_K):

+ 67 - 2
video_recall.py

@@ -600,7 +600,8 @@ class PoolRecall(object):
         # abCode = 30001   # 老好看视频 / 票圈最惊奇 首页/相关推荐逻辑更新实验
         if self.ab_code in [code for _, code in config_.AB_CODE['rank_by_h'].items()] + \
                 [code for _, code in config_.AB_CODE['region_rank_by_h'].items()] + \
-                [config_.AB_CODE['rov_rank_appType_18_19'], config_.AB_CODE['rov_rank_appType_19']] + \
+                [config_.AB_CODE['rov_rank_appType_18_19'], config_.AB_CODE['rov_rank_appType_19'],
+                 config_.AB_CODE['top_video_relevant_appType_19']] + \
                 [code for _, code in config_.AB_CODE['rank_by_24h'].items()] or \
                 self.app_type == config_.APP_TYPE['APP']:
             rov_pool_key, redis_date = self.get_pool_redis_key_with_h('rov')
@@ -790,7 +791,10 @@ class PoolRecall(object):
 
             # elif self.app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
             # abCode = 30001   # 老好看视频 / 票圈最惊奇 首页/相关推荐逻辑更新实验
-            elif self.ab_code in [config_.AB_CODE['rov_rank_appType_18_19'], config_.AB_CODE['rov_rank_appType_19']]:
+            elif self.ab_code in [
+                config_.AB_CODE['rov_rank_appType_18_19'], config_.AB_CODE['rov_rank_appType_19'],
+                config_.AB_CODE['top_video_relevant_appType_19']
+            ]:
                 # 判断热度列表是否更新,未更新则使用前一小时的热度列表
                 key_name_prefix = f"{config_.RECALL_KEY_NAME_PREFIX_APP_TYPE}{self.app_type}."
                 key_name = f"{key_name_prefix}{now_date}.{h}"
@@ -1324,3 +1328,64 @@ class PoolRecall(object):
                 self.redis_helper.remove_value_from_zset(key_name=h_recall_mid_key, value=value)
 
         return recall_result[:size]
+
+    def get_relevant_videos_19(self, video_id, size=4):
+        """
+        Fetch the relevant-video list stored in redis for a head video.
+
+        Reads the zset at ``MOVIE_RELEVANT_LIST_KEY_NAME_PREFIX + video_id``, passes the
+        candidate ids through ``FilterVideos`` and tags every surviving video with this
+        experiment's ``pushFrom`` and the instance's ``abCode``.
+
+        :param video_id: id of the head video whose relevant list is requested
+        :param size: maximum number of videos to return
+        :return: list of {'videoId', 'rovScore', 'pushFrom', 'abCode'} dicts; empty when the
+                 key is missing, holds no data, or nothing survives filtering
+        """
+        push_from = config_.PUSH_FROM['top_video_relevant_appType_19']
+        relevant_videos_key_name = f"{config_.MOVIE_RELEVANT_LIST_KEY_NAME_PREFIX}{video_id}"
+        # Use the instance-level helper for the existence check as well as the read below,
+        # instead of building a throwaway RedisHelper just for this one call.
+        if not self.redis_helper.key_exists(key_name=relevant_videos_key_name):
+            return []
+        # Fetch the whole zset together with its scores.
+        data = self.redis_helper.get_data_zset_with_index(key_name=relevant_videos_key_name, start=0, end=-1,
+                                                          with_scores=True)
+        if not data:
+            return []
+        # Convert the stored ids to int and remember each score as {videoId: score}.
+        # A separate name is used so the ``video_id`` parameter is not overwritten.
+        video_ids = []
+        video_score = {}
+        for value in data:
+            candidate_id = int(value[0])
+            video_ids.append(candidate_id)
+            video_score[candidate_id] = value[1]
+        # Filtering
+        filter_ = FilterVideos(app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=video_ids)
+        # gevent.spawn expects the callable itself; calling filter_videos() here would run
+        # the filter eagerly and hand its return value to spawn as the "function".
+        ge = gevent.spawn(filter_.filter_videos)
+        ge.join()
+        filtered_result = ge.get()
+        if not filtered_result:
+            return []
+
+        # Attach the source markers pushFrom / abCode; ids without a known score are dropped.
+        relevant_result = [{'videoId': int(item), 'rovScore': video_score[int(item)],
+                            'pushFrom': push_from, 'abCode': self.ab_code}
+                           for item in filtered_result if video_score.get(int(item)) is not None]
+        return relevant_result[:size]
+
+    def relevant_recall_19(self, video_id, size=4, expire_time=24*3600):
+        """
+        Recall videos for the relevant-recommendation experiment.
+
+        Spawns ``get_relevant_videos_19`` and ``rov_pool_recall`` as two gevent greenlets,
+        waits for both, then merges their results in that order, keeping only the first
+        occurrence of each ``videoId``.
+
+        :param video_id: id of the head video, forwarded to ``get_relevant_videos_19``
+        :param size: number of videos requested from each source and the maximum returned
+        :param expire_time: forwarded unchanged to ``rov_pool_recall``
+        :return: merged, de-duplicated list of video dicts, at most ``size`` long
+        """
+        t = [gevent.spawn(self.get_relevant_videos_19, video_id, size),
+             gevent.spawn(self.rov_pool_recall, size, expire_time)]
+        gevent.joinall(t)
+        relevant_recall_result_list = [i.get() for i in t]
+        # Merge in source order, dropping videos that were already taken.
+        # A set gives O(1) membership checks, and the loop uses its own name so the
+        # ``video_id`` parameter is not overwritten.
+        seen_video_ids = set()
+        recall_result = []
+        for relevant_result in relevant_recall_result_list:
+            for video in relevant_result:
+                candidate_id = video.get('videoId')
+                if candidate_id in seen_video_ids:
+                    continue
+                seen_video_ids.add(candidate_id)
+                recall_result.append(video)
+        return recall_result[:size]
+