Jelajahi Sumber

Merge branch 'bottom-2022083114' into pre-master

liqian 2 tahun lalu
induk
komit
00780e5a83
4 file diubah dengan 86 penambahan dan 18 penghapusan
  1. 8 1
      config.py
  2. 10 3
      recommend.py
  3. 44 0
      video_rank.py
  4. 24 14
      video_recall.py

+ 8 - 1
config.py

@@ -126,6 +126,7 @@ class BaseConfig(object):
             'abtest_120': 60021,
             'abtest_121': 60022,
             'abtest_122': 60023,
+            'abtest_130': 60024,
         },  # 地域分组小时级规则实验
 
         'rank_by_24h': {
@@ -207,7 +208,10 @@ class BaseConfig(object):
             'data_key': 'data4', 'rule_key': 'rule7',
             'ab_code': AB_CODE['region_rank_by_h'].get('abtest_122')
         },  # 票圈短视频 分数据实验 data-[vlog: 0.5, 票圈短视频: 0.5], rule-rule7
-
+        '130': {
+            'data_key': 'data1', 'rule_key': 'rule4',
+            'ab_code': AB_CODE['region_rank_by_h'].get('abtest_130')
+        },  # 095实验基础上去除大列表,进行兜底策略优化
     }
 
     # 小程序小时级列表key不同实验标识
@@ -265,6 +269,7 @@ class BaseConfig(object):
         'top': 'op_manual',  # 置顶
         'bottom': 'bottom_strategy',  # 一层兜底
         'bottom_last': 'bottom_strategy_last',  # 二层兜底
+        'bottom2': 'bottom_strategy2',  # 兜底策略2
         'position_insert': 'position_insert',  # 按位置插入
         'relevant_video_op': 'relevant_video_op',  # 相关推荐强插
         'rov_recall_h': 'recall_pool_h',  # 小时级更新列表
@@ -480,6 +485,8 @@ class BaseConfig(object):
 
     # 兜底视频redis存储key
     BOTTOM_KEY_NAME = 'bottom:videos'
+    # 兜底视频redis存储key-方案2:采用流量池视频作为兜底视频
+    BOTTOM2_KEY_NAME = 'bottom2:videos'
 
     # 生效中的置顶视频列表 redis key
     TOP_VIDEO_LIST_KEY_NAME = 'top:item:score:area:applet'

+ 10 - 3
recommend.py

@@ -10,7 +10,7 @@ import config
 from log import Log
 from config import set_config
 from video_recall import PoolRecall
-from video_rank import video_rank, bottom_strategy, video_rank_by_w_h_rate, video_rank_with_old_video
+from video_rank import video_rank, bottom_strategy, video_rank_by_w_h_rate, video_rank_with_old_video, bottom_strategy2
 from db_helper import RedisHelper
 import gevent
 from utils import FilterVideos
@@ -298,8 +298,15 @@ def video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type, al
         # 兜底策略
         # log_.info('====== bottom strategy')
         start_bottom = time.time()
-        rank_result = bottom_strategy(request_id=request_id, size=size, app_type=app_type, ab_code=ab_code, params=params)
-        # end_bottom = time.time()
+        if ab_code == config_.AB_CODE['region_rank_by_h'].get('abtest_130'):
+            rank_result = bottom_strategy2(
+                size=size, app_type=app_type, mid=mid, uid=uid, ab_code=ab_code, client_info=client_info, params=params
+            )
+        else:
+            rank_result = bottom_strategy(
+                request_id=request_id, size=size, app_type=app_type, ab_code=ab_code, params=params
+            )
+
         log_.info({
             'logTimestamp': int(time.time() * 1000),
             'request_id': request_id,

+ 44 - 0
video_rank.py

@@ -227,6 +227,50 @@ def bottom_strategy_last(size, app_type, ab_code, params):
     return bottom_data
 
 
+def bottom_strategy2(size, app_type, mid, uid, ab_code, client_info, params):
+    """
+    Fallback strategy 2: randomly pick videos from the fallback set, filter them, and return the survivors.
+    :param size: number of videos to return
+    :param app_type: product identifier, type-int
+    :param mid: mid
+    :param uid: uid
+    :param ab_code: abCode
+    :param client_info: region info; 'provinceCode' and 'cityCode' are read from it
+    :param params: passed to RedisHelper; params.request_id is forwarded to FilterVideos
+    :return: list of at most `size` dicts with keys 'videoId', 'pushFrom', 'abCode'; empty list if nothing remains
+    """
+    # Collect the city codes that have city-level grouped data
+    city_code_list = [code for _, code in config_.CITY_CODE.items()]
+    # Read provinceCode (defaults to '-1' when absent)
+    province_code = client_info.get('provinceCode', '-1')
+    # Read cityCode (defaults to '-1' when absent)
+    city_code = client_info.get('cityCode', '-1')
+    if city_code in city_code_list:
+        # City-level data exists, so use the city code as the region
+        region_code = city_code
+    else:
+        region_code = province_code
+    if region_code == '':
+        region_code = '-1'
+
+    redis_helper = RedisHelper(params=params)
+    bottom_data = redis_helper.get_data_from_set(key_name=config_.BOTTOM2_KEY_NAME)
+    bottom_result = []
+    if len(bottom_data) > 0:
+        try:
+            random_data = numpy.random.choice(bottom_data, size * 5, False)  # sample size * 5 without replacement
+        except Exception as e:
+            random_data = bottom_data  # sampling failed (e.g. fewer candidates than size * 5): use every candidate
+        video_ids = [int(item) for item in random_data]
+        # Filter the candidates (pool_type='flow') for the resolved region
+        filter_ = FilterVideos(request_id=params.request_id, app_type=app_type, mid=mid, uid=uid, video_ids=video_ids)
+        filtered_data = filter_.filter_videos(pool_type='flow', region_code=region_code)
+        if filtered_data:
+            bottom_result = [{'videoId': int(video_id), 'pushFrom': config_.PUSH_FROM['bottom2'], 'abCode': ab_code}
+                             for video_id in filtered_data[:size]]
+    return bottom_result
+
+
 def video_rank_by_w_h_rate(videos):
     """
     视频宽高比实验(每组的前两个视频调整为横屏视频),根据视频宽高比信息对视频进行重排

+ 24 - 14
video_recall.py

@@ -547,6 +547,10 @@ class PoolRecall(object):
             'operation': 'flow_pool_recall',
             'executeTime': (time.time() - start_time) * 1000
         })
+
+        # debug测试
+        flow_pool_recall_result = []
+
         return flow_pool_recall_result[:size]
 
     def check_video_counts(self, video_ids, flow_pool_mapping):
@@ -1271,20 +1275,26 @@ class PoolRecall(object):
                         break
                     else:
                         continue
-        # 判断获取到的小时级数据数量
-        if len(recall_result) < size:
-            # 补充数据
-            rov_recall_result = self.rov_pool_recall(size=size, expire_time=expire_time)
-            # 去重合并
-            for video in rov_recall_result:
-                vid = video.get('videoId')
-                if vid not in now_video_ids:
-                    recall_result.append(video)
-                    now_video_ids.append(vid)
-                    if len(recall_result) >= size:
-                        break
-                    else:
-                        continue
+
+        # debug测试
+        recall_result = []
+
+        # 130实验组不获取大列表的数据
+        if self.ab_code != config_.AB_CODE['region_rank_by_h'].get('abtest_130'):
+            # 判断获取到的小时级数据数量
+            if len(recall_result) < size:
+                # 补充数据
+                rov_recall_result = self.rov_pool_recall(size=size, expire_time=expire_time)
+                # 去重合并
+                for video in rov_recall_result:
+                    vid = video.get('videoId')
+                    if vid not in now_video_ids:
+                        recall_result.append(video)
+                        now_video_ids.append(vid)
+                        if len(recall_result) >= size:
+                            break
+                        else:
+                            continue
 
         log_.info({
             'logTimestamp': int(time.time() * 1000),