Browse Source

add w-h-rate experiment
Merge branch 'w-h-rate' into test

liqian 3 years ago
parent
commit
3506e2c498
4 changed files with 170 additions and 23 deletions
  1. 19 1
      config.py
  2. 21 8
      recommend.py
  3. 125 10
      video_rank.py
  4. 5 4
      video_recall.py

+ 19 - 1
config.py

@@ -10,7 +10,19 @@ class BaseConfig(object):
         'ZUI_JING_QI': 19,  # 票圈最惊奇
     }
     # abCode
-    AB_CODE = 10000
+    AB_CODE = {
+        'initial': 10000,  # 初始
+        'w_h_rate': 10001,  # 视频宽高比实验(每组的前两个视频调整为横屏视频)
+    }
+
+    # pushFrom
+    PUSH_FROM = {
+        'rov_recall': 'recall_pool',  # rov召回池
+        'flow_recall': 'flow_pool',  # 流量池
+        'top': 'op_manual',  # 置顶
+        'bottom': 'bottom_strategy',  # 一层兜底
+        'bottom_last': 'bottom_strategy_last',  # 二层兜底
+    }
     # category id mapping
     CATEGORY = {
         'recommend': [55],  # 推荐
@@ -60,6 +72,12 @@ class BaseConfig(object):
     # 置顶视频区域 为 全部 的code
     ALL_AREA_CODE = '000000'
 
+    # width : height > 1 的视频列表 redis key, zset存储,value为videoId,score为w_h_rate
+    W_H_RATE_UP_1_VIDEO_LIST_KEY_NAME = {
+        'rov_recall': 'com.weiqu.video.rov.w.h.rate.1.item',  # rov召回池视频
+        'bottom_last': 'com.weiqu.video.bottom.last.w.h.rate.1.item'  # 二次兜底视频
+    }
+
 
 class DevelopmentConfig(BaseConfig):
     """测试环境配置"""

+ 21 - 8
recommend.py

@@ -6,7 +6,7 @@ from datetime import datetime
 from log import Log
 from config import set_config
 from video_recall import PoolRecall
-from video_rank import video_rank, bottom_strategy
+from video_rank import video_rank, bottom_strategy, video_rank_by_w_h_rate
 from db_helper import RedisHelper
 import gevent
 
@@ -25,7 +25,7 @@ def video_recommend(mid, uid, size, app_type, algo_type, client_info):
     :param client_info: 用户位置信息 {"country": "国家",  "province": "省份",  "city": "城市"}
     :return:
     """
-    ab_code = config_.AB_CODE
+    ab_code = config_.AB_CODE['initial']
     # ####### 多进程召回
     start_recall = time.time()
     # log_.info('====== recall')
@@ -76,6 +76,14 @@ def video_recommend(mid, uid, size, app_type, algo_type, client_info):
         log_.info('mid: {}, uid: {}, bottom strategy result: {}, execute time = {}ms'.format(
             mid, uid, rank_result, (end_bottom - start_bottom) * 1000))
 
+    # ####### 视频宽高比AB实验
+    # 对内容精选进行 视频宽高比分发实验
+    if app_type == config_.APP_TYPE['LONG_VIDEO']:
+        videos = rank_result
+        rank_result = video_rank_by_w_h_rate(videos=videos)
+        log_.info('app_type: {}, mid: {}, uid: {}, rank_result: {}, execute time = {}ms'.format(
+            app_type, mid, uid, rank_result, (end_rank - start_rank) * 1000))
+
     # ####### redis数据刷新
     # log_.info('====== update redis')
     # 预曝光数据同步刷新到Redis, 过期时间为0.5h
@@ -88,17 +96,22 @@ def video_recommend(mid, uid, size, app_type, algo_type, client_info):
         log_.info('preview redis update success!')
 
     # 将此次获取的ROV召回池config_.K末位视频id同步刷新到Redis中,方便下次快速定位到召回位置,过期时间为1天
-    rov_recall_video = [item['videoId'] for item in rank_result[:3] if item['pushFrom'] == 'recall_pool']
-    if 0 < len(rov_recall_video) <= config_.K:
+    rov_recall_video = [item['videoId'] for item in rank_result[:config_.K]
+                        if item['pushFrom'] == config_.PUSH_FROM['rov_recall']]
+    if len(rov_recall_video) > 0:
         if not redis_helper.get_score_with_value(key_name=config_.UPDATE_ROV_KEY_NAME, value=rov_recall_video[-1]):
             redis_helper.set_data_to_redis(key_name=last_rov_recall_key, value=rov_recall_video[-1])
-    elif len(rov_recall_video) > config_.K:
-        if not redis_helper.get_score_with_value(key_name=config_.UPDATE_ROV_KEY_NAME, value=rov_recall_video[config_.K - 1]):
-            redis_helper.set_data_to_redis(key_name=last_rov_recall_key, value=rov_recall_video[config_.K - 1])
+
+    # if 0 < len(rov_recall_video) <= config_.K:
+    #     if not redis_helper.get_score_with_value(key_name=config_.UPDATE_ROV_KEY_NAME, value=rov_recall_video[-1]):
+    #         redis_helper.set_data_to_redis(key_name=last_rov_recall_key, value=rov_recall_video[-1])
+    # elif len(rov_recall_video) > config_.K:
+    #     if not redis_helper.get_score_with_value(key_name=config_.UPDATE_ROV_KEY_NAME, value=rov_recall_video[config_.K - 1]):
+    #         redis_helper.set_data_to_redis(key_name=last_rov_recall_key, value=rov_recall_video[config_.K - 1])
     log_.info('last video redis update success!')
 
     # 将此次分发的流量池视频,对 本地分发数-1 进行记录
-    flow_recall_video = [item for item in rank_result if item['pushFrom'] == 'flow_pool']
+    flow_recall_video = [item for item in rank_result if item['pushFrom'] == config_.PUSH_FROM['flow_recall']]
     if flow_recall_video:
         update_local_distribute_count(flow_recall_video)
         log_.info('update local distribute count success!')

+ 125 - 10
video_rank.py

@@ -120,7 +120,8 @@ def bottom_strategy(size, app_type, ab_code):
         random_data = numpy.random.choice(data, size, False)
     else:
         random_data = data
-    bottom_data = [{'videoId': int(item), 'pushFrom': 'bottom_strategy', 'abCode': ab_code} for item in random_data]
+    bottom_data = [{'videoId': int(item), 'pushFrom': config_.PUSH_FROM['bottom'], 'abCode': ab_code}
+                   for item in random_data]
     return bottom_data
 
 
@@ -139,17 +140,131 @@ def bottom_strategy_last(size, app_type, ab_code):
     # 状态过滤
     # filter_videos = FilterVideos(app_type=app_type, video_ids=random_data)
     # filtered_data = filter_videos.filter_video_status(video_ids=random_data)
-    bottom_data = [{'videoId': int(video_id), 'pushFrom': 'bottom_strategy_last', 'abCode': ab_code}
+    bottom_data = [{'videoId': int(video_id), 'pushFrom': config_.PUSH_FROM['bottom_last'], 'abCode': ab_code}
                    for video_id in random_data[:size]]
     return bottom_data
 
 
+def video_rank_by_w_h_rate(videos):
+    """
+    Width/height-ratio experiment: re-rank one group of videos so that, where
+    possible, its first two slots hold landscape (width : height > 1) videos.
+
+    A video counts as landscape when redis returns a score for its videoId in the
+    zset configured for its source (rov recall and first-level fallback share one
+    zset, second-level fallback has its own).
+
+    A pinned or flow-pool video already in one of the first two slots stays there,
+    and flow-pool videos further down keep their positions. Any other leading slot
+    is filled with the earliest landscape video still available; the remaining
+    videos follow in their original relative order. A video placed into a leading
+    slot other than its original one has 'abCode' overwritten in place with
+    AB_CODE['w_h_rate'].
+
+    When a leading slot needs a landscape video and none is left, the input list
+    is returned untouched.
+    :param videos: ranked list of dicts carrying 'videoId', 'pushFrom' and 'abCode'
+    :return: the re-ranked list, or `videos` itself when nothing is or can be adjusted
+    """
+    if len(videos) < 2:
+        # With fewer than two videos there is nothing to move into the leading slots.
+        return videos
+
+    redis_helper = RedisHelper()
+    push_from = config_.PUSH_FROM
+    key_names = config_.W_H_RATE_UP_1_VIDEO_LIST_KEY_NAME
+    # Sources whose videos are never displaced from a leading slot.
+    fixed_sources = (push_from['top'], push_from['flow_recall'])
+    # pushFrom -> zset that lists the landscape videos of that source.
+    landscape_keys = {
+        push_from['rov_recall']: key_names['rov_recall'],
+        push_from['bottom']: key_names['rov_recall'],
+        push_from['bottom_last']: key_names['bottom_last'],
+    }
+    landscape_cache = {}
+
+    def is_landscape(index):
+        """Tell whether videos[index] is a landscape video, asking redis at most once."""
+        if index not in landscape_cache:
+            video = videos[index]
+            key_name = landscape_keys.get(video['pushFrom'])
+            # Pinned, flow-pool and unknown sources have no zset and are never landscape.
+            landscape_cache[index] = key_name is not None and redis_helper.get_score_with_value(
+                key_name=key_name, value=video['videoId']) is not None
+        return landscape_cache[index]
+
+    def is_fixed(index):
+        """Tell whether videos[index] comes from a source that must keep its slot."""
+        return videos[index]['pushFrom'] in fixed_sources
+
+    # ##### Nothing to do when both leading slots are already fixed or landscape
+    if all(is_fixed(i) or is_landscape(i) for i in range(2)):
+        return videos
+
+    # ##### Record where the flow-pool and landscape videos sit
+    flow_video_index = {i for i, video in enumerate(videos)
+                        if video['pushFrom'] == push_from['flow_recall']}
+    horizontal_video_index = [i for i, _ in enumerate(videos) if is_landscape(i)]
+
+    # ##### Choose the video for each of the two leading slots
+    top2_index = []
+    for i in range(2):
+        if is_fixed(i):
+            top2_index.append(i)
+            # A flow-pool video used here must not be placed a second time below.
+            flow_video_index.discard(i)
+        elif i in horizontal_video_index:
+            top2_index.append(i)
+            horizontal_video_index.remove(i)
+        elif horizontal_video_index:
+            # Promote the earliest landscape video that is still unused.
+            top2_index.append(horizontal_video_index.pop(0))
+        else:
+            # No landscape video left for this slot, so keep the original order.
+            # This must hold for slot 0 as well as slot 1: skipping slot 0 would
+            # leave only one chosen index and the rebuilt list would lose a video.
+            return videos
+
+    # ##### Rebuild the list around the chosen leading videos
+    new_rank_result = []
+    for slot, index in enumerate(top2_index):
+        item = videos[index]
+        if slot != index:
+            # Mark videos that this experiment moved.
+            item['abCode'] = config_.AB_CODE['w_h_rate']
+        new_rank_result.append(item)
+
+    # Flow-pool videos stay put; everything else fills the gaps in original order.
+    other_result = (video for i, video in enumerate(videos)
+                    if i not in top2_index and i not in flow_video_index)
+    for i in range(2, len(videos)):
+        new_rank_result.append(videos[i] if i in flow_video_index else next(other_result))
+    return new_rank_result
+
+
 if __name__ == '__main__':
-    d_test = [[{'videoId': 3674236, 'rovScore': 99.24105262298141, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 1915009, 'rovScore': 99.248872388032, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 9033859, 'rovScore': 99.21956695197761, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 4258137, 'rovScore': 99.24737622823497, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 9034962, 'rovScore': 99.18993382219318, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 1922051, 'rovScore': 99.2351969813565, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 7829308, 'rovScore': 99.25465474490638, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 3247671, 'rovScore': 99.24601245746983, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 5831941, 'rovScore': 99.16776814766304, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 7837973, 'rovScore': 99.253749334822, 'pushFrom': 'recall_pool', 'abCode': 10000}], [{'videoId': 9035245, 'flowPool': '1#1#1#1636085384424', 'rovScore': 1.0, 'pushFrom': 'flow_pool', 'abCode': 10000}, {'videoId': 9034828, 'flowPool': '1#1#1#1636090368461', 'rovScore': 1.0, 'pushFrom': 'flow_pool', 'abCode': 10000}, {'videoId': 9035244, 'flowPool': '1#1#1#1636085467105', 'rovScore': 1.0, 'pushFrom': 'flow_pool', 'abCode': 10000}, {'videoId': 9035237, 'flowPool': '1#1#1#1636086478074', 'rovScore': 1.0, 'pushFrom': 'flow_pool', 'abCode': 10000}]]
-    data = {
-        'rov_pool_recall': d_test[0],
-        'flow_pool_recall': d_test[1]
-    }
-    res = video_rank(data, size=10)
-    for item in res:
-        print(item)
+    d_test = [{'videoId': 1, 'rovScore': 10, 'pushFrom': 'op_manual', 'abCode': 10000},
+              {'videoId': 1919925, 'rovScore': 9, 'pushFrom': 'recall_pool', 'abCode': 10000},
+              {'videoId': 3, 'rovScore': 8, 'pushFrom': 'recall_pool', 'abCode': 10000},
+              {'videoId': 4, 'rovScore': 7, 'pushFrom': 'flow_pool', 'abCode': 10000},
+              {'videoId': 5, 'rovScore': 6, 'pushFrom': 'flow_pool', 'abCode': 10000},
+              {'videoId': 6, 'rovScore': 5, 'pushFrom': 'recall_pool', 'abCode': 10000},
+              {'videoId': 3674236, 'rovScore': 4, 'pushFrom': 'recall_pool', 'abCode': 10000},
+              {'videoId': 3247671, 'rovScore': 3, 'pushFrom': 'recall_pool', 'abCode': 10000},
+              {'videoId': 7837973, 'rovScore': 2, 'pushFrom': 'recall_pool', 'abCode': 10000}]
+    res = video_rank_by_w_h_rate(videos=d_test)
+    for tmp in res:
+        print(tmp)

+ 5 - 4
video_recall.py

@@ -99,7 +99,7 @@ class PoolRecall(object):
             if filtered_result:
                 # 添加视频源参数 pushFrom, abCode
                 temp_result = [{'videoId': int(item), 'rovScore': video_score[int(item)],
-                                'pushFrom': 'recall_pool', 'abCode': self.ab_code}
+                                'pushFrom': config_.PUSH_FROM['rov_recall'], 'abCode': self.ab_code}
                                for item in filtered_result if video_score.get(int(item)) is not None]
                 rov_pool_recall_result.extend(temp_result)
             else:
@@ -175,7 +175,8 @@ class PoolRecall(object):
                         # 添加视频源参数 pushFrom, abCode
                         flow_pool_recall_result.append(
                             {'videoId': video_id, 'flowPool': flow_pool,
-                             'rovScore': video_score[video_id], 'pushFrom': 'flow_pool', 'abCode': self.ab_code}
+                             'rovScore': video_score[video_id], 'pushFrom': config_.PUSH_FROM['flow_recall'],
+                             'abCode': self.ab_code}
                         )
                         # flow_pool_recall_result.append(
                         #     {'videoId': item[0], 'flowPool': item[1], 'distributeCount': item[2],
@@ -365,7 +366,7 @@ class PoolRecall(object):
                     continue
                 update_rov_video_ids.append(video_id)
                 update_rov_result.append({'videoId': video_id, 'rovScore': rov_score,
-                                          'pushFrom': 'recall_pool', 'abCode': self.ab_code})
+                                          'pushFrom': config_.PUSH_FROM['rov_recall'], 'abCode': self.ab_code})
             return update_rov_video_ids, update_rov_result
 
         except Exception as e:
@@ -427,7 +428,7 @@ class PoolRecall(object):
                     continue
                 top_video_ids.append(video_id)
                 top_video_result.append({'videoId': video_id, 'rovScore': item_info.get('score'),
-                                         'pushFrom': 'op_manual', 'abCode': self.ab_code})
+                                         'pushFrom': config_.PUSH_FROM['top'], 'abCode': self.ab_code})
             return top_video_ids, top_video_result
 
         except Exception as e: