Explorar el Código

add abtest: 105

liqian hace 2 años
padre
commit
95f8a65774
Se han modificado 5 ficheros con 98 adiciones y 4 borrados
  1. 29 3
      config.py
  2. 23 0
      recommend.py
  3. 4 0
      user2new.py
  4. 16 1
      video_rank.py
  5. 26 0
      video_recall.py

+ 29 - 3
config.py

@@ -64,6 +64,7 @@ class BaseConfig(object):
         'region_rule_rank4_appType_6_data2': '098',
         'region_rule_rank4_appType_6_data3': '099',
         'region_rule_rank4_appType_18_data2': '100',
+        'region_rule_rank6_appType_0_data1': '105',
     }
 
     # abTest
@@ -116,6 +117,7 @@ class BaseConfig(object):
             'region_rule_rank4_appType_6_data2': 60013,
             'region_rule_rank4_appType_6_data3': 60014,
             'region_rule_rank4_appType_18_data2': 60015,
+            'region_rule_rank6_appType_0_data1': 60016,
         },  # 地域分组小时级规则实验
 
         'rank_by_24h': {
@@ -158,6 +160,7 @@ class BaseConfig(object):
         'region_rule_rank4_appType_6_data2': {'rule_key': 'rule3', 'data_key': 'data2'},
         'region_rule_rank4_appType_6_data3': {'rule_key': 'rule3', 'data_key': 'data3'},
         'region_rule_rank4_appType_18_data2': {'rule_key': 'rule3', 'data_key': 'data2'},
+        'region_rule_rank6_appType_0_data1': {'rule_key': 'rule5', 'data_key': 'data1'},
     }
 
     # 小程序地域分组小时级列表key不同实验标识
@@ -183,13 +186,15 @@ class BaseConfig(object):
         'position_insert': 'position_insert',  # 按位置插入
         'relevant_video_op': 'relevant_video_op',  # 相关推荐强插
         'rov_recall_h': 'recall_pool_h',  # 小时级更新列表
-        'rov_recall_24h': 'recall_pool_24h',  # 小时级更新列表
+        'rov_recall_24h': 'recall_pool_24h',  # 小时级更新相对24h列表
         'rov_recall_day': 'recall_pool_day',  # 天级规则更新列表
         'old_video': 'old_video_recall',  # 老视频
         'rov_recall_region_h': 'recall_pool_region_h',  # 地域分组小时级更新列表
         'rov_recall_region_day': 'recall_pool_region_day',  # 地域分组天级更新列表
         'rov_recall_region_24h': 'recall_pool_region_24h',  # 地域分组小时级更新24h列表
         'rov_recall_24h_dup': 'rov_recall_24h_dup',  # 小时级更新24h筛选后剩余数据列表
+        'rov_recall_48h': 'recall_pool_48h',  # 小时级更新相对48h列表
+        'rov_recall_48h_dup': 'rov_recall_48h_dup',  # 小时级更新48h筛选后剩余数据列表
         'top_video_relevant_appType_19': 'relevant_video',  # 相似视频
         'whole_movies': 'whole_movies',  # 完整影视
         'talk_videos': 'talk_videos',  # 影视解说
@@ -254,6 +259,16 @@ class BaseConfig(object):
     # 记录 mid-上一次在dup3 24h更新列表中对应的位置key 中数据所属(date,h),完整格式:recall:last:record:region:dup3:24h:{appType}:{mid}
     RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP3_24H = 'recall:last:record:region:dup3:24h:'
 
+    # 用户上一次在dup2 48h更新列表中对应的位置 redis key前缀,完整key格式:recall:last:item:region:dup2:48h:{appType}:{mid}
+    LAST_VIDEO_FROM_REGION_DUP2_48H_PREFIX = 'recall:last:item:region:dup2:48h:'
+    # 记录 mid-上一次在dup2 48h更新列表中对应的位置key 中数据所属(date,h),完整格式:recall:last:record:region:dup2:48h:{appType}:{mid}
+    RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP2_48H = 'recall:last:record:region:dup2:48h:'
+
+    # 用户上一次在dup3 48h更新列表中对应的位置 redis key前缀,完整key格式:recall:last:item:region:dup3:48h:{appType}:{mid}
+    LAST_VIDEO_FROM_REGION_DUP3_48H_PREFIX = 'recall:last:item:region:dup3:48h:'
+    # 记录 mid-上一次在dup3 48h更新列表中对应的位置key 中数据所属(date,h),完整格式:recall:last:record:region:dup3:48h:{appType}:{mid}
+    RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP3_48H = 'recall:last:record:region:dup3:48h:'
+
     # # 小时级视频状态不符合推荐要求的列表 redis key,完整格式:com.weiqu.video.filter.h.item.{rule_key}
     # H_VIDEO_FILER = 'com.weiqu.video.filter.h.item.'
 
@@ -297,6 +312,12 @@ class BaseConfig(object):
     # 小程序小时级24h数据 筛选后的剩余数据 更新结果 与 小程序24h更新结果/小程序地域分组24h更新结果/小程序地域分组小时级更新结果 去重后 存放 redis key前缀,
     # 完整格式:recall:item:score:apptype:region:dup3:24h:{region}:{appType}:{data_key}:{rule_key}:{date}:{h}
     RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H = 'recall:item:score:apptype:region:dup3:24h:'
+    # 小程序48h更新结果与 小程序地域分组24h更新结果/小程序地域分组小时级更新结果 去重后 存放 redis key前缀,
+    # 完整格式:recall:item:score:apptype:region:dup2:48h:{region}:{appType}:{data_key}:{rule_key}:{date}:{h}
+    RECALL_KEY_NAME_PREFIX_DUP2_REGION_48H_H = 'recall:item:score:apptype:region:dup2:48h:'
+    # 小程序小时级48h数据 筛选后的剩余数据 更新结果 与 小程序48h更新结果/小程序地域分组24h更新结果/小程序地域分组小时级更新结果 去重后 存放 redis key前缀,
+    # 完整格式:recall:item:score:apptype:region:dup3:48h:{region}:{appType}:{data_key}:{rule_key}:{date}:{h}
+    RECALL_KEY_NAME_PREFIX_DUP3_REGION_48H_H = 'recall:item:score:apptype:region:dup3:48h:'
     # 小程序离线ROV模型结果与 小程序天级更新结果/小程序地域分组天级更新结果/小程序地域分组小时级更新结果 去重后 存放 redis key前缀,
     # 完整格式:recall:item:score:apptype:region:dup4:rov:{region}:{appType}:{data_key}:{rule_key}:{date}:{h}
     RECALL_KEY_NAME_PREFIX_DUP_REGION_H = 'recall:item:score:apptype:region:dup4:rov:'
@@ -435,14 +456,18 @@ class BaseConfig(object):
                 {"dataListCode": 2, "keyPrefix": RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H},
                 {"dataListCode": 3, "keyPrefix": RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H},
                 {"dataListCode": 4, "keyPrefix": RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H},
-                {"dataListCode": 5, "keyPrefix": RECALL_KEY_NAME_PREFIX_DUP_REGION_H},
+                {"dataListCode": 5, "keyPrefix": RECALL_KEY_NAME_PREFIX_DUP2_REGION_48H_H},
+                {"dataListCode": 6, "keyPrefix": RECALL_KEY_NAME_PREFIX_DUP3_REGION_48H_H},
+                {"dataListCode": 7, "keyPrefix": RECALL_KEY_NAME_PREFIX_DUP_REGION_H},
             ]
         },
         1: {"dataListDesc": "地域小时级", "dataListCode": 1, "keyPrefix": RECALL_KEY_NAME_PREFIX_REGION_BY_H},
         2: {"dataListDesc": "地域相对24小时级", "dataListCode": 2, "keyPrefix": RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H},
         3: {"dataListDesc": "非地域相对24小时级", "dataListCode": 3, "keyPrefix": RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H},
         4: {"dataListDesc": "非地域相对24小时级列表2", "dataListCode": 4, "keyPrefix": RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H},
-        5: {"dataListDesc": "大列表", "dataListCode": 5, "keyPrefix": RECALL_KEY_NAME_PREFIX_DUP_REGION_H},
+        5: {"dataListDesc": "非地域相对48小时级", "dataListCode": 5, "keyPrefix": RECALL_KEY_NAME_PREFIX_DUP2_REGION_48H_H},
+        6: {"dataListDesc": "非地域相对48小时级列表2", "dataListCode": 6, "keyPrefix": RECALL_KEY_NAME_PREFIX_DUP3_REGION_48H_H},
+        7: {"dataListDesc": "大列表", "dataListCode": 7, "keyPrefix": RECALL_KEY_NAME_PREFIX_DUP_REGION_H},
     }
     VIDEO_DATA_LIST_AB_EXP_CODE_MAPPING = {
         # "055": {"app_type": 0, "data_key": "data1", "rule_key": "rule2"},
@@ -464,6 +489,7 @@ class BaseConfig(object):
         "098": {"app_type": 6, "data_key": "data2", "rule_key": "rule3"},
         "099": {"app_type": 6, "data_key": "data3", "rule_key": "rule3"},
         "100": {"app_type": 18, "data_key": "data2", "rule_key": "rule3"},
+        "105": {"app_type": 0, "data_key": "data1", "rule_key": "rule5"},
     }
     REGION_CODE = {
         '北京': '110000', '天津': '120000', '河北省': '130000', '山西省': '140000', '内蒙古': '150000',

+ 23 - 0
recommend.py

@@ -423,6 +423,22 @@ def update_redis_data(result, app_type, mid, last_rov_recall_key, top_K, expire_
                 redis_helper.set_data_to_redis(key_name=last_video_key, value=rov_recall_24h_dup3_video[-1],
                                                expire_time=expire_time)
 
+            # 将此次获取的 相对48h筛选数据列表 中的视频id同步刷新到redis中,方便下次快速定位到召回位置
+            rov_recall_48h_dup2_video = [item['videoId'] for item in result[:top_K]
+                                         if item['pushFrom'] == config_.PUSH_FROM['rov_recall_48h']]
+            if len(rov_recall_48h_dup2_video) > 0:
+                last_video_key = f'{config_.LAST_VIDEO_FROM_REGION_DUP2_48H_PREFIX}{app_type}:{mid}'
+                redis_helper.set_data_to_redis(key_name=last_video_key, value=rov_recall_48h_dup2_video[-1],
+                                               expire_time=expire_time)
+
+            # 将此次获取的 相对48h筛选后剩余数据列表 中的视频id同步刷新到redis中,方便下次快速定位到召回位置
+            rov_recall_48h_dup3_video = [item['videoId'] for item in result[:top_K]
+                                         if item['pushFrom'] == config_.PUSH_FROM['rov_recall_48h_dup']]
+            if len(rov_recall_48h_dup3_video) > 0:
+                last_video_key = f'{config_.LAST_VIDEO_FROM_REGION_DUP3_48H_PREFIX}{app_type}:{mid}'
+                redis_helper.set_data_to_redis(key_name=last_video_key, value=rov_recall_48h_dup3_video[-1],
+                                               expire_time=expire_time)
+
         # 将此次分发的流量池视频,对 本地分发数-1 进行记录
         if app_type not in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
             flow_recall_video = [item for item in result if item['pushFrom'] == config_.PUSH_FROM['flow_recall']]
@@ -715,6 +731,13 @@ def get_recommend_params(recommend_type, ab_exp_info, ab_info_data, page_type=0)
             data_key = config_.RULE_KEY_REGION['region_rule_rank4_appType_18_data2'].get('data_key')
             no_op_flag = True
 
+        elif config_.AB_EXP_CODE['region_rule_rank6_appType_0_data1'] in ab_exp_code_list:
+            ab_code = config_.AB_CODE['region_rank_by_h'].get('region_rule_rank6_appType_0_data1')
+            expire_time = 3600
+            rule_key = config_.RULE_KEY_REGION['region_rule_rank6_appType_0_data1'].get('rule_key')
+            data_key = config_.RULE_KEY_REGION['region_rule_rank6_appType_0_data1'].get('data_key')
+            no_op_flag = True
+
         else:
             ab_code = config_.AB_CODE['initial']
             expire_time = 24 * 3600

+ 4 - 0
user2new.py

@@ -35,6 +35,10 @@ def user2new(app_type, mid, uid):
             # config_.H_WITH_MID_RECORD_KEY_NAME_PREFIX_24H,
             # config_.H_WITH_MID_RECALL_KEY_NAME_PREFIX_REGION_24H,
             # config_.H_WITH_MID_RECORD_KEY_NAME_PREFIX_REGION_24H,
+            config_.LAST_VIDEO_FROM_REGION_DUP3_48H_PREFIX,
+            config_.RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP3_48H,
+            config_.LAST_VIDEO_FROM_REGION_DUP2_48H_PREFIX,
+            config_.RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP2_48H,
             config_.LAST_VIDEO_FROM_REGION_DUP3_24H_PREFIX,
             config_.RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP3_24H,
             config_.LAST_VIDEO_FROM_REGION_DUP2_24H_PREFIX,

+ 16 - 1
video_rank.py

@@ -52,6 +52,7 @@ def video_rank(data, size, top_K, flow_pool_P):
     region_day_recall = [item for item in data['rov_pool_recall']
                          if item.get('pushFrom') == config_.PUSH_FROM['rov_recall_region_day']]
     region_day_recall_rank = sorted(region_day_recall, key=lambda k: k.get('rovScore', 0), reverse=True)
+
     # 相对24h规则更新数据
     rule_24h_recall = [item for item in data['rov_pool_recall']
                        if item.get('pushFrom') == config_.PUSH_FROM['rov_recall_24h']]
@@ -60,6 +61,16 @@ def video_rank(data, size, top_K, flow_pool_P):
     rule_24h_dup_recall = [item for item in data['rov_pool_recall']
                            if item.get('pushFrom') == config_.PUSH_FROM['rov_recall_24h_dup']]
     rule_24h_dup_recall_rank = sorted(rule_24h_dup_recall, key=lambda k: k.get('rovScore', 0), reverse=True)
+
+    # 相对48h规则更新数据
+    rule_48h_recall = [item for item in data['rov_pool_recall']
+                       if item.get('pushFrom') == config_.PUSH_FROM['rov_recall_48h']]
+    rule_48h_recall_rank = sorted(rule_48h_recall, key=lambda k: k.get('rovScore', 0), reverse=True)
+    # 相对48h规则筛选后剩余更新数据
+    rule_48h_dup_recall = [item for item in data['rov_pool_recall']
+                           if item.get('pushFrom') == config_.PUSH_FROM['rov_recall_48h_dup']]
+    rule_48h_dup_recall_rank = sorted(rule_48h_dup_recall, key=lambda k: k.get('rovScore', 0), reverse=True)
+
     # 天级规则更新数据
     day_recall = [item for item in data['rov_pool_recall'] if item.get('pushFrom') == config_.PUSH_FROM['rov_recall_day']]
     day_recall_rank = sorted(day_recall, key=lambda k: k.get('rovScore', 0), reverse=True)
@@ -74,6 +85,8 @@ def video_rank(data, size, top_K, flow_pool_P):
             config_.PUSH_FROM['rov_recall_region_day'],
             config_.PUSH_FROM['rov_recall_24h'],
             config_.PUSH_FROM['rov_recall_24h_dup'],
+            config_.PUSH_FROM['rov_recall_48h'],
+            config_.PUSH_FROM['rov_recall_48h_dup'],
             config_.PUSH_FROM['rov_recall_day'],
             config_.PUSH_FROM['whole_movies'],
             config_.PUSH_FROM['talk_videos']]
@@ -81,7 +94,9 @@ def video_rank(data, size, top_K, flow_pool_P):
     rov_initial_recall_rank = sorted(rov_initial_recall, key=lambda k: k.get('rovScore', 0), reverse=True)
     rov_recall_rank = whole_movies_recall_rank + talk_videos_recall_rank + h_recall_rank + \
                       region_h_recall_rank + region_24h_recall_rank + region_day_recall_rank + \
-                      rule_24h_recall_rank + rule_24h_dup_recall_rank + day_recall_rank + rov_initial_recall_rank
+                      rule_24h_recall_rank + rule_24h_dup_recall_rank + \
+                      rule_48h_recall_rank + rule_48h_dup_recall_rank + \
+                      day_recall_rank + rov_initial_recall_rank
     # 流量池
     flow_recall_rank = sorted(data['flow_pool_recall'], key=lambda k: k.get('rovScore', 0), reverse=True)
     # 对各路召回的视频进行去重

+ 26 - 0
video_recall.py

@@ -1205,6 +1205,17 @@ class PoolRecall(object):
                      gevent.spawn(self.recall_region_dup_24h, province_code, size, 'region_24h', expire_time),
                      gevent.spawn(self.recall_region_dup_24h, province_code, size, '24h_dup2', expire_time),
                      gevent.spawn(self.recall_region_dup_24h, province_code, size, '24h_dup3', expire_time)]
+        elif self.ab_code == config_.AB_CODE['region_rank_by_h'].get('region_rule_rank6_appType_0_data1'):
+            if province_code == '-1':
+                t = [
+                     gevent.spawn(self.recall_region_dup_24h, province_code, size, '48h_dup2', expire_time),
+                     gevent.spawn(self.recall_region_dup_24h, province_code, size, '48h_dup3', expire_time)]
+            else:
+                t = [
+                     gevent.spawn(self.recall_region_dup_24h, province_code, size, 'region_h', expire_time),
+                     gevent.spawn(self.recall_region_dup_24h, province_code, size, 'region_24h', expire_time),
+                     gevent.spawn(self.recall_region_dup_24h, province_code, size, '48h_dup2', expire_time),
+                     gevent.spawn(self.recall_region_dup_24h, province_code, size, '48h_dup3', expire_time)]
         else:
             if province_code == '-1':
                 # t = [gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size, 'day_24h')]
@@ -1735,6 +1746,21 @@ class PoolRecall(object):
             pool_key_prefix = config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H
             last_video_key_prefix = config_.LAST_VIDEO_FROM_REGION_DUP3_24H_PREFIX
             push_from = config_.PUSH_FROM['rov_recall_24h_dup']
+        elif key_flag == '48h_dup2':
+            # 不分地域相对48h的筛选结果
+            # mid对应上一次视频位置 时间记录
+            record_key_prefix = config_.RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP2_48H
+            # 视频列表
+            pool_key_prefix = config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_48H_H
+            # mid对应上一次视频记录
+            last_video_key_prefix = config_.LAST_VIDEO_FROM_REGION_DUP2_48H_PREFIX
+            push_from = config_.PUSH_FROM['rov_recall_48h']
+        elif key_flag == '48h_dup3':
+            # 不分地域相对48h的筛选后剩余数据
+            record_key_prefix = config_.RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP3_48H
+            pool_key_prefix = config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_48H_H
+            last_video_key_prefix = config_.LAST_VIDEO_FROM_REGION_DUP3_48H_PREFIX
+            push_from = config_.PUSH_FROM['rov_recall_48h_dup']
         else:
             return []
         # 获取相关redis key, 用户上一次在rov召回池对应的位置