Browse Source

add abtest: 510

liqian 1 year ago
parent
commit
9d0c5c0fef
4 changed files with 53 additions and 14 deletions
  1. 18 3
      config.py
  2. 12 4
      recommend.py
  3. 8 2
      video_rank.py
  4. 15 5
      video_recall.py

+ 18 - 3
config.py

@@ -189,6 +189,7 @@ class BaseConfig(object):
             'abtest_503': 60090,
             'abtest_505': 60091,
             'abtest_506': 60092,
+            'abtest_510': 60094,
         },  # 地域分组小时级规则实验
 
         'rank_by_24h': {
@@ -545,6 +546,10 @@ class BaseConfig(object):
             'data_key': 'data10', 'rule_key': 'rule28', 'rank_key_prefix': 'rank:score8:',
             'ab_code': AB_CODE['region_rank_by_h'].get('abtest_506')
         },  # simrecall+融合+backrate指数加权_2&其他降权_0.5 + rank_score8
+        '510': {
+            'data_key': 'data10', 'rule_key': 'rule30',
+            'ab_code': AB_CODE['region_rank_by_h'].get('abtest_510')
+        },  # simrecall+融合+增加不区分地域小时级列表
 
     }
 
@@ -611,6 +616,7 @@ class BaseConfig(object):
         'old_video': 'old_video_recall',  # 老视频
         'rov_recall_region_h': 'recall_pool_region_h',  # 地域分组小时级更新列表
         'rov_recall_region_day': 'recall_pool_region_day',  # 地域分组天级更新列表
+        'rov_recall_h_h': 'rov_recall_h_h',  # 不区分地域小时级更新列表
         'rov_recall_region_24h': 'recall_pool_region_24h',  # 地域分组小时级更新24h列表
         'rov_recall_24h_dup': 'rov_recall_24h_dup',  # 小时级更新24h筛选后剩余数据列表
         'rov_recall_48h': 'recall_pool_48h',  # 小时级更新相对48h列表
@@ -680,6 +686,11 @@ class BaseConfig(object):
     # 记录 mid-上一次在 地域分组小时级 更新列表中对应的位置key 中数据所属(date,h),完整格式:recall:last:record:region:h:{appType}:{mid}
     RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_H = 'recall:last:record:region:h:'
 
+    # 用户上一次在 不分地域小时级 更新列表中对应的位置 redis key前缀,完整key格式:recall:last:item:region:dup:h:{appType}:{mid}
+    LAST_VIDEO_FROM_REGION_DUP_H_PREFIX = 'recall:last:item:region:dup:h:'
+    # 记录 mid-上一次在 不分地域小时级 更新列表中对应的位置key 中数据所属(date,h),完整格式:recall:last:record:region:dup:h:{appType}:{mid}
+    RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP_H = 'recall:last:record:region:dup:h:'
+
     # 用户上一次在dup1 分地域24h更新列表中对应的位置 redis key前缀,完整key格式:recall:last:item:region:dup1:24h:{appType}:{mid}
     LAST_VIDEO_FROM_REGION_DUP1_24H_PREFIX = 'recall:last:item:region:dup1:24h:'
     # 记录 mid-上一次在dup1 分地域24h更新列表中对应的位置key 中数据所属(date,h),完整格式:recall:last:record:region:dup1:24h:{appType}:{mid}
@@ -734,11 +745,15 @@ class BaseConfig(object):
     # 完整格式:recall:item:score:region:h:{region}:{data_key}:{rule_key}:{date}:{h}
     RECALL_KEY_NAME_PREFIX_REGION_BY_H = 'recall:item:score:region:h:'
 
+    # 小程序不分地域小时级更新结果与小程序地域分组小时级更新结果去重后 存放 redis key前缀,
+    # 完整格式:recall:item:score:region:dup:h:{region}:{data_key}:{rule_key}:{date}:{h}
+    RECALL_KEY_NAME_PREFIX_DUP_H_H = 'recall:item:score:region:dup:h:'
+
     # 小程序地域分组天级更新结果与小程序地域分组小时级更新结果去重后 存放 redis key前缀,
     # 完整格式:com.weiqu.video.recall.hot.item.score.dup1.region.day.h.{region}.{rule_key}.{date}.{h}
     # RECALL_KEY_NAME_PREFIX_DUP1_REGION_DAY_H = 'com.weiqu.video.recall.hot.item.score.dup1.region.day.h.'
 
-    # 小程序地域分组小时级更新24h结果与小程序地域分组小时级更新结果去重后 存放 redis key前缀,
+    # 小程序地域分组小时级更新24h结果与 小程序地域分组小时级更新结果/小程序不分地域小时级更新结果 去重后 存放 redis key前缀,
     # 完整格式:recall:item:score:region:dup1:region24h:{region}:{data_key}:{rule_key}:{date}:{h}
     RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H = 'recall:item:score:region:dup1:region24h:'
 
@@ -746,10 +761,10 @@ class BaseConfig(object):
     # 完整格式:com.weiqu.video.recall.hot.item.score.dup2.region.day.h.{region}.{rule_key}.{date}.{h}
     # RECALL_KEY_NAME_PREFIX_DUP2_REGION_DAY_H = 'com.weiqu.video.recall.hot.item.score.dup2.region.day.h.'
 
-    # 小程序24h更新结果与 小程序地域分组24h更新结果/小程序地域分组小时级更新结果 去重后 存放 redis key前缀,
+    # 小程序24h更新结果与 小程序地域分组24h更新结果/小程序地域分组小时级更新结果/小程序不分地域小时级更新结果 去重后 存放 redis key前缀,
     # 完整格式:recall:item:score:region:dup2:24h:{region}:{data_key}:{rule_key}:{date}:{h}
     RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H = 'recall:item:score:region:dup2:24h:'
-    # 小程序小时级24h数据 筛选后的剩余数据 更新结果 与 小程序24h更新结果/小程序地域分组24h更新结果/小程序地域分组小时级更新结果 去重后 存放 redis key前缀,
+    # 小程序小时级24h数据 筛选后的剩余数据 更新结果 与 小程序24h更新结果/小程序地域分组24h更新结果/小程序地域分组小时级更新结果/小程序不分地域小时级更新结果 去重后 存放 redis key前缀,
     # 完整格式:recall:item:score:region:dup3:24h:{region}:{data_key}:{rule_key}:{date}:{h}
     RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H = 'recall:item:score:region:dup3:24h:'
     # 小程序48h更新结果与 小程序地域分组24h更新结果/小程序地域分组小时级更新结果 去重后 存放 redis key前缀,

+ 12 - 4
recommend.py

@@ -525,7 +525,7 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
     if ab_code == 60054 or ab_code == 60066 or ab_code == 60072 or ab_code == 60073 or ab_code == 60074 \
             or ab_code == 60075 or ab_code == 60076 or ab_code == 60077 or ab_code == 60078 or ab_code == 60079 \
             or ab_code == 60087 or ab_code == 60088 or ab_code == 60089 or ab_code == 60090 \
-            or ab_code == 60091 or ab_code == 60092:
+            or ab_code == 60091 or ab_code == 60092 or ab_code == 60094:
         t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
     elif ab_code == 60056 or ab_code == 60071:
         t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
@@ -550,7 +550,7 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
     if ab_code == 60054 or ab_code == 60066 or ab_code == 60072 or ab_code == 60073 or ab_code == 60074 \
             or ab_code == 60075 or ab_code == 60076 or ab_code == 60077 or ab_code == 60078 or ab_code == 60079 \
             or ab_code == 60087 or ab_code == 60088 or ab_code == 60089 or ab_code == 60090 \
-            or ab_code == 60091 or ab_code == 60092:
+            or ab_code == 60091 or ab_code == 60092 or ab_code == 60094:
         rov_pool_recall = []
         if len(recall_result_list) >= 2:
             region_recall = recall_result_list[0]
@@ -1060,6 +1060,14 @@ def update_redis_data(result, app_type, mid, top_K, expire_time=24*3600, level_w
                 redis_helper.set_data_to_redis(key_name=last_video_key, value=rov_recall_h_video[-1],
                                                expire_time=expire_time)
 
+            # 将此次获取的 不分地域小时级数据列表 中的视频id同步刷新到redis中,方便下次快速定位到召回位置
+            rov_recall_h_h_video = [item['videoId'] for item in result[:top_K]
+                                    if item['pushFrom'] == config_.PUSH_FROM['rov_recall_h_h']]
+            if len(rov_recall_h_h_video) > 0:
+                last_video_key = f'{config_.LAST_VIDEO_FROM_REGION_DUP_H_PREFIX}{app_type}:{mid}'
+                redis_helper.set_data_to_redis(key_name=last_video_key, value=rov_recall_h_h_video[-1],
+                                               expire_time=expire_time)
+
             # 将此次获取的 地域分组相对24h数据列表 中的视频id同步刷新到redis中,方便下次快速定位到召回位置
             rov_recall_24h_dup1_video = [item['videoId'] for item in result[:top_K]
                                          if item['pushFrom'] == config_.PUSH_FROM['rov_recall_region_24h']]
@@ -1997,7 +2005,7 @@ def video_homepage_recommend(request_id, mid, uid, size, app_type, algo_type,
             or ab_code == 60078 or ab_code == 60079 or ab_code == 60080 or ab_code == 60081 or ab_code == 60082\
             or ab_code == 60083 or ab_code == 60084 or ab_code == 60085 or ab_code == 60086 \
             or ab_code == 60087 or ab_code == 60088 or ab_code == 60089 or ab_code == 60090 \
-            or ab_code == 60091 or ab_code == 60092:
+            or ab_code == 60091 or ab_code == 60092 or ab_code == 60094:
         result, fea_info = video_old_recommend(request_id=request_id, mid=mid, uid=uid, app_type=app_type, size=size,
                                                top_K=top_K, flow_pool_P=flow_pool_P, algo_type='',
                                                client_info=client_info, ab_code=ab_code, expire_time=expire_time,
@@ -2122,7 +2130,7 @@ def video_relevant_recommend(request_id, video_id, mid, uid, size, app_type, ab_
             or ab_code == 60078 or ab_code == 60079 or ab_code == 60080 or ab_code == 60081 or ab_code == 60082 \
             or ab_code == 60083 or ab_code == 60084 or ab_code == 60085 or ab_code == 60086 \
             or ab_code == 60087 or ab_code == 60088 or ab_code == 60089 or ab_code == 60090 \
-            or ab_code == 60091 or ab_code == 60092:
+            or ab_code == 60091 or ab_code == 60092 or ab_code == 60094:
         result, fea_info = video_old_recommend(request_id=request_id, mid=mid, uid=uid, app_type=app_type, size=size,
                                                top_K=top_K, flow_pool_P=flow_pool_P, algo_type='',
                                                client_info=client_info, ab_code=ab_code, expire_time=expire_time,

+ 8 - 2
video_rank.py

@@ -57,8 +57,14 @@ def video_rank(data, size, top_K, flow_pool_P, flow_pool_recall_process=None):
 
     # 地域分组小时级规则更新数据
     region_h_recall = [item for item in data['rov_pool_recall']
-                         if item.get('pushFrom') == config_.PUSH_FROM['rov_recall_region_h']]
+                       if item.get('pushFrom') == config_.PUSH_FROM['rov_recall_region_h']]
     region_h_recall_rank = sorted(region_h_recall, key=lambda k: k.get('rovScore', 0), reverse=True)
+
+    # 不分地域小时级规则更新数据
+    rule_h_recall = [item for item in data['rov_pool_recall']
+                     if item.get('pushFrom') == config_.PUSH_FROM['rov_recall_h_h']]
+    rule_h_recall_rank = sorted(rule_h_recall, key=lambda k: k.get('rovScore', 0), reverse=True)
+
     # 地域分组小时级更新24h规则更新数据
     region_24h_recall = [item for item in data['rov_pool_recall']
                          if item.get('pushFrom') == config_.PUSH_FROM['rov_recall_region_24h']]
@@ -114,7 +120,7 @@ def video_rank(data, size, top_K, flow_pool_P, flow_pool_recall_process=None):
     #                   rule_48h_recall_rank + rule_48h_dup_recall_rank + \
     #                   day_recall_rank + rov_initial_recall_rank
     rov_recall_rank = day_30_recall_rank + \
-                      region_h_recall_rank + region_24h_recall_rank + \
+                      region_h_recall_rank + rule_h_recall_rank + region_24h_recall_rank + \
                       rule_24h_recall_rank + rule_24h_dup_recall_rank + \
                       rule_48h_recall_rank + rule_48h_dup_recall_rank
     # 流量池

+ 15 - 5
video_recall.py

@@ -2035,10 +2035,11 @@ class PoolRecall(object):
                 # gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size),
                  # gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size, 'region_24h'),
                  # gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size, 'day_24h'),
-                 gevent.spawn(self.recall_region_dup_24h, region_code, size, 'region_h', expire_time),
-                 gevent.spawn(self.recall_region_dup_24h, region_code, size, 'region_24h', expire_time),
-                 gevent.spawn(self.recall_region_dup_24h, region_code, size, '24h_dup2', expire_time),
-                 gevent.spawn(self.recall_region_dup_24h, region_code, size, '24h_dup3', expire_time)
+                gevent.spawn(self.recall_region_dup_24h, region_code, size, 'region_h', expire_time),
+                gevent.spawn(self.recall_region_dup_24h, region_code, size, 'h', expire_time),
+                gevent.spawn(self.recall_region_dup_24h, region_code, size, 'region_24h', expire_time),
+                gevent.spawn(self.recall_region_dup_24h, region_code, size, '24h_dup2', expire_time),
+                gevent.spawn(self.recall_region_dup_24h, region_code, size, '24h_dup3', expire_time)
                  #
             ]
 
@@ -2626,7 +2627,7 @@ class PoolRecall(object):
         """
         start_time = time.time()
         if key_flag == 'region_h':
-            # 分地域相对24h的筛选结果
+            # 分地域小时级的筛选结果
             # mid对应上一次视频位置 时间记录
             record_key_prefix = config_.RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_H
             # 视频列表
@@ -2634,6 +2635,15 @@ class PoolRecall(object):
             # mid对应上一次视频记录
             last_video_key_prefix = config_.LAST_VIDEO_FROM_REGION_H_PREFIX
             push_from = config_.PUSH_FROM['rov_recall_region_h']
+        elif key_flag == 'h':
+            # 不分地域小时级的筛选结果
+            # mid对应上一次视频位置 时间记录
+            record_key_prefix = config_.RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP_H
+            # 视频列表
+            pool_key_prefix = config_.RECALL_KEY_NAME_PREFIX_DUP_H_H
+            # mid对应上一次视频记录
+            last_video_key_prefix = config_.LAST_VIDEO_FROM_REGION_DUP_H_PREFIX
+            push_from = config_.PUSH_FROM['rov_recall_h_h']
         elif key_flag == 'region_24h':
             # 分地域相对24h的筛选结果
             # mid对应上一次视频位置 时间记录