Browse Source

update recall_region_dup_24h

liqian 2 years ago
parent
commit
32d95269b3
3 changed files with 60 additions and 18 deletions
  1. 6 1
      config.py
  2. 12 4
      recommend.py
  3. 42 13
      video_recall.py

+ 6 - 1
config.py

@@ -234,10 +234,15 @@ class BaseConfig(object):
     # Records the (date, h) that the data in the per-mid hourly key belongs to; full key format: com.weiqu.video.h.region.24h.record.mid.{appType}.{mid}
     H_WITH_MID_RECORD_KEY_NAME_PREFIX_REGION_24H = 'com.weiqu.video.h.region.24h.record.mid.'
 
+    # Redis key prefix for the user's last position in the dup2 24h update list; full key format: com.weiqu.video.region.dup2.24h.last.item.{appType}.{mid}
+    LAST_VIDEO_FROM_REGION_DUP2_24H_PREFIX = 'com.weiqu.video.region.dup2.24h.last.item.'
+    # Records the (date, h) that the data in the per-mid "last position in the dup2 24h update list" key belongs to; full key format: com.weiqu.video.h.region.dup2.24h.last.record.mid.{appType}.{mid}
+    RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP2_24H = 'com.weiqu.video.h.region.dup2.24h.last.record.mid.'
+
     # Redis key prefix for the user's last position in the dup3 24h update list; full key format: com.weiqu.video.region.dup3.24h.last.item.{appType}.{mid}
     LAST_VIDEO_FROM_REGION_DUP3_24H_PREFIX = 'com.weiqu.video.region.dup3.24h.last.item.'
     # Records the (date, h) that the data in the per-mid "last position in the dup3 24h update list" key belongs to; full key format: com.weiqu.video.h.region.dup3.24h.last.record.mid.{appType}.{mid}
+    # Must stay distinct from RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP2_24H, otherwise the dup2 and dup3 records share one Redis key per user.
-    RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP3_24H = 'com.weiqu.video.h.region.24h.last.record.mid.'
+    RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP3_24H = 'com.weiqu.video.h.region.dup3.24h.last.record.mid.'
 
     # Redis key for the hourly list of videos whose status does not meet the recommendation requirements; full key format: com.weiqu.video.filter.h.item.{rule_key}
     H_VIDEO_FILER = 'com.weiqu.video.filter.h.item.'

+ 12 - 4
recommend.py

@@ -391,12 +391,20 @@ def update_redis_data(result, app_type, mid, last_rov_recall_key, top_K, expire_
                                                    expire_time=expire_time)
                 # log_.info('last video redis update success!')
 
+            # 将此次获取的 相对24h筛选数据列表 中的视频id同步刷新到redis中,方便下次快速定位到召回位置
+            rov_recall_24h_dup2_video = [item['videoId'] for item in result[:top_K]
+                                         if item['pushFrom'] == config_.PUSH_FROM['rov_recall_24h']]
+            if len(rov_recall_24h_dup2_video) > 0:
+                last_video_key = f'{config_.LAST_VIDEO_FROM_REGION_DUP2_24H_PREFIX}{app_type}.{mid}'
+                redis_helper.set_data_to_redis(key_name=last_video_key, value=rov_recall_24h_dup2_video[-1],
+                                               expire_time=expire_time)
+
             # 将此次获取的 相对24h筛选后剩余数据列表 中的视频id同步刷新到redis中,方便下次快速定位到召回位置
-            rov_recall_24h_dup_video = [item['videoId'] for item in result[:top_K]
-                                        if item['pushFrom'] == config_.PUSH_FROM['rov_recall_24h_dup']]
-            if len(rov_recall_24h_dup_video) > 0:
+            rov_recall_24h_dup3_video = [item['videoId'] for item in result[:top_K]
+                                         if item['pushFrom'] == config_.PUSH_FROM['rov_recall_24h_dup']]
+            if len(rov_recall_24h_dup3_video) > 0:
                 last_video_key = f'{config_.LAST_VIDEO_FROM_REGION_DUP3_24H_PREFIX}{app_type}.{mid}'
-                redis_helper.set_data_to_redis(key_name=last_video_key, value=rov_recall_24h_dup_video[-1],
+                redis_helper.set_data_to_redis(key_name=last_video_key, value=rov_recall_24h_dup3_video[-1],
                                                expire_time=expire_time)
 
         # 将此次分发的流量池视频,对 本地分发数-1 进行记录

+ 42 - 13
video_recall.py

@@ -1192,20 +1192,27 @@ class PoolRecall(object):
                      gevent.spawn(self.region_dup_recall, province_code, 2, size, expire_time)]
         elif self.ab_code == config_.AB_CODE['region_rank_by_h'].get('region_rule_rank5_appType_0_data1'):
             if province_code == '-1':
-                t = [gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size, 'day_24h'),
-                     gevent.spawn(self.recall_region_dup_24h, province_code, size)]
+                # key_flag is passed by keyword: the third positional parameter of
+                # recall_region_dup_24h is expire_time, so a positional flag would leave
+                # key_flag empty and the recall would return [].
+                t = [
+                    # gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size, 'day_24h'),
+                     gevent.spawn(self.recall_region_dup_24h, province_code, size, key_flag='24h_dup2'),
+                     gevent.spawn(self.recall_region_dup_24h, province_code, size, key_flag='24h_dup3')]
             else:
                 t = [gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size),
                      gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size, 'region_24h'),
-                     gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size, 'day_24h'),
-                     gevent.spawn(self.recall_region_dup_24h, province_code, size)]
+                     # gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size, 'day_24h'),
+                     gevent.spawn(self.recall_region_dup_24h, province_code, size, key_flag='24h_dup2'),
+                     gevent.spawn(self.recall_region_dup_24h, province_code, size, key_flag='24h_dup3')]
         else:
             if province_code == '-1':
-                t = [gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size, 'day_24h')]
+                # t = [gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size, 'day_24h')]
+                # key_flag is passed by keyword so it does not bind to expire_time.
+                t = [gevent.spawn(self.recall_region_dup_24h, province_code, size, key_flag='24h_dup2')]
+
             else:
                 t = [gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size),
                      gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size, 'region_24h'),
-                     gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size, 'day_24h'),
+                     # gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size, 'day_24h'),
+                     gevent.spawn(self.recall_region_dup_24h, province_code, size, key_flag='24h_dup2'),
+
                      # gevent.spawn(self.region_dup_recall, province_code, 1, size, expire_time),
                      # gevent.spawn(self.region_dup_recall, province_code, 2, size, expire_time)
                      ]
@@ -1634,10 +1641,12 @@ class PoolRecall(object):
             idx = 0
         return idx
 
-    def get_last_recommend_video_idx(self, province_code):
+    def get_last_recommend_video_idx(self, province_code, record_key_prefix, pool_key_prefix):
         # 判断mid对应上一次视频位置 时间记录
-        record_key = f"{config_.RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP3_24H}{self.app_type}.{self.mid}"
-        pool_key_prefix = config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H
+        # record_key = f"{config_.RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP3_24H}{self.app_type}.{self.mid}"
+        # pool_key_prefix = config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H
+        record_key = f"{record_key_prefix}{self.app_type}.{self.mid}"
+
         if not self.redis_helper.key_exists(key_name=record_key):
             # ###### 记录key不存在
             idx = 0
@@ -1674,17 +1683,36 @@ class PoolRecall(object):
 
         return pool_recall_key, idx
 
-    def recall_region_dup_24h(self, province_code, size=4, expire_time=2*3600, push_from=config_.PUSH_FROM['rov_recall_24h_dup']):
+    def recall_region_dup_24h(self, province_code, size=4, expire_time=2*3600, key_flag=''):
         """
         从小程序小时级24h数据 筛选后的剩余数据 更新结果中获取视频
         :param size: 获取视频个数
         :param expire_time: 末位视频记录redis过期时间
-        :param push_from: 视频来源标记
+        :param key_flag: 视频表标记
         :return:
         """
         start_time = time.time()
+        if key_flag == '24h_dup2':
+            # 不分地域相对24h的筛选结果
+            # mid对应上一次视频位置 时间记录
+            record_key_prefix = config_.RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP2_24H
+            # 视频列表
+            pool_key_prefix = config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H
+            # mid对应上一次视频记录
+            last_video_key_prefix = config_.LAST_VIDEO_FROM_REGION_DUP2_24H_PREFIX
+            push_from = config_.PUSH_FROM['rov_recall_24h']
+        elif key_flag == '24h_dup3':
+            # 不分地域相对24h的筛选后剩余数据
+            record_key_prefix = config_.RECORD_KEY_NAME_PREFIX_LAST_VIDEO_REGION_DUP3_24H
+            pool_key_prefix = config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H
+            last_video_key_prefix = config_.LAST_VIDEO_FROM_REGION_DUP3_24H_PREFIX
+            push_from = config_.PUSH_FROM['rov_recall_24h_dup']
+        else:
+            return []
         # 获取相关redis key, 用户上一次在rov召回池对应的位置
-        pool_key, idx = self.get_last_recommend_video_idx(province_code=province_code)
+        pool_key, idx = self.get_last_recommend_video_idx(province_code=province_code,
+                                                          record_key_prefix=record_key_prefix,
+                                                          pool_key_prefix=pool_key_prefix)
         if not pool_key:
             return []
         pool_recall_result = []
@@ -1726,7 +1754,8 @@ class PoolRecall(object):
                 # 将此次获取的末位视频id同步刷新到Redis中,方便下次快速定位到召回位置,过期时间为1天
                 if self.mid:
                     # mid为空时,不做记录
-                    last_video_key = f'{config_.LAST_VIDEO_FROM_REGION_DUP3_24H_PREFIX}{self.app_type}.{self.mid}'
+                    # last_video_key = f'{config_.LAST_VIDEO_FROM_REGION_DUP3_24H_PREFIX}{self.app_type}.{self.mid}'
+                    last_video_key = f'{last_video_key_prefix}{self.app_type}.{self.mid}'
                     self.redis_helper.set_data_to_redis(key_name=last_video_key, value=data[-1][0],
                                                         expire_time=expire_time)
             idx += get_size