فهرست منبع

opt rule rank by h

liqian 3 سال پیش
والد
کامیت
0d77e9eb4d
3 فایل تغییر یافته به همراه 22 افزوده شده و 6 حذف شده
  1. 4 2
      config.py
  2. 14 3
      rule_rank_h.py
  3. 4 1
      videos_filter.py

+ 4 - 2
config.py

@@ -72,10 +72,12 @@ class BaseConfig(object):
 
     # 小程序离线ROV模型结果存放 redis key前缀,完整格式:com.weiqu.video.recall.hot.item.score.{date}
     RECALL_KEY_NAME_PREFIX = 'com.weiqu.video.recall.hot.item.score.'
-    # 小程序小时级更新结果存放 redis key前缀,完整格式:com.weiqu.video.recall.hot.item.score.h.{date}.{h}
-    RECALL_KEY_NAME_PREFIX_BY_H = 'com.weiqu.video.recall.hot.item.score.h.'
+    # 小程序小时级更新结果存放 redis key前缀,完整格式:com.weiqu.video.recall.item.score.h.{date}.{h}
+    RECALL_KEY_NAME_PREFIX_BY_H = 'com.weiqu.video.recall.item.score.h.'
     # 小程序离线ROV模型结果与小程序小时级更新结果去重后 存放 redis key前缀,完整格式:com.weiqu.video.recall.hot.item.score.dup.h.{date}.{h}
     RECALL_KEY_NAME_PREFIX_DUP_H = 'com.weiqu.video.recall.hot.item.score.dup.h.'
+    # 小时级视频状态不符合推荐要求的列表 redis key,完整格式:com.weiqu.video.filter.h.item
+    H_VIDEO_FILER = 'com.weiqu.video.filter.h.item'
 
     # app应用 小程序离线ROV模型结果存放 redis key前缀,完整格式:com.weiqu.video.recall.hot.item.score.app.{date}
     RECALL_KEY_NAME_PREFIX_APP = 'com.weiqu.video.recall.hot.item.score.app.'

+ 14 - 3
rule_rank_h.py

@@ -116,17 +116,20 @@ def video_rank(df, now_date, now_h):
     h_video_ids =[]
     h_recall_result = {}
     for video_id in h_recall_videos:
-        score = h_recall_df[h_recall_df['videoid'] == video_id]
+        score = h_recall_df[h_recall_df['videoid'] == video_id]['score']
         h_recall_result[int(video_id)] = float(score)
         h_video_ids.append(int(video_id))
     h_recall_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_H}{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
     redis_helper.add_data_with_zset(key_name=h_recall_key_name, data=h_recall_result, expire_time=24 * 3600)
+    # 清空线上过滤应用列表
+    redis_helper.del_keys(key_name=config_.H_VIDEO_FILER)
 
     # 去重更新rov模型结果,并另存为redis中
     initial_data_dup = {}
     for video_id, score in initial_data:
         if int(video_id) not in h_video_ids:
             initial_data_dup[int(video_id)] = score
+    log_.info(f"initial data dup count = {len(initial_data_dup)}")
     initial_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_DUP_H}{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
     redis_helper.add_data_with_zset(key_name=initial_key_name, data=initial_data_dup, expire_time=24 * 3600)
 
@@ -161,10 +164,16 @@ def rank_by_h(now_date, now_h):
 
 
 def h_rank_bottom(now_date, now_h):
-    """未按时更新数据,用rov模型结果作为当前小时的数据"""
+    """未按时更新数据,用上一小时结果作为当前小时的数据"""
     # Fetch the previous hour's hourly-update result (fallback source for the current hour)
     redis_helper = RedisHelper()
-    key_name = get_rov_redis_key(now_date=now_date)
+    if now_h == 0:
+        redis_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
+        redis_h = 23
+    else:
+        redis_dt = datetime.datetime.strftime(now_date, '%Y%m%d')
+        redis_h = now_h - 1
+    key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_H}{redis_dt}.{redis_h}"
     initial_data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1, with_scores=True)
     final_data = dict()
     for video_id, score in initial_data:
@@ -172,6 +181,8 @@ def h_rank_bottom(now_date, now_h):
     # 存入对应的redis
     final_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_H}{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
     redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=24 * 3600)
+    # 清空线上过滤应用列表
+    redis_helper.del_keys(key_name=config_.H_VIDEO_FILER)
 
 
 def h_timer_check():

+ 4 - 1
videos_filter.py

@@ -376,7 +376,7 @@ def filter_rov_h():
     log_.info(f'now_date = {now_date}, now_h = {now_h}.')
     # 需过滤两个视频列表
     key_prefix_list = [config_.RECALL_KEY_NAME_PREFIX_BY_H, config_.RECALL_KEY_NAME_PREFIX_DUP_H]
-    for key_prefix in key_prefix_list:
+    for i, key_prefix in enumerate(key_prefix_list):
         # 拼接key
         key_name = f"{key_prefix}{now_date}.{now_h}"
         log_.info(f"key_name: {key_name}")
@@ -398,6 +398,9 @@ def filter_rov_h():
             log_.info("filter end!")
             continue
         redis_helper.remove_value_from_zset(key_name=key_name, value=list(filter_videos))
+        if i == 0:
+            # 将小时级的数据需要过滤的视频加入到线上过滤应用列表中
+            redis_helper.add_data_with_set(key_name=config_.H_VIDEO_FILER, values=filter_videos, expire_time=2*3600)
     log_.info("rov_h pool filter end!")