瀏覽代碼

update rule rank h

liqian 2 年之前
父節點
當前提交
8d62c99ccb
共有 4 個文件被更改,包括 64 次插入 和 32 次删除
  1. 4 1
      config.py
  2. 53 28
      rule_rank_h.py
  3. 2 2
      rule_rank_h_task.sh
  4. 5 1
      videos_filter.py

+ 4 - 1
config.py

@@ -150,7 +150,10 @@ class BaseConfig(object):
     RECALL_KEY_NAME_PREFIX = 'com.weiqu.video.recall.hot.item.score.'
     # 小程序小时级更新结果存放 redis key前缀,完整格式:com.weiqu.video.recall.item.score.h.{rule_key}.{date}.{h}
     RECALL_KEY_NAME_PREFIX_BY_H = 'com.weiqu.video.recall.item.score.h.'
-    # 小程序离线ROV模型结果与小程序小时级更新结果去重后 存放 redis key前缀,
+    # 小程序相对24h数据更新结果与 小程序小时级更新结果 去重后 存放 redis key前缀,
+    # 完整格式:com.weiqu.video.recall.hot.item.score.dup.24h.h.{rule_key}.{date}.{h}
+    RECALL_KEY_NAME_PREFIX_DUP_24H_H = 'com.weiqu.video.recall.hot.item.score.dup.24h.h.'
+    # 小程序离线ROV模型结果与 小程序小时级更新结果/小程序相对24h数据更新结果 去重后 存放 redis key前缀,
     # 完整格式:com.weiqu.video.recall.hot.item.score.dup.h.{rule_key}.{date}.{h}
     RECALL_KEY_NAME_PREFIX_DUP_H = 'com.weiqu.video.recall.hot.item.score.dup.h.'
     # 小时级视频状态不符合推荐要求的列表 redis key,完整格式:com.weiqu.video.filter.h.item.{rule_key}

+ 53 - 28
rule_rank_h.py

@@ -155,7 +155,7 @@ def video_rank(df, now_date, now_h, rule_key, param):
     filtered_videos = filter_video_status(h_recall_videos)
     log_.info('filtered_videos count = {}'.format(len(filtered_videos)))
     # 写入对应的redis
-    h_video_ids =[]
+    h_video_ids = []
     h_recall_result = {}
     for video_id in filtered_videos:
         score = h_recall_df[h_recall_df['videoid'] == video_id]['score']
@@ -168,33 +168,55 @@ def video_rank(df, now_date, now_h, rule_key, param):
         # 清空线上过滤应用列表
         redis_helper.del_keys(key_name=f"{config_.H_VIDEO_FILER}{rule_key}")
 
+    dup_to_redis(h_video_ids, now_date, now_h, rule_key)
+
     # 去重更新rov模型结果,并另存为redis中
-    initial_data_dup = {}
-    for video_id, score in initial_data:
+    # initial_data_dup = {}
+    # for video_id, score in initial_data:
+    #     if int(video_id) not in h_video_ids:
+    #         initial_data_dup[int(video_id)] = score
+    # log_.info(f"initial data dup count = {len(initial_data_dup)}")
+    # initial_key_name = \
+    #     f"{config_.RECALL_KEY_NAME_PREFIX_DUP_H}{rule_key}.{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
+    # if len(initial_data_dup) > 0:
+    #     redis_helper.add_data_with_zset(key_name=initial_key_name, data=initial_data_dup, expire_time=23 * 3600)
+
+
+def dup_to_redis(h_video_ids, now_date, now_h, rule_key):
+    """将小时级数据与其他召回视频池去重,存入对应的redis"""
+    redis_helper = RedisHelper()
+
+    # ##### 去重小程序相对24h数据更新结果,并另存为redis中
+    rule_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H}rule1.{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
+    if redis_helper.key_exists(key_name=rule_24h_key_name):
+        rule_24h_data = redis_helper.get_data_zset_with_index(
+            key_name=rule_24h_key_name, start=0, end=-1, with_scores=True)
+        log_.info(f'rule_24h data count = {len(rule_24h_data)}')
+        rule_24h_dup = {}
+        for video_id, score in rule_24h_data:
+            if int(video_id) not in h_video_ids:
+                rule_24h_dup[int(video_id)] = score
+                h_video_ids.append(int(video_id))
+        log_.info(f"rule_24h data dup count = {len(rule_24h_dup)}")
+        rule_24h_dup_key_name = \
+            f"{config_.RECALL_KEY_NAME_PREFIX_DUP_24H_H}{rule_key}.{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
+        if len(rule_24h_dup) > 0:
+            redis_helper.add_data_with_zset(key_name=rule_24h_dup_key_name, data=rule_24h_dup, expire_time=23 * 3600)
+
+    # ##### 去重小程序模型更新结果,并另存为redis中
+    model_key_name = get_rov_redis_key(now_date=now_date)
+    model_data = redis_helper.get_data_zset_with_index(key_name=model_key_name, start=0, end=-1, with_scores=True)
+    log_.info(f'model data count = {len(model_data)}')
+    model_data_dup = {}
+    for video_id, score in model_data:
         if int(video_id) not in h_video_ids:
-            initial_data_dup[int(video_id)] = score
-    log_.info(f"initial data dup count = {len(initial_data_dup)}")
-    initial_key_name = \
+            model_data_dup[int(video_id)] = score
+            h_video_ids.append(int(video_id))
+    log_.info(f"model data dup count = {len(model_data_dup)}")
+    model_data_dup_key_name = \
         f"{config_.RECALL_KEY_NAME_PREFIX_DUP_H}{rule_key}.{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
-    if len(initial_data_dup) > 0:
-        redis_helper.add_data_with_zset(key_name=initial_key_name, data=initial_data_dup, expire_time=23 * 3600)
-
-
-    # # 去重合并
-    # final_videos = [int(item) for item in h_recall_videos]
-    # temp_videos = [int(video_id) for video_id, _ in initial_data if int(video_id) not in final_videos]
-    # final_videos = final_videos + temp_videos
-    # log_.info(f'final videos count = {len(final_videos)}')
-    #
-    # # 重新给定score
-    # final_data = {}
-    # for i, video_id in enumerate(final_videos):
-    #     score = 100 - i * config_.ROV_SCORE_D
-    #     final_data[video_id] = score
-    #
-    # # 存入对应的redis
-    # final_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_H}{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
-    # redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=24 * 3600)
+    if len(model_data_dup) > 0:
+        redis_helper.add_data_with_zset(key_name=model_data_dup_key_name, data=model_data_dup, expire_time=23 * 3600)
 
 
 def rank_by_h(now_date, now_h, rule_params):
@@ -233,20 +255,23 @@ def h_rank_bottom(now_date, now_h, rule_key):
     else:
         redis_dt = datetime.datetime.strftime(now_date, '%Y%m%d')
         redis_h = now_h - 1
-    key_prefix_list = [config_.RECALL_KEY_NAME_PREFIX_BY_H, config_.RECALL_KEY_NAME_PREFIX_DUP_H]
+    key_prefix_list = [config_.RECALL_KEY_NAME_PREFIX_BY_H]
     for key_prefix in key_prefix_list:
         key_name = f"{key_prefix}{rule_key}.{redis_dt}.{redis_h}"
         initial_data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1, with_scores=True)
         final_data = dict()
+        h_video_ids = []
         for video_id, score in initial_data:
             final_data[video_id] = score
+            h_video_ids.append(int(video_id))
         # 存入对应的redis
         final_key_name = \
             f"{key_prefix}{rule_key}.{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
         if len(final_data) > 0:
             redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=23 * 3600)
-    # 清空线上过滤应用列表
-    redis_helper.del_keys(key_name=f"{config_.H_VIDEO_FILER}{rule_key}")
+        # 清空线上过滤应用列表
+        redis_helper.del_keys(key_name=f"{config_.H_VIDEO_FILER}{rule_key}")
+        dup_to_redis(h_video_ids, now_date, now_h, rule_key)
 
 
 def h_timer_check():

+ 2 - 2
rule_rank_h_task.sh

@@ -1,7 +1,7 @@
 source /etc/profile
 echo $ROV_OFFLINE_ENV
 if [[ $ROV_OFFLINE_ENV == 'test' ]]; then
-    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/rule_rank_h.py
+    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/rule_rank_h_by_24h.py && /root/anaconda3/bin/python /data2/rov-offline/rule_rank_h.py
 elif [[ $ROV_OFFLINE_ENV == 'pro' ]]; then
-    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/rule_rank_h.py
+    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/rule_rank_h_by_24h.py && /root/anaconda3/bin/python /data/rov-offline/rule_rank_h.py
 fi

+ 5 - 1
videos_filter.py

@@ -446,7 +446,11 @@ def filter_rov_h():
     for key, value in rule_params.items():
         log_.info(f"rule = {key}, param = {value}")
         # 需过滤两个视频列表
-        key_prefix_list = [config_.RECALL_KEY_NAME_PREFIX_BY_H, config_.RECALL_KEY_NAME_PREFIX_DUP_H]
+        key_prefix_list = [
+            config_.RECALL_KEY_NAME_PREFIX_BY_H,
+            config_.RECALL_KEY_NAME_PREFIX_DUP_24H_H,
+            config_.RECALL_KEY_NAME_PREFIX_DUP_H
+        ]
         for i, key_prefix in enumerate(key_prefix_list):
             # 拼接key
             key_name = f"{key_prefix}{key}.{now_date}.{now_h}"