فهرست منبع

stop rov-list update

liqian 2 سال پیش
والد
کامیت
74a036518e
4 فایل تغییر یافته به همراه 18 افزوده شده و 173 حذف شده
  1. 6 6
      check_video_limit_distribute.py
  2. 2 2
      redis_data_monitor.py
  3. 6 161
      region_rule_rank_h.py
  4. 4 4
      videos_filter.py

+ 6 - 6
check_video_limit_distribute.py

@@ -120,7 +120,7 @@ def process_with_region(data_key, rule_key, region, stop_distribute_video_id_lis
         config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H,  # 地域分组相对24h列表
         config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H,  # 不区分地域相对24h列表
         config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H,  # 不区分地域相对24h列表2
-        config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H,  # 大列表
+        # config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H,  # 大列表
     ]
 
     # if rule_key == 'rule4':
@@ -231,11 +231,11 @@ def check_region_videos(rule_params):
         gevent.joinall(task_list)
 
     # 将已超分发视频 移除 原始大列表
-    key_name = f"{config_.RECALL_KEY_NAME_PREFIX}{datetime.datetime.strftime(now_date, '%Y%m%d')}"
-    if not redis_helper.key_exists(key_name=key_name):
-        redis_date = now_date - datetime.timedelta(days=1)
-        key_name = f"{config_.RECALL_KEY_NAME_PREFIX}{datetime.datetime.strftime(redis_date, '%Y%m%d')}"
-    redis_helper.remove_value_from_zset(key_name=key_name, value=stop_distribute_video_id_list)
+    # key_name = f"{config_.RECALL_KEY_NAME_PREFIX}{datetime.datetime.strftime(now_date, '%Y%m%d')}"
+    # if not redis_helper.key_exists(key_name=key_name):
+    #     redis_date = now_date - datetime.timedelta(days=1)
+    #     key_name = f"{config_.RECALL_KEY_NAME_PREFIX}{datetime.datetime.strftime(redis_date, '%Y%m%d')}"
+    # redis_helper.remove_value_from_zset(key_name=key_name, value=stop_distribute_video_id_list)
 
 
 if __name__ == '__main__':

+ 2 - 2
redis_data_monitor.py

@@ -126,13 +126,13 @@ def get_redis_data_keys(now_date, now_h):
 
 
 def monitor(now_date, now_h):
-    rov_data_monitor(now_date=now_date, now_h=now_h)
+    # rov_data_monitor(now_date=now_date, now_h=now_h)
     key_prefix_dict_24h = {
         '地域分组小时级数据': config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H,
         '地域分组相对24h去重后数据': config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H,
         '不区分地域相对24h去重后数据': config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H,
         '不区分地域相对24h筛选后剩余去重后数据': config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H,
-        'rov模型预测列表去重后数据': config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H,
+        # 'rov模型预测列表去重后数据': config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H,
     }
     region_data_monitor(now_date=now_date, now_h=now_h, rule_params=config_.RULE_PARAMS_REGION_APP_TYPE,
                         key_prefix_dict=key_prefix_dict_24h)

+ 6 - 161
region_rule_rank_h.py

@@ -277,167 +277,12 @@ def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by
                                dup_key_name=other_h_24h_dup_key_name, region=region)
 
     # ##### 去重小程序模型更新结果,并另存为redis中
-    model_key_name = get_rov_redis_key(now_date=now_date)
-    model_data_dup_key_name = \
-        f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}{region}:{data_key}:{rule_key}:" \
-        f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
-    h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=model_key_name,
-                           dup_key_name=model_data_dup_key_name, region=region)
-
-    """
-    redis_helper = RedisHelper()
-    # # ##### 去重更新地域分组天级列表,并另存为redis中
-    # region_day_key_name = \
-    #     f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_DAY}{region}.rule1." \
-    #     f"{datetime.datetime.strftime(now_date, '%Y%m%d')}"
-    # if redis_helper.key_exists(key_name=region_day_key_name):
-    #     region_day_data = redis_helper.get_data_zset_with_index(
-    #         key_name=region_day_key_name, start=0, end=-1, with_scores=True)
-    #     log_.info(f'region day data count = {len(region_day_data)}')
-    #     region_day_dup = {}
-    #     for video_id, score in region_day_data:
-    #         if int(video_id) not in h_video_ids:
-    #             region_day_dup[int(video_id)] = score
-    #             h_video_ids.append(int(video_id))
-    #     log_.info(f"region day data dup count = {len(region_day_dup)}")
-    #     region_day_dup_key_name = \
-    #         f"{config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_DAY_H}{region}.{rule_key}." \
-    #         f"{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
-    #     if len(region_day_dup) > 0:
-    #         redis_helper.add_data_with_zset(key_name=region_day_dup_key_name, data=region_day_dup, expire_time=23 * 3600)
-
-
-    if redis_helper.key_exists(key_name=region_24h_key_name):
-        region_24h_data = redis_helper.get_all_data_from_zset(key_name=region_24h_key_name, with_scores=True)
-        # log_.info(f'region 24h data count = {len(region_24h_data)}')
-
-        # 屏蔽视频过滤
-        region_24h_video_ids = [int(video_id) for video_id, _ in region_24h_data]
-        shield_key_name_list = config_.SHIELD_CONFIG.get(region, None)
-        if shield_key_name_list is not None:
-            region_24h_video_ids = filter_shield_video(video_ids=region_24h_video_ids, shield_key_name_list=shield_key_name_list)
-            # log_.info(f"shield filtered_videos count = {len(region_24h_video_ids)}")
-
-        region_24h_dup = {}
-        for video_id, score in region_24h_data:
-            if int(video_id) not in h_video_ids and int(video_id) in region_24h_video_ids:
-                region_24h_dup[int(video_id)] = score
-                h_video_ids.append(int(video_id))
-        # log_.info(f"region 24h data dup count = {len(region_24h_dup)}")
-        region_24h_dup_key_name = \
-            f"{config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H}{region}:{app_type}:{data_key}:{rule_key}:" \
-            f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
-        if len(region_24h_dup) > 0:
-            redis_helper.add_data_with_zset(key_name=region_24h_dup_key_name, data=region_24h_dup, expire_time=23 * 3600)
-            # 限流视频score调整
-            update_limit_video_score(initial_videos=region_24h_dup, key_name=region_24h_dup_key_name)
-            # 清空线上过滤应用列表
-            # redis_helper.del_keys(key_name=f"{config_.REGION_H_VIDEO_FILER_24H}{app_type}.{data_key}.{region}.{rule_key}")
-
-    # ##### 去重小程序天级更新结果,并另存为redis中
-    # day_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_DAY}rule2.{datetime.datetime.strftime(now_date, '%Y%m%d')}"
-    # if redis_helper.key_exists(key_name=day_key_name):
-    #     day_data = redis_helper.get_data_zset_with_index(
-    #         key_name=day_key_name, start=0, end=-1, with_scores=True)
-    #     log_.info(f'day data count = {len(day_data)}')
-    #     day_dup = {}
-    #     for video_id, score in day_data:
-    #         if int(video_id) not in h_video_ids:
-    #             day_dup[int(video_id)] = score
-    #             h_video_ids.append(int(video_id))
-    #     log_.info(f"day data dup count = {len(day_dup)}")
-    #     day_dup_key_name = \
-    #         f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_DAY_H}{region}.{rule_key}." \
-    #         f"{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
-    #     if len(day_dup) > 0:
-    #         redis_helper.add_data_with_zset(key_name=day_dup_key_name, data=day_dup, expire_time=23 * 3600)
-
-
-    # ##### 去重小程序相对24h更新结果,并另存为redis中
-    day_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H}{app_type}:{data_key}:{by_24h_rule_key}:" \
-                   f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
-    if redis_helper.key_exists(key_name=day_key_name):
-        day_data = redis_helper.get_all_data_from_zset(key_name=day_key_name, with_scores=True)
-        # log_.info(f'24h data count = {len(day_data)}')
-
-        # 屏蔽视频过滤
-        day_video_ids = [int(video_id) for video_id, _ in day_data]
-        shield_key_name_list = config_.SHIELD_CONFIG.get(region, None)
-        if shield_key_name_list is not None:
-            day_video_ids = filter_shield_video(video_ids=day_video_ids, shield_key_name_list=shield_key_name_list)
-            # log_.info(f"shield filtered_videos count = {len(day_video_ids)}")
-
-        day_dup = {}
-        for video_id, score in day_data:
-            if int(video_id) not in h_video_ids and int(video_id) in day_video_ids:
-                day_dup[int(video_id)] = score
-                h_video_ids.append(int(video_id))
-        # log_.info(f"24h data dup count = {len(day_dup)}")
-        day_dup_key_name = \
-            f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H}{region}:{app_type}:{data_key}:{rule_key}:" \
-            f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
-        if len(day_dup) > 0:
-            redis_helper.add_data_with_zset(key_name=day_dup_key_name, data=day_dup, expire_time=23 * 3600)
-            # 限流视频score调整
-            update_limit_video_score(initial_videos=day_dup, key_name=day_dup_key_name)
-            # 清空线上过滤应用列表
-            # redis_helper.del_keys(key_name=f"{config_.H_VIDEO_FILER_24H}{region}.{app_type}.{data_key}.{rule_key}")
-
-    # ##### 去重小程序相对24h 筛选后剩余数据 更新结果,并另存为redis中
-    if by_24h_rule_key == 'rule3':
-        other_h_24h_recall_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H_OTHER}{app_type}:{data_key}:" \
-                                      f"{by_24h_rule_key}:{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
-        if redis_helper.key_exists(key_name=other_h_24h_recall_key_name):
-            other_24h_data = redis_helper.get_all_data_from_zset(key_name=other_h_24h_recall_key_name, with_scores=True)
-            log_.info(f'24h other data count = {len(other_24h_data)}')
-
-            # 屏蔽视频过滤
-            other_24h_video_ids = [int(video_id) for video_id, _ in other_24h_data]
-            shield_key_name_list = config_.SHIELD_CONFIG.get(region, None)
-            if shield_key_name_list is not None:
-                other_24h_video_ids = filter_shield_video(video_ids=other_24h_video_ids, shield_key_name_list=shield_key_name_list)
-                log_.info(f"shield filtered_videos count = {len(other_24h_video_ids)}")
-
-            other_24h_dup = {}
-            for video_id, score in other_24h_data:
-                if int(video_id) not in h_video_ids and int(video_id) in other_24h_video_ids:
-                    other_24h_dup[int(video_id)] = score
-                    h_video_ids.append(int(video_id))
-            log_.info(f"other 24h data dup count = {len(other_24h_dup)}")
-            other_24h_dup_key_name = \
-                f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H}{region}:{app_type}:{data_key}:{rule_key}:" \
-                f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
-            if len(other_24h_dup) > 0:
-                redis_helper.add_data_with_zset(key_name=other_24h_dup_key_name, data=other_24h_dup, expire_time=23 * 3600)
-                # 限流视频score调整
-                update_limit_video_score(initial_videos=other_24h_dup, key_name=other_24h_dup_key_name)
-
-    # ##### 去重小程序模型更新结果,并另存为redis中
-    model_key_name = get_rov_redis_key(now_date=now_date)
-    model_data = redis_helper.get_all_data_from_zset(key_name=model_key_name, with_scores=True)
-    # log_.info(f'model data count = {len(model_data)}')
-
-    # 屏蔽视频过滤
-    model_video_ids = [int(video_id) for video_id, _ in model_data]
-    shield_key_name_list = config_.SHIELD_CONFIG.get(region, None)
-    if shield_key_name_list is not None:
-        model_video_ids = filter_shield_video(video_ids=model_video_ids, shield_key_name_list=shield_key_name_list)
-        # log_.info(f"shield filtered_videos count = {len(model_video_ids)}")
-
-    model_data_dup = {}
-    for video_id, score in model_data:
-        if int(video_id) not in h_video_ids and int(video_id) in model_video_ids:
-            model_data_dup[int(video_id)] = score
-            h_video_ids.append(int(video_id))
-    # log_.info(f"model data dup count = {len(model_data_dup)}")
-    model_data_dup_key_name = \
-        f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}{region}:{app_type}:{data_key}:{rule_key}:" \
-        f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
-    if len(model_data_dup) > 0:
-        redis_helper.add_data_with_zset(key_name=model_data_dup_key_name, data=model_data_dup, expire_time=23 * 3600)
-        # 限流视频score调整
-        update_limit_video_score(initial_videos=model_data_dup, key_name=model_data_dup_key_name)
-    """
+    # model_key_name = get_rov_redis_key(now_date=now_date)
+    # model_data_dup_key_name = \
+    #     f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}{region}:{data_key}:{rule_key}:" \
+    #     f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
+    # h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=model_key_name,
+    #                        dup_key_name=model_data_dup_key_name, region=region)
 
 
 def merge_df(df_left, df_right):

+ 4 - 4
videos_filter.py

@@ -581,7 +581,7 @@ def filter_process_with_region(data_key, rule_key, region, now_date, now_h):
         config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H,
         config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_48H_H,
         config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_48H_H,
-        config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H
+        # config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H
     ]
     for i, key_prefix in enumerate(key_prefix_list):
         # 拼接key
@@ -888,7 +888,7 @@ def filter_whole_movies():
 def main():
     try:
         # ROV召回池视频过滤
-        filter_rov_pool()
+        # filter_rov_pool()
         # appType = 6,ROV召回池视频过滤
         # filter_rov_pool(app_type=config_.APP_TYPE['SHORT_VIDEO'])
         # appType = 13,票圈视频APP视频过滤
@@ -902,8 +902,8 @@ def main():
         # 兜底视频过滤
         filter_bottom()
         # 修改过ROV的视频过滤
-        filter_rov_updated()
-        filter_rov_updated_app()
+        # filter_rov_updated()
+        # filter_rov_updated_app()
         # 运营强插相关推荐视频过滤
         # filter_relevant_videos()
         # 按位置排序视频过滤