Quellcode durchsuchen

add limit-video-recommend

liqian vor 2 Jahren
Ursprung
Commit
0b530428ab
4 geänderte Dateien mit 196 neuen und 52 gelöschten Zeilen
  1. 117 0
      check_video_limit_distribute.py
  2. 25 0
      config.py
  3. 53 0
      get_video_limit_list.py
  4. 1 52
      region_rule_rank_h.py

+ 117 - 0
check_video_limit_distribute.py

@@ -0,0 +1,117 @@
+import datetime
+from config import set_config
+from log import Log
+from utils import RedisHelper
+
+config_ = set_config()
+log_ = Log()
+redis_helper = RedisHelper()
+
+
+def update_limit_video_score(initial_videos, key_name):
+    """
+    调整限流视频的分数: 将视频移至所在列表的中位数之后,多个视频时,按照原本的顺序进行排列
+    :param initial_videos: 视频列表及score type-dict, {videoId: score, ...}
+    :param key_name: 视频列表对应的key
+    :return:
+    """
+    # 获取当前限流视频
+    data = redis_helper.get_data_from_redis(key_name=config_.KEY_NAME_PREFIX_LIMIT_VIDEOS)
+    if data is None:
+        return
+    limit_video_id_list = [int(video[0]) for video in data]
+    # 获取限流视频对应的score
+    limit_video_initial_score = {}
+    for video_id in limit_video_id_list:
+        initial_score = initial_videos.get(video_id, None)
+        if initial_score is not None:
+            limit_video_initial_score[video_id] = initial_score
+    if len(limit_video_initial_score) == 0:
+        return
+
+    # 获取原始列表的分数的中位数
+
+
+
+def check_videos_distribute():
+    """
+    检查当前限流视频分发数
+    :return: stop_distribute_video_id_list
+    """
+    # 获取当前限流视频及最大分发数
+    data = redis_helper.get_data_from_redis(key_name=config_.KEY_NAME_PREFIX_LIMIT_VIDEOS)
+    if data is None:
+        return []
+    # 判断是否已超分发
+    stop_distribute_video_id_list = []
+    for video_id, max_distribute_count in eval(data):
+        distributed_count = redis_helper.get_data_from_redis(
+            key_name=f"{config_.KEY_NAME_PREFIX_LIMIT_VIDEO_DISTRIBUTE_COUNT}{video_id}"
+        )
+        if distributed_count is None:
+            continue
+        if int(distributed_count) >= int(max_distribute_count):
+            stop_distribute_video_id_list.append(int(video_id))
+
+    return stop_distribute_video_id_list
+
+
+def check_region_videos():
+    """检查限流视频分发数"""
+    # 获取当前日期
+    now_date = datetime.datetime.today()
+    # 获取当前所在小时
+    now_h = datetime.datetime.now().hour
+    log_.info(f'now_date = {now_date}, now_h = {now_h}.')
+
+    # 获取已超分发视频
+    stop_distribute_video_id_list = check_videos_distribute()
+    if len(stop_distribute_video_id_list) == 0:
+        return
+
+    # 对已超分发的视频进行移除
+    region_code_list = [code for region, code in config_.REGION_CODE.items()]
+    rule_params = config_.RULE_PARAMS_REGION
+
+    for region in region_code_list:
+        log_.info(f"region = {region}")
+        for key, value in rule_params.items():
+            log_.info(f"rule = {key}, param = {value}")
+            # 将已超分发视频加入到地域小时级线上过滤应用列表中
+            redis_helper.add_data_with_set(
+                key_name=f"{config_.REGION_H_VIDEO_FILER}{region}.{key}",
+                values=stop_distribute_video_id_list,
+                expire_time=2 * 3600
+            )
+            # 将已超分发视频加入到地域分组24h的数据线上过滤应用列表中
+            redis_helper.add_data_with_set(
+                key_name=f"{config_.REGION_H_VIDEO_FILER_24H}{region}.{key}",
+                values=stop_distribute_video_id_list,
+                expire_time=2 * 3600
+            )
+            # 将已超分发视频加入到不区分相对24h线上过滤应用列表中
+            redis_helper.add_data_with_set(
+                key_name=f"{config_.H_VIDEO_FILER_24H}{region}.{key}",
+                values=stop_distribute_video_id_list,
+                expire_time=2 * 3600
+            )
+            # 将已超分发视频 移除 大列表
+            key_name = f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}{region}.{key}." \
+                       f"{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
+            if not redis_helper.key_exists(key_name=key_name):
+                if now_h == 0:
+                    redis_date = now_date - datetime.timedelta(days=1)
+                    redis_h = 23
+                else:
+                    redis_date = now_date
+                    redis_h = now_h - 1
+                key_name = f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}{region}.{key}." \
+                           f"{datetime.datetime.strftime(redis_date, '%Y%m%d')}.{redis_h}"
+            redis_helper.remove_value_from_zset(key_name=key_name, value=stop_distribute_video_id_list)
+
+        log_.info(f"region = {region} videos check end!")
+    log_.info("region_h videos check end!")
+
+
+# Script entry point: run the over-distribution check once.
+if __name__ == '__main__':
+    check_region_videos()

+ 25 - 0
config.py

@@ -115,6 +115,16 @@ class BaseConfig(object):
         'rule2': {'cal_score_func': 2, 'return_count': 100, 'platform_return_rate': 0.001, 'view_type': 'preview'},
     }
 
+    # Region name -> region code (6-digit code string); the 'None' entry maps to '-1'.
+    REGION_CODE = {
+        '河北省': '130000', '山西省': '140000', '辽宁省': '210000', '吉林省': '220000', '黑龙江省': '230000', '江苏省': '320000',
+        '浙江省': '330000', '安徽省': '340000', '福建省': '350000', '江西省': '360000', '山东省': '370000', '河南省': '410000',
+        '湖北省': '420000', '湖南省': '430000', '广东省': '440000', '海南省': '460000', '四川省': '510000', '贵州省': '520000',
+        '云南省': '530000', '陕西省': '610000', '甘肃省': '620000', '青海省': '630000', '台湾省': '710000', '北京': '110000',
+        '天津': '120000', '内蒙古': '150000', '上海': '310000', '广西': '450000', '重庆': '500000', '西藏': '540000',
+        '宁夏': '640000', '新疆': '650000', '香港': '810000', '澳门': '820000',
+        'None': '-1'
+    }
+
     # 地域分组小时级规则更新使用数据
     PROJECT_REGION = 'loghubods'
     TABLE_REGION = 'video_each_hour_update_province'
@@ -298,6 +308,13 @@ class BaseConfig(object):
     # 特殊mid对应指定视频列表更新结果存放 redis key 前缀,完整格式:'com.weiqu.video.special.videos.item.{date}'
     KEY_NAME_PREFIX_SPECIAL_VIDEOS = 'com.weiqu.video.special.videos.item.'
 
+    # Redis key prefix of the rate-limited video set, full format: 'com.weiqu.video.limit.set.{date}'
+    KEY_NAME_PREFIX_LIMIT_VIDEO_SET = 'com.weiqu.video.limit.set.'
+    # Redis key recording the maximum distribution counts of the rate-limited videos, full format: 'com.weiqu.video.limit.item'
+    KEY_NAME_PREFIX_LIMIT_VIDEOS = 'com.weiqu.video.limit.item'
+    # Redis key prefix of the per-video distribution counter, full format: 'com.weiqu.video.limit.distribute.count.{videoId}'
+    KEY_NAME_PREFIX_LIMIT_VIDEO_DISTRIBUTE_COUNT = 'com.weiqu.video.limit.distribute.count.'
+
 
 class DevelopmentConfig(BaseConfig):
     """开发环境配置"""
@@ -370,6 +387,8 @@ class DevelopmentConfig(BaseConfig):
     BOTTOM_JSON_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/video/distribute/structure/video/list'
     # 通知后端更新兜底视频接口地址
     NOTIFY_BACKEND_updateFallBackVideoList_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/recommend/updateFallBackVideoList'
+    # URL of the API that returns the rate-limited videos
+    GET_VIDEO_LIMIT_LIST_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/recommend/getVideoLimitList'
 
     # logs 上传oss 目标Bucket指定目录
     OSS_FOLDER_LOGS = 'rov-offline/dev/logs/'
@@ -448,6 +467,8 @@ class TestConfig(BaseConfig):
     BOTTOM_JSON_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/video/distribute/structure/video/list'
     # 通知后端更新兜底视频接口地址
     NOTIFY_BACKEND_updateFallBackVideoList_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/recommend/updateFallBackVideoList'
+    # URL of the API that returns the rate-limited videos
+    GET_VIDEO_LIMIT_LIST_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/recommend/getVideoLimitList'
 
     # logs 上传oss 目标Bucket指定目录
     OSS_FOLDER_LOGS = 'rov-offline/test/logs/'
@@ -526,6 +547,8 @@ class PreProductionConfig(BaseConfig):
     BOTTOM_JSON_URL = 'http://speedpre.wx.com/longvideoapi/openapi/video/distribute/structure/video/list'
     # 通知后端更新兜底视频接口地址
     NOTIFY_BACKEND_updateFallBackVideoList_URL = 'http://videopre-internal.piaoquantv.com/longvideoapi/openapi/recommend/updateFallBackVideoList'
+    # URL of the API that returns the rate-limited videos
+    GET_VIDEO_LIMIT_LIST_URL = 'http://prespeed-internal.piaoquantv.com/longvideoapi/openapi/recommend/getVideoLimitList'
 
     # logs 上传oss 目标Bucket指定目录
     OSS_FOLDER_LOGS = 'rov-offline/pre/logs/'
@@ -604,6 +627,8 @@ class ProductionConfig(BaseConfig):
     BOTTOM_JSON_URL = 'http://recommend-common-internal.piaoquantv.com/longvideoapi/openapi/video/distribute/structure/video/list'
     # 通知后端更新兜底视频接口地址
     NOTIFY_BACKEND_updateFallBackVideoList_URL = 'http://recommend-common-internal.piaoquantv.com/longvideoapi/openapi/recommend/updateFallBackVideoList'
+    # URL of the API that returns the rate-limited videos
+    GET_VIDEO_LIMIT_LIST_URL = 'http://recommend-common-internal.piaoquantv.com/longvideoapi/openapi/recommend/getVideoLimitList'
 
     # logs 上传oss 目标Bucket指定目录
     OSS_FOLDER_LOGS = 'rov-offline/pro/logs/'

+ 53 - 0
get_video_limit_list.py

@@ -0,0 +1,53 @@
+import datetime
+import traceback
+from config import set_config
+from log import Log
+from utils import request_post, RedisHelper
+
+# Module-level objects shared by the functions below. set_config() is unpacked into
+# two values and only the first one (the config object) is kept.
+config_, _ = set_config()
+log_ = Log()
+redis_helper = RedisHelper()
+
+
+def get_limit_videos(now_date):
+    """
+    Fetch the rate-limited videos and store them in redis.
+
+    :param now_date: datetime used to build the '%Y%m%d' suffix of the daily set key
+    :return: None
+    """
+    # Ask the backend API for the videos that need rate limiting
+    data = request_post(request_url=config_.GET_VIDEO_LIMIT_LIST_URL)
+    # NOTE(review): `data` is never read after this point, so both lists below stay empty
+    # and the two redis writes that follow store empty values. The response parsing
+    # looks unfinished -- confirm the response schema and fill the lists from it.
+    video_limit_list = []  # [(videoId, maxDistributeCount), ...]
+    video_id_list = []
+    # Store the videos' maximum distribution counts in redis
+    redis_helper.set_data_to_redis(key_name=config_.KEY_NAME_PREFIX_LIMIT_VIDEOS, value=video_limit_list)
+    # Add the limited video ids to today's redis set (expire_time is two days, in seconds)
+    redis_helper.add_data_with_set(
+        key_name=f"{config_.KEY_NAME_PREFIX_LIMIT_VIDEO_SET}{datetime.datetime.strftime(now_date, '%Y%m%d')}",
+        values=tuple(video_id_list),
+        expire_time=2*24*3600
+    )
+
+
+def del_yesterday_limit_videos_record(now_date):
+    """清除前一天的视频分发数记录"""
+    dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
+    # 获取前一天限流视频
+    video_list = redis_helper.get_data_from_set(key_name=f"{config_.KEY_NAME_PREFIX_LIMIT_VIDEO_SET}{dt}")
+    if video_list is None:
+        return
+    for video_id in video_list:
+        redis_helper.del_keys(key_name=f"{config_.KEY_NAME_PREFIX_LIMIT_VIDEO_DISTRIBUTE_COUNT}{video_id}")
+
+
+if __name__ == '__main__':
+    try:
+        now_date = datetime.datetime.today()
+        now_h = datetime.datetime.now().hour
+        log_.info(f"now_date = {now_date}, now_h = {now_h}")
+        if now_h == 0:
+            # 0点清除前一天限流视频分发记录
+            del_yesterday_limit_videos_record(now_date=now_date)
+            log_.info("成功清除前一天限流视频分发记录!")
+        # 获取最新限流视频
+        get_limit_videos(now_date=now_date)
+        log_.info("成功获取最新限流视频!")
+    except Exception as e:
+        log_.error("限流视频更新失败!")
+        log_.error(traceback.format_exc())

+ 1 - 52
region_rule_rank_h.py

@@ -16,43 +16,7 @@ from log import Log
 config_, _ = set_config()
 log_ = Log()
 
-region_code = {
-    '河北省': '130000',
-    '山西省': '140000',
-    '辽宁省': '210000',
-    '吉林省': '220000',
-    '黑龙江省': '230000',
-    '江苏省': '320000',
-    '浙江省': '330000',
-    '安徽省': '340000',
-    '福建省': '350000',
-    '江西省': '360000',
-    '山东省': '370000',
-    '河南省': '410000',
-    '湖北省': '420000',
-    '湖南省': '430000',
-    '广东省': '440000',
-    '海南省': '460000',
-    '四川省': '510000',
-    '贵州省': '520000',
-    '云南省': '530000',
-    '陕西省': '610000',
-    '甘肃省': '620000',
-    '青海省': '630000',
-    '台湾省': '710000',
-    '北京': '110000',
-    '天津': '120000',
-    '内蒙古': '150000',
-    '上海': '310000',
-    '广西': '450000',
-    '重庆': '500000',
-    '西藏': '540000',
-    '宁夏': '640000',
-    '新疆': '650000',
-    '香港': '810000',
-    '澳门': '820000',
-    'None': '-1'
-}
+region_code = config_.REGION_CODE
 
 features = [
     'code',
@@ -171,10 +135,6 @@ def video_rank(df, now_date, now_h, rule_key, param, region):
     :return:
     """
     redis_helper = RedisHelper()
-    # # 获取rov模型结果
-    # key_name = get_rov_redis_key(now_date=now_date)
-    # initial_data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1, with_scores=True)
-    # log_.info(f'initial data count = {len(initial_data)}')
 
     # 获取符合进入召回源条件的视频,进入条件:小时级回流>=20 && score>=0.005
     return_count = param.get('return_count', 1)
@@ -208,17 +168,6 @@ def video_rank(df, now_date, now_h, rule_key, param, region):
         # 清空线上过滤应用列表
         redis_helper.del_keys(key_name=f"{config_.REGION_H_VIDEO_FILER}{region}.{rule_key}")
 
-    # 去重更新rov模型结果,并另存为redis中
-    # initial_data_dup = {}
-    # for video_id, score in initial_data:
-    #     if int(video_id) not in h_video_ids:
-    #         initial_data_dup[int(video_id)] = score
-    # log_.info(f"initial data dup count = {len(initial_data_dup)}")
-    # initial_key_name = \
-    #     f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}{region}.{rule_key}.{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
-    # if len(initial_data_dup) > 0:
-    #     redis_helper.add_data_with_zset(key_name=initial_key_name, data=initial_data_dup, expire_time=23 * 3600)
-
     region_24h_rule_key = param.get('region_24h_rule_key', 'rule1')
     # 与其他召回视频池去重,存入对应的redis
     dup_to_redis(h_video_ids=h_video_ids, now_date=now_date, now_h=now_h, rule_key=rule_key,