Bladeren bron

update shield videos

liqian 2 jaren geleden
bovenliggende
commit
fa9f1124da
5 gewijzigde bestanden met 54 toevoegingen en 37 verwijderingen
  1. 13 8
      config.py
  2. 23 17
      region_rule_rank_h.py
  3. 5 1
      shield_videos.py
  4. 2 2
      shield_videos_task.sh
  5. 11 9
      utils.py

+ 13 - 8
config.py

@@ -116,12 +116,13 @@ class BaseConfig(object):
     }
 
     REGION_CODE = {
-        '河北省': '130000', '山西省': '140000', '辽宁省': '210000', '吉林省': '220000', '黑龙江省': '230000', '江苏省': '320000',
-        '浙江省': '330000', '安徽省': '340000', '福建省': '350000', '江西省': '360000', '山东省': '370000', '河南省': '410000',
-        '湖北省': '420000', '湖南省': '430000', '广东省': '440000', '海南省': '460000', '四川省': '510000', '贵州省': '520000',
-        '云南省': '530000', '陕西省': '610000', '甘肃省': '620000', '青海省': '630000', '台湾省': '710000', '北京': '110000',
-        '天津': '120000', '内蒙古': '150000', '上海': '310000', '广西': '450000', '重庆': '500000', '西藏': '540000',
-        '宁夏': '640000', '新疆': '650000', '香港': '810000', '澳门': '820000',
+        '北京': '110000', '天津': '120000', '河北省': '130000', '山西省': '140000', '内蒙古': '150000',
+        '辽宁省': '210000', '吉林省': '220000', '黑龙江省': '230000',
+        '上海': '310000', '江苏省': '320000', '浙江省': '330000', '安徽省': '340000', '福建省': '350000', '江西省': '360000', '山东省': '370000',
+        '河南省': '410000', '湖北省': '420000', '湖南省': '430000', '广东省': '440000', '广西': '450000', '海南省': '460000',
+        '重庆': '500000',  '四川省': '510000', '贵州省': '520000', '云南省': '530000', '西藏': '540000',
+        '陕西省': '610000', '甘肃省': '620000', '青海省': '630000', '宁夏': '640000', '新疆': '650000',
+        '台湾省': '710000', '香港': '810000', '澳门': '820000',
         'None': '-1'
     }
 
@@ -567,8 +568,12 @@ class BaseConfig(object):
     TABLE_BENSHAN_ZHUFU = 'benshanzhufu_videolist'
     # 本山祝福视频redis存储key
     BENSHAN_ZHUFU_KEY_NAME = 'com.weiqu.video.benshanzf'
-    # 本山祝福视频 过滤 地域
-    BENSHAN_ZHUFU_REGION_CODE = ['110000', '500000', '-1']
+    # 屏蔽视频配置 key:region_code, value:videos key list
+    SHIELD_CONFIG = {
+        '110000': [BENSHAN_ZHUFU_KEY_NAME, ],
+        '500000': [BENSHAN_ZHUFU_KEY_NAME, ],
+        '-1': [BENSHAN_ZHUFU_KEY_NAME, ],
+    }
 
 
 class DevelopmentConfig(BaseConfig):

+ 23 - 17
region_rule_rank_h.py

@@ -11,7 +11,7 @@ import math
 from functools import reduce
 from odps import ODPS
 from threading import Timer
-from utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_benshanzf_video
+from utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video
 from config import set_config
 from log import Log
 from check_video_limit_distribute import update_limit_video_score
@@ -157,10 +157,11 @@ def video_rank(df, now_date, now_h, rule_key, param, region, app_type, data_key)
     filtered_videos = filter_video_status(h_recall_videos)
     log_.info('filtered_videos count = {}'.format(len(filtered_videos)))
 
-    # 本山祝福视频过滤
-    if region in config_.BENSHAN_ZHUFU_REGION_CODE:
-        filtered_videos = filter_benshanzf_video(video_ids=filtered_videos)
-        log_.info(f"benshanzhufu filtered_videos count = {len(filtered_videos)}")
+    # 屏蔽视频过滤
+    shield_key_name_list = config_.SHIELD_CONFIG.get(region, None)
+    if shield_key_name_list is not None:
+        filtered_videos = filter_shield_video(video_ids=filtered_videos, shield_key_name_list=shield_key_name_list)
+        log_.info(f"shield filtered_videos count = {len(filtered_videos)}")
 
     # 写入对应的redis
     h_video_ids = []
@@ -217,11 +218,12 @@ def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, re
         region_24h_data = redis_helper.get_all_data_from_zset(key_name=region_24h_key_name, with_scores=True)
         log_.info(f'region 24h data count = {len(region_24h_data)}')
 
-        # 本山祝福视频过滤
+        # 屏蔽视频过滤
         region_24h_video_ids = [int(video_id) for video_id, _ in region_24h_data]
-        if region in config_.BENSHAN_ZHUFU_REGION_CODE:
-            region_24h_video_ids = filter_benshanzf_video(video_ids=region_24h_video_ids)
-            log_.info(f"benshanzhufu filtered_videos count = {len(region_24h_video_ids)}")
+        shield_key_name_list = config_.SHIELD_CONFIG.get(region, None)
+        if shield_key_name_list is not None:
+            region_24h_video_ids = filter_shield_video(video_ids=region_24h_video_ids, shield_key_name_list=shield_key_name_list)
+            log_.info(f"shield filtered_videos count = {len(region_24h_video_ids)}")
 
         region_24h_dup = {}
         for video_id, score in region_24h_data:
@@ -264,11 +266,12 @@ def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, re
         day_data = redis_helper.get_all_data_from_zset(key_name=day_key_name, with_scores=True)
         log_.info(f'24h data count = {len(day_data)}')
 
+        # 屏蔽视频过滤
         day_video_ids = [int(video_id) for video_id, _ in day_data]
-        # 本山祝福视频过滤
-        if region in config_.BENSHAN_ZHUFU_REGION_CODE:
-            day_video_ids = filter_benshanzf_video(video_ids=day_video_ids)
-            log_.info(f"benshanzhufu filtered_videos count = {len(day_video_ids)}")
+        shield_key_name_list = config_.SHIELD_CONFIG.get(region, None)
+        if shield_key_name_list is not None:
+            day_video_ids = filter_shield_video(video_ids=day_video_ids, shield_key_name_list=shield_key_name_list)
+            log_.info(f"shield filtered_videos count = {len(day_video_ids)}")
 
         day_dup = {}
         for video_id, score in day_data:
@@ -290,11 +293,14 @@ def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, re
     model_key_name = get_rov_redis_key(now_date=now_date)
     model_data = redis_helper.get_all_data_from_zset(key_name=model_key_name, with_scores=True)
     log_.info(f'model data count = {len(model_data)}')
+
+    # 屏蔽视频过滤
     model_video_ids = [int(video_id) for video_id, _ in model_data]
-    # 本山祝福视频过滤
-    if region in config_.BENSHAN_ZHUFU_REGION_CODE:
-        model_video_ids = filter_benshanzf_video(video_ids=model_video_ids)
-        log_.info(f"benshanzhufu filtered_videos count = {len(model_video_ids)}")
+    shield_key_name_list = config_.SHIELD_CONFIG.get(region, None)
+    if shield_key_name_list is not None:
+        model_video_ids = filter_shield_video(video_ids=model_video_ids, shield_key_name_list=shield_key_name_list)
+        log_.info(f"shield filtered_videos count = {len(model_video_ids)}")
+
     model_data_dup = {}
     for video_id, score in model_data:
         if int(video_id) not in h_video_ids and int(video_id) in model_video_ids:

+ 5 - 1
benshanzhufu_videos.py → shield_videos.py

@@ -31,5 +31,9 @@ def get_benshanzhufu_videos():
         log_.error(traceback.format_exc())
 
 
+def main():
+    get_benshanzhufu_videos()
+
+
 if __name__ == '__main__':
-    get_benshanzhufu_videos()
+    main()

+ 2 - 2
benshanzhufu_videos_task.sh → shield_videos_task.sh

@@ -1,7 +1,7 @@
 source /etc/profile
 echo $ROV_OFFLINE_ENV
 if [[ $ROV_OFFLINE_ENV == 'test' ]]; then
-    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/benshanzhufu_videos.py
+    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/shield_videos.py
 elif [[ $ROV_OFFLINE_ENV == 'pro' ]]; then
-    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/benshanzhufu_videos.py
+    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/shield_videos.py
 fi

+ 11 - 9
utils.py

@@ -290,23 +290,25 @@ def filter_video_status_app(video_ids):
     return filtered_videos
 
 
-def filter_benshanzf_video(video_ids):
+def filter_shield_video(video_ids, shield_key_name_list):
     """
-    过滤本山祝福视频
+    过滤屏蔽视频
     :param video_ids: 需过滤的视频列表 type-list
+    :param shield_key_name_list: 屏蔽视频 redis-key 列表 type-list
     :return: filtered_videos  过滤后的列表  type-list
     """
     if len(video_ids) == 0:
         return video_ids
     # 根据Redis缓存中的数据过滤
     redis_helper = RedisHelper()
-    # key拼接
-    benshanzf_videos_list = redis_helper.get_data_from_set(key_name=config_.BENSHAN_ZHUFU_KEY_NAME)
-    if not benshanzf_videos_list:
-        return video_ids
-    benshanzf_videos = [int(video) for video in benshanzf_videos_list]
-    filtered_videos = [video_id for video_id in video_ids if video_id not in benshanzf_videos]
-    return filtered_videos
+    for shield_key_name in shield_key_name_list:
+        shield_videos_list = redis_helper.get_data_from_set(key_name=shield_key_name)
+        if not shield_videos_list:
+            continue
+        shield_videos = [int(video) for video in shield_videos_list]
+        video_ids = [video_id for video_id in video_ids if video_id not in shield_videos]
+
+    return video_ids
 
 
 def update_video_w_h_rate(video_ids, key_name):