فهرست منبع

merge shield-videos-20220826

liqian 2 سال پیش
والد
کامیت
5906d5e9db
3 فایل تغییر یافته به همراه 108 افزوده شده و 5 حذف شده
  1. 20 3
      config.py
  2. 60 0
      region_rule_rank_h.py
  3. 28 2
      shield_videos.py

+ 20 - 3
config.py

@@ -117,6 +117,15 @@ class BaseConfig(object):
         'None': '-1'
     }
 
+    CITY_CODE = {
+        '广州': '440100', '深圳': '440300', '成都': '510100',
+    }
+
+    REGION_CITY_MAPPING = {
+        REGION_CODE['广东省']: [CITY_CODE['广州'], CITY_CODE['深圳'], ],
+        REGION_CODE['四川省']: [CITY_CODE['成都'], ]
+    }
+
     # 地域分组天级规则更新使用数据
     PROJECT_REGION_DAY = 'loghubods'
     TABLE_REGION_DAY = 'video_each_day_update_province'
@@ -444,11 +453,19 @@ class BaseConfig(object):
     TABLE_XNG = 'xng_videos'
     # 小年糕视频redis存储key
     XNG_KEY_NAME = 'xng:videos'
+    # 特殊地区屏蔽危险视频列表,在广州+深圳+成都+无地域划分表中屏蔽
+    PROJECT_SPECIAL_AREA_LIMIT = 'loghubods'
+    TABLE_SPECIAL_AREA_LIMIT = 'special_area_recommend_limit'
+    # 特殊地区屏蔽危险视频redis存储key
+    SPECIAL_AREA_LIMIT_KEY_NAME = 'special:area:limit:videos'
     # 屏蔽视频配置 key:region_code, value:videos key list
     SHIELD_CONFIG = {
-        '110000': [BENSHAN_ZHUFU_KEY_NAME, XNG_KEY_NAME, ],
-        '500000': [BENSHAN_ZHUFU_KEY_NAME, ],
-        '-1': [BENSHAN_ZHUFU_KEY_NAME, XNG_KEY_NAME, ],
+        REGION_CODE['北京']: [BENSHAN_ZHUFU_KEY_NAME, XNG_KEY_NAME, ],
+        REGION_CODE['重庆']: [BENSHAN_ZHUFU_KEY_NAME, ],
+        REGION_CODE['None']: [BENSHAN_ZHUFU_KEY_NAME, SPECIAL_AREA_LIMIT_KEY_NAME, XNG_KEY_NAME, ],
+        CITY_CODE['广州']: [SPECIAL_AREA_LIMIT_KEY_NAME, ],
+        CITY_CODE['深圳']: [SPECIAL_AREA_LIMIT_KEY_NAME, ],
+        CITY_CODE['成都']: [SPECIAL_AREA_LIMIT_KEY_NAME, ],
     }
 
 

+ 60 - 0
region_rule_rank_h.py

@@ -537,6 +537,44 @@ def process_with_app_type(app_type, params, region_code_list, feature_df, now_da
     # gevent.joinall(task_list)
 
 
+def copy_data_for_city(region, city_code, data_key, rule_key, now_date, now_h):
+    """Copy a region's recall zsets to the city's redis keys, with shield filtering.
+
+    For every recall key prefix, the region zset for (data_key, rule_key, now_date,
+    now_h) is read, filtered by the shield lists configured for city_code in
+    SHIELD_CONFIG (if any), and written to the matching city key (23h expiry).
+    """
+    log_.info(f"city_code = {city_code} start ...")
+    redis_helper = RedisHelper()
+    key_prefix_list = [
+        config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H,  # region, hourly
+        config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H,  # region, relative 24h
+        config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H,  # no region split, relative 24h
+        config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H,  # no region split, relative 24h, after filtering
+        config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H,  # large rov list
+    ]
+    key_suffix = f"{data_key}:{rule_key}:{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
+    shield_key_name_list = config_.SHIELD_CONFIG.get(city_code, None)
+    for key_prefix in key_prefix_list:
+        region_key = f"{key_prefix}{region}:{key_suffix}"
+        city_key = f"{key_prefix}{city_code}:{key_suffix}"
+        if not redis_helper.key_exists(key_name=region_key):
+            continue
+        region_data = redis_helper.get_all_data_from_zset(key_name=region_key, with_scores=True)
+        if not region_data:
+            continue
+        # Shield filtering; the surviving ids go into a set so each membership test is O(1).
+        region_video_ids = [int(video_id) for video_id, _ in region_data]
+        if shield_key_name_list is not None:
+            region_video_ids = filter_shield_video(video_ids=region_video_ids, shield_key_name_list=shield_key_name_list)
+        kept_video_ids = set(region_video_ids)
+        city_data = {int(video_id): score for video_id, score in region_data if int(video_id) in kept_video_ids}
+        if city_data:
+            redis_helper.add_data_with_zset(key_name=city_key, data=city_data, expire_time=23 * 3600)
+
+    log_.info(f"city_code = {city_code} end!")
+
+
 def process_with_param(param, data_params_item, rule_params_item, region_code_list, feature_df, now_date, now_h, rule_rank_h_flag):
     log_.info(f"param = {param} start...")
 
@@ -575,6 +613,18 @@ def process_with_param(param, data_params_item, rule_params_item, region_code_li
         ]
 
     gevent.joinall(task_list)
+
+    # 特殊城市视频数据准备
+    for region, city_list in config_.REGION_CITY_MAPPING.items():
+        t = [
+            gevent.spawn(
+                copy_data_for_city,
+                region, city_code, data_key, rule_key, now_date, now_h
+            )
+            for city_code in city_list
+        ]
+        gevent.joinall(t)
+
     log_.info(f"param = {param} end!")
 
 
@@ -713,6 +763,16 @@ def h_rank_bottom(now_date, now_h, rule_params, region_code_list, rule_rank_h_fl
                          region_24h_rule_key=region_24h_rule_key, region=region,
                          data_key=data_key, by_24h_rule_key=by_24h_rule_key,
                          by_48h_rule_key=by_48h_rule_key, rule_rank_h_flag=rule_rank_h_flag)
+        # 特殊城市视频数据准备
+        for region, city_list in config_.REGION_CITY_MAPPING.items():
+            t = [
+                gevent.spawn(
+                    copy_data_for_city,
+                    region, city_code, data_key, rule_key, now_date, now_h
+                )
+                for city_code in city_list
+            ]
+            gevent.joinall(t)
 
 
 def h_timer_check():

+ 28 - 2
shield_videos.py

@@ -1,5 +1,6 @@
 import datetime
 import traceback
+import datetime
 from config import set_config
 from log import Log
 from utils import execute_sql_from_odps
@@ -21,7 +22,7 @@ def get_benshanzhufu_videos():
             for record in reader:
                 video_id = int(record['videoid'])
                 video_id_list.append(video_id)
-        log_.info(f"count = {len(video_id_list)}")
+        log_.info(f"benshanzhufu videos count = {len(video_id_list)}")
         # 存入redis
         if len(video_id_list) > 0:
             redis_helper.del_keys(key_name=config_.BENSHAN_ZHUFU_KEY_NAME)
@@ -35,7 +36,7 @@ def get_benshanzhufu_videos():
 def get_xng_videos():
     """获取小年糕视频并存入redis"""
     try:
-        # 获取本山祝福视频
+        # 获取小年糕视频
         sql = f"SELECT videoid FROM {config_.PROJECT_XNG}.{config_.TABLE_XNG};"
         records = execute_sql_from_odps(project=config_.PROJECT_XNG, sql=sql)
         video_id_list = []
@@ -43,6 +44,7 @@ def get_xng_videos():
             for record in reader:
                 video_id = int(record['videoid'])
                 video_id_list.append(video_id)
+
         log_.info(f"xng videos count = {len(video_id_list)}")
         # 存入redis
         if len(video_id_list) > 0:
@@ -54,11 +56,35 @@ def get_xng_videos():
         log_.error(traceback.format_exc())
 
 
+def get_special_area_limit_videos():
+    """Fetch the special-area shielded video ids from ODPS and store them in redis.
+
+    The redis set is replaced only when the query yields at least one video id.
+    Any exception is caught and its traceback is logged; nothing is re-raised.
+    """
+    try:
+        # Query the special-area shielded videos
+        project = config_.PROJECT_SPECIAL_AREA_LIMIT
+        sql = f"SELECT videoid FROM {project}.{config_.TABLE_SPECIAL_AREA_LIMIT};"
+        records = execute_sql_from_odps(project=project, sql=sql)
+        with records.open_reader() as reader:
+            video_id_list = [int(record['videoid']) for record in reader]
+        log_.info(f"special area limit videos count = {len(video_id_list)}")
+        # Store in redis
+        if video_id_list:
+            limit_key = config_.SPECIAL_AREA_LIMIT_KEY_NAME
+            redis_helper.del_keys(key_name=limit_key)
+            redis_helper.add_data_with_set(key_name=limit_key, values=video_id_list, expire_time=25 * 3600)
+    except Exception:
+        log_.error(traceback.format_exc())
+
+
 def main():
     now_h = datetime.datetime.now().hour  # hour of the current run (naive local time)
     if now_h == 4:  # the benshanzhufu list is refreshed only by the run at hour 4
         get_benshanzhufu_videos()
     get_xng_videos()  # refreshed on every run
+    get_special_area_limit_videos()  # refreshed on every run
 
 
 if __name__ == '__main__':