浏览代码

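Add shield videos: add a special-area limit video list and select the region by city code (Guangzhou, Shenzhen, Chengdu) before falling back to province code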

liqian 2 年之前
父节点
当前提交
84c1baca05
共有 3 个文件被更改,包括 61 次插入和 22 次删除
  1. config.py (+11 -3)
  2. utils.py (+3 -3)
  3. video_recall.py (+47 -16)

+ 11 - 3
config.py

@@ -584,14 +584,22 @@ class BaseConfig(object):
         '陕西省': '610000', '甘肃省': '620000', '青海省': '630000', '宁夏': '640000', '新疆': '650000',
         '台湾省': '710000', '香港': '810000', '澳门': '820000',
     }
+    CITY_CODE = {
+        '广州': '440100', '深圳': '440300', '成都': '510100',
+    }
 
     # 本山祝福视频redis存储key
     BENSHAN_ZHUFU_KEY_NAME = 'benshanzf:videos'
+    # 特殊地区屏蔽危险视频redis存储key
+    SPECIAL_AREA_LIMIT_KEY_NAME = 'special:area:limit:videos'
     # 屏蔽视频配置 key:region_code, value:videos key list
     SHIELD_CONFIG = {
-        '110000': [BENSHAN_ZHUFU_KEY_NAME, ],
-        '500000': [BENSHAN_ZHUFU_KEY_NAME, ],
-        '-1': [BENSHAN_ZHUFU_KEY_NAME, ],
+        REGION_CODE['北京']: [BENSHAN_ZHUFU_KEY_NAME, ],
+        REGION_CODE['重庆']: [BENSHAN_ZHUFU_KEY_NAME, ],
+        REGION_CODE['None']: [BENSHAN_ZHUFU_KEY_NAME, SPECIAL_AREA_LIMIT_KEY_NAME, ],
+        CITY_CODE['广州']: [SPECIAL_AREA_LIMIT_KEY_NAME, ],
+        CITY_CODE['深圳']: [SPECIAL_AREA_LIMIT_KEY_NAME, ],
+        CITY_CODE['成都']: [SPECIAL_AREA_LIMIT_KEY_NAME, ],
     }
 
 

+ 3 - 3
utils.py

@@ -238,7 +238,7 @@ class FilterVideos(object):
         else:
             return [int(video_id) for video_id in filtered_viewed_result]
 
-    def filter_videos(self, pool_type='rov', province_code=None):
+    def filter_videos(self, pool_type='rov', region_code=None):
         """视频过滤"""
         # 预曝光过滤
         st_pre = time.time()
@@ -289,10 +289,10 @@ class FilterVideos(object):
         filtered_viewed_videos = [int(video_id) for video_id in filtered_viewed_result]
         if pool_type == 'flow':
             # 流量池视频需过滤屏蔽视频
-            if province_code is None:
+            if region_code is None:
                 return filtered_viewed_videos
             else:
-                shield_key_name_list = config_.SHIELD_CONFIG.get(province_code, None)
+                shield_key_name_list = config_.SHIELD_CONFIG.get(region_code, None)
                 if shield_key_name_list is not None:
                     filtered_shield_video_ids = self.filter_shield_video(
                         video_ids=filtered_viewed_videos, shield_key_name_list=shield_key_name_list

+ 47 - 16
video_recall.py

@@ -434,10 +434,21 @@ class PoolRecall(object):
     def flow_pool_recall(self, size=10, flow_pool_id=None):
         """从流量池中获取视频"""
         start_time = time.time()
+        # 获取存在城市分组数据的城市编码列表
+        city_code_list = [code for _, code in config_.CITY_CODE.items()]
         # 获取provinceCode
         province_code = self.client_info.get('provinceCode', '-1')
-        if province_code == '':
-            province_code = '-1'
+        # 获取cityCode
+        city_code = self.client_info.get('cityCode', '-1')
+
+        if city_code in city_code_list:
+            # 分城市数据存在时,获取城市分组数据
+            region_code = city_code
+        else:
+            region_code = province_code
+        if region_code == '':
+            region_code = '-1'
+
         flow_pool_key = self.get_pool_redis_key('flow', flow_pool_id=flow_pool_id)
         print(flow_pool_key)
         flow_pool_recall_result = []
@@ -487,7 +498,7 @@ class PoolRecall(object):
             # 过滤
             filter_ = FilterVideos(request_id=self.request_id,
                                    app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=video_ids)
-            ge = gevent.spawn(filter_.filter_videos, pool_type='flow', province_code=province_code)
+            ge = gevent.spawn(filter_.filter_videos, pool_type='flow', region_code=region_code)
             ge.join()
             filtered_result = ge.get()
             # 检查可分发数
@@ -945,14 +956,24 @@ class PoolRecall(object):
             else:
                 # 判断热度列表是否更新,未更新则使用前一小时的热度列表
                 if self.ab_code in [code for _, code in config_.AB_CODE['region_rank_by_h'].items()]:
+                    # 获取存在城市分组数据的城市编码列表
+                    city_code_list = [code for _, code in config_.CITY_CODE.items()]
                     # 获取provinceCode
                     province_code = self.client_info.get('provinceCode', '-1')
-                    if province_code == '':
-                        province_code = '-1'
+                    # 获取cityCode
+                    city_code = self.client_info.get('cityCode', '-1')
+                    if city_code in city_code_list:
+                        # 分城市数据存在时,获取城市分组数据
+                        region_code = city_code
+                    else:
+                        region_code = province_code
+                    if region_code == '':
+                        region_code = '-1'
+
                     if self.ab_code == config_.AB_CODE['region_rank_by_h'].get('region_rule_rank2'):
-                        key_prefix = f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_24H}{province_code}."
+                        key_prefix = f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_24H}{region_code}."
                     else:
-                        key_prefix = f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}{province_code}:"
+                        key_prefix = f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}{region_code}:"
                 elif self.ab_code in [code for _, code in config_.AB_CODE['rank_by_24h'].items()]:
                     key_prefix = config_.RECALL_KEY_NAME_PREFIX_DUP_24H
                 else:
@@ -1185,29 +1206,39 @@ class PoolRecall(object):
         :return:
         """
         start_time = time.time()
+        # 获取存在城市分组数据的城市编码列表
+        city_code_list = [code for _, code in config_.CITY_CODE.items()]
         # 获取provinceCode
         province_code = self.client_info.get('provinceCode', '-1')
-        if province_code == '':
-            province_code = '-1'
+        # 获取cityCode
+        city_code = self.client_info.get('cityCode', '-1')
+
+        if city_code in city_code_list:
+            # 分城市数据存在时,获取城市分组数据
+            region_code = city_code
+        else:
+            region_code = province_code
+        if region_code == '':
+            region_code = '-1'
 
         # if self.ab_code in [config_.AB_CODE['region_rank_by_h'].get('region_rule_rank5_appType_0_data1'),
         #                     config_.AB_CODE['region_rank_by_h'].get('abtest_082'),
         #                     config_.AB_CODE['region_rank_by_h'].get('abtest_112')]:
 
-        if province_code == '-1':
+        if region_code == '-1':
             t = [
                 # gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size, 'day_24h'),
-                 gevent.spawn(self.recall_region_dup_24h, province_code, size, '24h_dup2', expire_time),
-                 gevent.spawn(self.recall_region_dup_24h, province_code, size, '24h_dup3', expire_time)]
+                 gevent.spawn(self.recall_region_dup_24h, region_code, size, '24h_dup2', expire_time),
+                 gevent.spawn(self.recall_region_dup_24h, region_code, size, '24h_dup3', expire_time)]
         else:
             t = [
                 # gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size),
                  # gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size, 'region_24h'),
                  # gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size, 'day_24h'),
-                 gevent.spawn(self.recall_region_dup_24h, province_code, size, 'region_h', expire_time),
-                 gevent.spawn(self.recall_region_dup_24h, province_code, size, 'region_24h', expire_time),
-                 gevent.spawn(self.recall_region_dup_24h, province_code, size, '24h_dup2', expire_time),
-                 gevent.spawn(self.recall_region_dup_24h, province_code, size, '24h_dup3', expire_time)]
+                 gevent.spawn(self.recall_region_dup_24h, region_code, size, 'region_h', expire_time),
+                 gevent.spawn(self.recall_region_dup_24h, region_code, size, 'region_24h', expire_time),
+                 gevent.spawn(self.recall_region_dup_24h, region_code, size, '24h_dup2', expire_time),
+                 gevent.spawn(self.recall_region_dup_24h, region_code, size, '24h_dup3', expire_time)]
         # else:
         #     if province_code == '-1':
         #         # t = [gevent.spawn(self.rov_pool_recall_with_region_by_h, province_code, size, 'day_24h')]