Procházet zdrojové kódy

add shield_videos_filter

liqian před 2 roky
rodič
revize
77cb043a2c
3 změnil soubory, kde provedl 90 přidání a 42 odebrání
  1. 17 6
      config.py
  2. 68 35
      utils.py
  3. 5 1
      video_recall.py

+ 17 - 6
config.py

@@ -409,12 +409,23 @@ class BaseConfig(object):
         "090": {"app_type": 19, "data_key": "data3", "rule_key": "rule3"},
     }
     REGION_CODE = {
-        '河北省': '130000', '山西省': '140000', '辽宁省': '210000', '吉林省': '220000', '黑龙江省': '230000', '江苏省': '320000',
-        '浙江省': '330000', '安徽省': '340000', '福建省': '350000', '江西省': '360000', '山东省': '370000', '河南省': '410000',
-        '湖北省': '420000', '湖南省': '430000', '广东省': '440000', '海南省': '460000', '四川省': '510000', '贵州省': '520000',
-        '云南省': '530000', '陕西省': '610000', '甘肃省': '620000', '青海省': '630000', '台湾省': '710000', '北京': '110000',
-        '天津': '120000', '内蒙古': '150000', '上海': '310000', '广西': '450000', '重庆': '500000', '西藏': '540000',
-        '宁夏': '640000', '新疆': '650000', '香港': '810000', '澳门': '820000'
+        '北京': '110000', '天津': '120000', '河北省': '130000', '山西省': '140000', '内蒙古': '150000',
+        '辽宁省': '210000', '吉林省': '220000', '黑龙江省': '230000',
+        '上海': '310000', '江苏省': '320000', '浙江省': '330000', '安徽省': '340000', '福建省': '350000', '江西省': '360000',
+        '山东省': '370000',
+        '河南省': '410000', '湖北省': '420000', '湖南省': '430000', '广东省': '440000', '广西': '450000', '海南省': '460000',
+        '重庆': '500000', '四川省': '510000', '贵州省': '520000', '云南省': '530000', '西藏': '540000',
+        '陕西省': '610000', '甘肃省': '620000', '青海省': '630000', '宁夏': '640000', '新疆': '650000',
+        '台湾省': '710000', '香港': '810000', '澳门': '820000',
+    }
+
+    # 本山祝福视频redis存储key
+    BENSHAN_ZHUFU_KEY_NAME = 'com.weiqu.video.benshanzf'
+    # 屏蔽视频配置 key:region_code, value:videos key list
+    SHIELD_CONFIG = {
+        '110000': [BENSHAN_ZHUFU_KEY_NAME, ],
+        '500000': [BENSHAN_ZHUFU_KEY_NAME, ],
+        '-1': [BENSHAN_ZHUFU_KEY_NAME, ],
     }
 
 

+ 68 - 35
utils.py

@@ -238,7 +238,7 @@ class FilterVideos(object):
         else:
             return [int(video_id) for video_id in filtered_viewed_result]
 
-    def filter_videos(self, pool_type='rov'):
+    def filter_videos(self, pool_type='rov', province_code=None):
         """视频过滤"""
         # 预曝光过滤
         st_pre = time.time()
@@ -286,8 +286,22 @@ class FilterVideos(object):
         })
         if not filtered_viewed_result:
             return None
+        filtered_viewed_videos = [int(video_id) for video_id in filtered_viewed_result]
+        if pool_type == 'flow':
+            # 流量池视频需过滤屏蔽视频
+            if province_code is None:
+                return filtered_viewed_videos
+            else:
+                shield_key_name_list = config_.SHIELD_CONFIG.get(province_code, None)
+                if shield_key_name_list is not None:
+                    filtered_shield_video_ids = self.filter_shield_video(
+                        video_ids=filtered_viewed_videos, shield_key_name_list=shield_key_name_list
+                    )
+                    return filtered_shield_video_ids
+                else:
+                    return filtered_viewed_videos
         else:
-            return [int(video_id) for video_id in filtered_viewed_result]
+            return filtered_viewed_videos
 
     def filter_video_previewed(self, video_ids):
         """
@@ -313,39 +327,39 @@ class FilterVideos(object):
         filtered_videos = [video_id for video_id in video_ids if video_id not in pe_videos]
         return filtered_videos
 
-    def filter_video_status(self, video_ids):
-        """
-        对视频状态进行过滤
-        :param video_ids: 视频id列表 type-list
-        :return: filtered_videos
-        """
-        if len(video_ids) == 1:
-            sql = "set hg_experimental_enable_shard_pruning=off; " \
-                  "SELECT video_id " \
-                  "FROM {} " \
-                  "WHERE audit_status = 5 " \
-                  "AND applet_rec_status IN (1, -6) " \
-                  "AND open_status = 1 " \
-                  "AND payment_status = 0 " \
-                  "AND encryption_status != 5 " \
-                  "AND transcoding_status = 3 " \
-                  "AND video_id IN ({});".format(config_.VIDEO_STATUS, video_ids[0])
-        else:
-            sql = "set hg_experimental_enable_shard_pruning=off; " \
-                  "SELECT video_id " \
-                  "FROM {} " \
-                  "WHERE audit_status = 5 " \
-                  "AND applet_rec_status IN (1, -6) " \
-                  "AND open_status = 1 " \
-                  "AND payment_status = 0 " \
-                  "AND encryption_status != 5 " \
-                  "AND transcoding_status = 3 " \
-                  "AND video_id IN {};".format(config_.VIDEO_STATUS, tuple(video_ids))
-
-        hologres_helper = HologresHelper()
-        data = hologres_helper.get_data(sql=sql)
-        filtered_videos = [int(temp[0]) for temp in data]
-        return filtered_videos
+    # def filter_video_status(self, video_ids):
+    #     """
+    #     对视频状态进行过滤
+    #     :param video_ids: 视频id列表 type-list
+    #     :return: filtered_videos
+    #     """
+    #     if len(video_ids) == 1:
+    #         sql = "set hg_experimental_enable_shard_pruning=off; " \
+    #               "SELECT video_id " \
+    #               "FROM {} " \
+    #               "WHERE audit_status = 5 " \
+    #               "AND applet_rec_status IN (1, -6) " \
+    #               "AND open_status = 1 " \
+    #               "AND payment_status = 0 " \
+    #               "AND encryption_status != 5 " \
+    #               "AND transcoding_status = 3 " \
+    #               "AND video_id IN ({});".format(config_.VIDEO_STATUS, video_ids[0])
+    #     else:
+    #         sql = "set hg_experimental_enable_shard_pruning=off; " \
+    #               "SELECT video_id " \
+    #               "FROM {} " \
+    #               "WHERE audit_status = 5 " \
+    #               "AND applet_rec_status IN (1, -6) " \
+    #               "AND open_status = 1 " \
+    #               "AND payment_status = 0 " \
+    #               "AND encryption_status != 5 " \
+    #               "AND transcoding_status = 3 " \
+    #               "AND video_id IN {};".format(config_.VIDEO_STATUS, tuple(video_ids))
+    #
+    #     hologres_helper = HologresHelper()
+    #     data = hologres_helper.get_data(sql=sql)
+    #     filtered_videos = [int(temp[0]) for temp in data]
+    #     return filtered_videos
 
     def filter_video_viewed(self, video_ids, types=(1, 6,)):
         """
@@ -373,6 +387,25 @@ class FilterVideos(object):
         filtered_videos = result['data']
         return filtered_videos
 
+    def filter_shield_video(self, video_ids, shield_key_name_list):
+        """
+        Remove shielded (blocked) videos from a list of video ids.
+
+        For every key in shield_key_name_list the members are fetched with
+        RedisHelper.get_data_from_set, converted with int(), and dropped from video_ids.
+        Keys for which nothing is returned are skipped.
+        :param video_ids: list of video ids to filter  type-list
+        :param shield_key_name_list: redis keys holding the shielded videos
+        :return: filtered_videos  the list after filtering  type-list
+        """
+        # Nothing to filter: return the (empty) input unchanged and skip Redis entirely
+        if len(video_ids) == 0:
+            return video_ids
+        # Filter against the data cached in Redis
+        redis_helper = RedisHelper()
+        for shield_key_name in shield_key_name_list:
+            shield_videos_list = redis_helper.get_data_from_set(key_name=shield_key_name)
+            if not shield_videos_list:
+                continue
+            # Shielded ids are compared as ints, so video_ids is presumably a list of ints
+            # (the visible caller passes int-converted ids) -- TODO confirm for other callers
+            shield_videos = [int(video) for video in shield_videos_list]
+            video_ids = [video_id for video_id in video_ids if video_id not in shield_videos]
+        return video_ids
+
 
 if __name__ == '__main__':
     # filter_ = FilterVideos(app_type=1, mid='22', uid='www', video_ids=[1, 2, 3, 55])

+ 5 - 1
video_recall.py

@@ -434,6 +434,10 @@ class PoolRecall(object):
     def flow_pool_recall(self, size=10, flow_pool_id=None):
         """从流量池中获取视频"""
         start_time = time.time()
+        # 获取provinceCode
+        province_code = self.client_info.get('provinceCode', '-1')
+        if province_code == '':
+            province_code = '-1'
         flow_pool_key = self.get_pool_redis_key('flow', flow_pool_id=flow_pool_id)
         print(flow_pool_key)
         flow_pool_recall_result = []
@@ -475,7 +479,7 @@ class PoolRecall(object):
             # 过滤
             filter_ = FilterVideos(request_id=self.request_id,
                                    app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=video_ids)
-            ge = gevent.spawn(filter_.filter_videos, pool_type='flow')
+            ge = gevent.spawn(filter_.filter_videos, pool_type='flow', province_code=province_code)
             ge.join()
             filtered_result = ge.get()
             #filtered_result = filter_.filter_videos()