Browse Source

add new filter

liqian 1 year ago
parent
commit
dc12eaa359
3 changed files with 168 additions and 9 deletions
  1. 9 2
      config.py
  2. 9 2
      db_helper.py
  3. 150 5
      utils.py

+ 9 - 2
config.py

@@ -17,6 +17,13 @@ class BaseConfig(object):
         'PIAO_QUAN_VIDEO_PLUS': 21,  # 票圈视频+
         'JOURNEY': 22,  # 票圈足迹
         'BLESSING_YEAR': 3,  # 票圈福年
+        'H5': 12,  # H5
+    }
+    # Redis used by the whitelist (movie / religion) filter. NOTE(review): the password is committed in plain text - consider loading it from the environment
+    REDIS_INFO_FILTER = {
+        'host': 'r-bp1258kbkv8dj81dwj.redis.rds.aliyuncs.com',
+        'port': 6379,
+        'password': 'Wqsd@2019',
     }
 
     REGION_CODE = {
@@ -1259,8 +1266,8 @@ class ProductionConfig(BaseConfig):
 
 def set_config():
     # 获取环境变量 ROV_SERVER_ENV
-    env = os.environ.get('ROV_SERVER_ENV')
-    # env = 'dev'
+    # env = os.environ.get('ROV_SERVER_ENV')
+    env = 'dev'
     if env is None:
         # log_.error('ENV ERROR: is None!')
         return

+ 9 - 2
db_helper.py

@@ -14,12 +14,11 @@ conn_redis = None
 
 
 class RedisHelper(object):
-    def __init__(self, params=None):
+    def __init__(self, params=None, redis_info=config_.REDIS_INFO):
         """
         初始化redis连接信息
         redis_info: redis连接信息, 格式:dict, {'host': '', 'port': '', 'password': ''}
         """
-        redis_info = config_.REDIS_INFO
         self.host = redis_info['host']
         self.port = redis_info['port']
         self.password = redis_info['password']
@@ -418,6 +417,14 @@ class RedisHelper(object):
         #         'executeTime': (time.time() - start_time) * 1000
         #     })
 
+    def mget(self, keys):
+        """
+        Fetch the values of several keys with one MGET call and print the elapsed time in ms.
+        :param keys: key names to look up  type-list
+        :return: whatever the client's mget returns for these keys (redis-py yields one entry
+                 per key, None for a missing key); [] when keys is empty
+        """
+        # MGET without any key is rejected by the Redis server, so skip the round trip
+        if not keys:
+            return []
+        st_time = time.time()
+        conn = self.connect()
+        data = conn.mget(keys=keys)
+        print(f"mget time: {(time.time() - st_time) * 1000}")
+        return data
+
+
 
 #hologres_info = config_.HOLOGRES_INFO
 #conn = psycopg2.connect(**hologres_info)

+ 150 - 5
utils.py

@@ -3,6 +3,9 @@ import traceback
 import requests
 import json
 import time
+import gevent
+import pandas as pd
+import random
 
 from datetime import datetime
 # from db_helper import HologresHelper, RedisHelper, MysqlHelper
@@ -370,6 +373,7 @@ class FilterVideos(object):
         :param video_ids: 需过滤的视频列表 type-list
         :return: filtered_videos  过滤后的列表  type-list
         """
+        pre_time = time.time()
         if not self.mid or self.mid == 'null':
             # mid为空时,不做预曝光过滤
             return video_ids
@@ -386,6 +390,7 @@ class FilterVideos(object):
             return video_ids
         pe_videos = [int(video) for video in pe_videos_list]
         filtered_videos = [video_id for video_id in video_ids if video_id not in pe_videos]
+        print(f"pre res: {filtered_videos}\nexecute_time: {(time.time() - pre_time) * 1000}")
         return filtered_videos
 
     # def filter_video_status(self, video_ids):
@@ -453,6 +458,43 @@ class FilterVideos(object):
         filtered_videos = result['data']
         return filtered_videos
 
+    def filter_video_viewed_new(self, video_ids):
+        """
+        Filter videos through the backend filter interface, leaving out the whitelist check (type 6).
+        Filter type codes: 1-viewed 2-video status 3-entered the senior community 4-topic status
+        5-recommend status 6-whitelist filter 7-political video filter
+        :param video_ids: video id list  type-list
+        :return: filtered_videos, the 'data' field of the response; [] when the request
+                 returns None or a non-zero code
+        """
+        started_at = time.time()
+        # filter types configured for this app type, falling back to the 'other' entry
+        type_config = config_.FILTER_VIEWED_TYPES_CONFIG
+        filter_types = type_config.get(self.app_type, None)
+        print(filter_types)
+        if filter_types is None:
+            filter_types = type_config.get('other')
+        # type 6 (whitelist) is dropped from the request sent to the backend
+        if 6 in filter_types:
+            filter_types = list(filter_types)
+            filter_types.remove(6)
+        print(filter_types)
+        payload = {
+            "appType": self.app_type,
+            "mid": self.mid,
+            "uid": self.uid,
+            "types": list(filter_types),
+            "videoIds": video_ids,
+        }
+        # call the http interface
+        response = request_post(request_url=config_.VIDEO_FILTER_URL, request_data=payload, timeout=(0.1, 1))
+        print(f"view res: {response}\nexecute_time: {(time.time() - started_at) * 1000}")
+        # a missing response or a non-zero code both count as "filter failed"
+        if response is None or response['code'] != 0:
+            return []
+        return response['data']
+
     def filter_shield_video(self, video_ids, shield_key_name_list):
         """
         过滤屏蔽视频视频
@@ -476,10 +518,111 @@ class FilterVideos(object):
             # video_ids = [int(video_id) for video_id in video_ids if int(video_id) not in shield_videos]
         return video_ids
 
+    def filter_movie_religion_video(self, video_ids):
+        """
+        Filter out whitelist-restricted videos (movie / religion) via the filter Redis.
+        Redis keys looked up per video:
+            movie + religion: rov.filter.movie.{videoId}
+            religion only:    rov.filter.religion.{videoId}
+        :param video_ids: video id list  type-list
+        :return: video ids that are not flagged in Redis, in their original order  type-list
+        """
+        st_time = time.time()
+        religion_only_apps = [config_.APP_TYPE['WAN_NENG_VIDEO'],
+                              config_.APP_TYPE['ZUI_JING_QI'],
+                              config_.APP_TYPE['H5']]
+        if self.app_type in religion_only_apps:
+            # filter religion only
+            keys = [f"rov.filter.religion.{video_id}" for video_id in video_ids]
+        elif self.app_type == config_.APP_TYPE['LAO_HAO_KAN_VIDEO']:
+            # this app type gets no movie / religion filtering
+            print(f"m_r res: {video_ids}\nexecute_time: {(time.time() - st_time) * 1000}")
+            return video_ids
+        else:
+            # every other app type: filter movie + religion
+            keys = [f"rov.filter.movie.{video_id}" for video_id in video_ids]
+        redis_helper = RedisHelper(redis_info=config_.REDIS_INFO_FILTER)
+        # query Redis in batches so a single MGET never carries more than batch_size keys
+        batch_size = 1000
+        filter_videos = set()
+        for start in range(0, len(keys), batch_size):
+            mget_res = redis_helper.mget(keys=keys[start:start + batch_size])
+            # a key that does not exist comes back as None (redis-py); int(None) would raise.
+            # presumably the stored value is the id of the video to filter - verify against the writer
+            filter_videos.update(int(data) for data in mget_res if data is not None and data != 'nil')
+        if filter_videos:
+            # keep the incoming order instead of going through an unordered set difference
+            filtered_videos = [video_id for video_id in video_ids if video_id not in filter_videos]
+            print(f"m_r res: {filtered_videos}\nexecute_time: {(time.time() - st_time) * 1000}")
+            return filtered_videos
+        print(f"m_r res: {video_ids}\nexecute_time: {(time.time() - st_time) * 1000}")
+        return video_ids
+
+    def filter_videos_new(self, pool_type='rov', region_code=None, shield_config=None):
+        """
+        Run the filter chain on self.video_ids: the pre-exposure filter first, then the backend
+        filter and the movie / religion whitelist filter (spawned as two gevent greenlets), and
+        for the flow pool the shield filter.
+        :param pool_type: video pool type, 'flow' enables the shield filter  type-str, default 'rov'
+        :param region_code: key used to look up the shield key names in shield_config
+        :param shield_config: mapping of region_code -> shield key name list  type-dict
+        :return: filtered video ids  type-list, or None when a filter step leaves nothing
+        """
+        # pre-exposure filter
+        filtered_pre_result = self.filter_video_previewed(self.video_ids)
+        if not filtered_pre_result:
+            return None
+
+        # backend filter / whitelist filter, both fed with the pre-exposure result
+        st_viewed = time.time()
+        t = [
+            gevent.spawn(self.filter_video_viewed_new, filtered_pre_result),
+            gevent.spawn(self.filter_movie_religion_video, filtered_pre_result)]
+        gevent.joinall(t)
+        viewed_result, whitelist_result = [i.get() for i in t]
+        # intersect the two results, de-duplicated, keeping the order of the backend result
+        # (a plain set intersection would return the ids in arbitrary order)
+        allowed = set(whitelist_result)
+        filtered_viewed_result = [video_id for video_id in dict.fromkeys(viewed_result) if video_id in allowed]
+        print(f"view&m_r res: {filtered_viewed_result}\nexecute_time: {(time.time() - st_viewed) * 1000}")
+        if not filtered_viewed_result:
+            return None
+        filtered_viewed_videos = [int(video_id) for video_id in filtered_viewed_result]
+
+        # only flow-pool videos with a usable shield configuration go through the shield filter
+        if pool_type != 'flow' or region_code is None or shield_config is None:
+            return filtered_viewed_videos
+        shield_key_name_list = shield_config.get(region_code, None)
+        if shield_key_name_list is None:
+            return filtered_viewed_videos
+        filtered_shield_video_ids = self.filter_shield_video(
+            video_ids=filtered_viewed_videos, shield_key_name_list=shield_key_name_list
+        )
+        log_.info({
+            'logTimestamp': int(time.time() * 1000),
+            'pool_type': pool_type,
+            'request_id': self.request_id,
+            'app_type': self.app_type,
+            'mid': self.mid,
+            'uid': self.uid,
+            'operation': 'shield_filter',
+            'request_videos': filtered_viewed_videos,
+            'shield_filter_result': filtered_shield_video_ids,
+            'executeTime': (time.time() - st_viewed) * 1000
+        })
+        return filtered_shield_video_ids
+
 
 if __name__ == '__main__':
-    # filter_ = FilterVideos(app_type=1, mid='22', uid='www', video_ids=[1, 2, 3, 55])
-    # filter_.filter_videos()
+    user = [
+        ('weixin_openid_o0w175fDc8pNnywrYN49E341tKfI', ''),
+        ('weixin_openid_o0w175YwC3hStzcR5DAQdbgzdMeI', ''),
+        ('weixin_openid_o0w175ftZDl6VJVDx9la3WVPh7mU', '15900461'),
+        ('weixin_openid_o0w175SPqpCVRcp7x1XvnX4qpIvI', '19659040'),
+        ('weixin_openid_o0w175cOnguapyWIrDrHkOWl4oFQ', '31210128'),
+        ('weixin_openid_o0w175UXYId-o71e1Q3SOheYNteQ', '33099722'),
+        ('weixin_openid_o0w175QQ5b42AtOe50bchrFgcttA', ''),
+        ('weixin_openid_o0w175bgaPlfLsp3YLDKWqLWtXX8', '35371534'),
+        ('weixin_openid_o0w175eRpvbmV6nOhM1VTyyLICWA', '30488803'),
+        ('weixin_openid_o0w175TZYvG47pQkOjyJFoxQuqsw', '')
+    ]
+    video_df = pd.read_csv('/Users/liqian/Documents/liqian/work/PythonProject/test/data/videoids.csv')
+    videoid_list = video_df['videoid'].tolist()
+    for mid, uid in user:
+        video_ids = random.sample(videoid_list, 1000)
+        start_time = time.time()
+        filter_ = FilterVideos(request_id=f'{mid} - {uid}', app_type=0, mid=mid, uid=uid, video_ids=video_ids)
+        res = filter_.filter_videos_new()
+        print(f"res: {res}\nexecute_time: {(time.time() - start_time) * 1000}")
     # filter_.filter_video_status(video_ids=[1, 3, 5])
 
     # videos = [{'videoId': 9034659, 'flowPool': '3#11#3#1637824188547'}, {'videoId': 9035052, 'flowPool': '3#11#3#1637824172827'}]
@@ -491,9 +634,11 @@ if __name__ == '__main__':
 
     # update_video_w_h_rate(video_id=113, key_name='')
 
-    mid = "weixin_openid_obHDW5c4g3aULfCWh-68LcUSxCB"
+    # mid = "weixin_openid_obHDW5c4g3aULfCWh-68LcUSxCB"
     # request_url = f"{config_.GET_USER_30DayReturnCnt_URL}{mid}"
     # res = request_get(request_url=request_url, timeout=100)
-    res = get_user_has30day_return(mid=mid)
-    print(res, type(res))
+    # res = get_user_has30day_return(mid=mid)
+    # print(res, type(res))
+
+