Browse Source

merge filter-update-2023051011

liqian 2 years ago
parent
commit
184b0b36b8
3 changed files with 120 additions and 11 deletions
  1. 7 0
      config.py
  2. 9 3
      db_helper.py
  3. 104 8
      utils.py

+ 7 - 0
config.py

@@ -17,6 +17,13 @@ class BaseConfig(object):
         'PIAO_QUAN_VIDEO_PLUS': 21,  # 票圈视频+
         'JOURNEY': 22,  # 票圈足迹
         'BLESSING_YEAR': 3,  # 票圈福年
+        'H5': 12,  # H5
+    }
+    # 白名单(影视,宗教)过滤Redis
+    REDIS_INFO_FILTER = {
+        'host': 'r-bp1258kbkv8dj81dwj.redis.rds.aliyuncs.com',
+        'port': 6379,
+        'password': 'Wqsd@2019',
     }
 
     REGION_CODE = {

+ 9 - 3
db_helper.py

@@ -14,12 +14,11 @@ conn_redis = None
 
 
 class RedisHelper(object):
-    def __init__(self, params=None):
+    def __init__(self, params=None, redis_info=config_.REDIS_INFO):
         """
         初始化redis连接信息
         redis_info: redis连接信息, 格式:dict, {'host': '', 'port': '', 'password': ''}
         """
-        redis_info = config_.REDIS_INFO
         self.host = redis_info['host']
         self.port = redis_info['port']
         self.password = redis_info['password']
@@ -421,7 +420,14 @@ class RedisHelper(object):
     def get_batch_key(self, name_list):
         conn = self.connect()
         res = conn.mget(name_list)
-        return  res
+        return res
+
+    def mget(self, keys):
+        """Fetch the values of `keys` with one Redis MGET call and print the elapsed milliseconds."""
+        began = time.time()
+        values = self.connect().mget(keys=keys)
+        print(f"mget time: {(time.time() - began) * 1000}")
+        return values
 
 #hologres_info = config_.HOLOGRES_INFO
 #conn = psycopg2.connect(**hologres_info)

+ 104 - 8
utils.py

@@ -3,6 +3,9 @@ import traceback
 import requests
 import json
 import time
+import gevent
+import pandas as pd
+import random
 
 from datetime import datetime
 # from db_helper import HologresHelper, RedisHelper, MysqlHelper
@@ -370,6 +373,7 @@ class FilterVideos(object):
         :param video_ids: 需过滤的视频列表 type-list
         :return: filtered_videos  过滤后的列表  type-list
         """
+        pre_time = time.time()
         if not self.mid or self.mid == 'null':
             # mid为空时,不做预曝光过滤
             return video_ids
@@ -389,6 +393,7 @@ class FilterVideos(object):
         pe_videos = [int(video) for video in pe_videos_list]
         print("pe_videos:", pe_videos)
         filtered_videos = [video_id for video_id in video_ids if video_id not in pe_videos]
+        print(f"pre res: {filtered_videos}\nexecute_time: {(time.time() - pre_time) * 1000}")
         return filtered_videos
 
     # def filter_video_status(self, video_ids):
@@ -459,6 +464,43 @@ class FilterVideos(object):
         filtered_videos = result['data']
         return filtered_videos
 
+    def filter_video_viewed_new(self, video_ids):
+        """
+        Filter video ids through the backend HTTP filter service, with type 6 left out.
+
+        Type legend: 1-viewed 2-video status 3-entered senior community 4-topic status
+        5-recommendation status 6-whitelist filter 7-politics-related video filter.
+        :param video_ids: list of video ids to filter
+        :return: result['data'] from the service; [] if the call failed or code != 0
+        """
+        # Look up the filter types for this app type; fall back to the 'other' entry.
+        began = time.time()
+        types = config_.FILTER_VIEWED_TYPES_CONFIG.get(self.app_type, None)
+        print(types)
+        if types is None:
+            types = config_.FILTER_VIEWED_TYPES_CONFIG.get('other')
+        if 6 in types:
+            # Type 6 is not sent to the service from this method.
+            types = list(types)
+            types.remove(6)
+        print(types)
+        payload = {
+            "appType": self.app_type,
+            "mid": self.mid,
+            "uid": self.uid,
+            "types": list(types),
+            "videoIds": video_ids,
+        }
+        # Call the HTTP filter endpoint.
+        result = request_post(request_url=config_.VIDEO_FILTER_URL, request_data=payload, timeout=(0.1, 1))
+        print(f"view res: {result}\nexecute_time: {(time.time() - began) * 1000}")
+        # A missing response and a non-zero code are both treated as a failed filter call.
+        if result is None or result['code'] != 0:
+            # The failure is not logged here; the caller just receives an empty list.
+            return []
+
+        return result['data']
+
     def filter_shield_video(self, video_ids, shield_key_name_list):
         """
         过滤屏蔽视频视频
@@ -527,7 +569,42 @@ class FilterVideos(object):
         else:
             return normal_video_list, flow_video_list
 
-         def filter_videos_new(self, pool_type='rov', region_code=None, shield_config=None):
+    def filter_movie_religion_video(self, video_ids):
+        """
+        Remove movie / religion flagged videos from video_ids, preserving the input order.
+
+        Redis keys checked: rov.filter.movie.{videoId} (movie + religion) or
+        rov.filter.religion.{videoId} (religion only), chosen by self.app_type.
+        Every non-None MGET value is parsed with int() as a video id to drop.
+        :param video_ids: list of video ids to filter
+        :return: list of remaining video ids (video_ids itself when nothing is flagged)
+        """
+        st_time = time.time()
+        app_types = config_.APP_TYPE
+        exempt_app = app_types['LAO_HAO_KAN_VIDEO']
+        religion_only_apps = (app_types['WAN_NENG_VIDEO'], app_types['ZUI_JING_QI'], app_types['H5'])
+        if self.app_type in religion_only_apps:
+            key_prefix = "rov.filter.religion."  # religion only
+        elif self.app_type == exempt_app:
+            # This app type skips the whitelist filter entirely.
+            print(f"m_r res: {video_ids}\nexecute_time: {(time.time() - st_time) * 1000}")
+            return video_ids
+        else:
+            key_prefix = "rov.filter.movie."  # movie + religion
+        keys = [f"{key_prefix}{video_id}" for video_id in video_ids]
+        redis_helper = RedisHelper(redis_info=config_.REDIS_INFO_FILTER)
+        flagged = set()
+        # MGET in batches of 1000 keys; range() with a step never yields an empty batch.
+        for start in range(0, len(keys), 1000):
+            mget_res = redis_helper.mget(keys=keys[start:start + 1000])
+            flagged.update(int(data) for data in mget_res if data is not None)
+        if flagged:
+            # A set difference would scramble the caller's ordering, so filter the list instead.
+            video_ids = [video_id for video_id in video_ids if video_id not in flagged]
+        print(f"m_r res: {video_ids}\nexecute_time: {(time.time() - st_time) * 1000}")
+        return video_ids
+
+    def filter_videos_new(self, pool_type='rov', region_code=None, shield_config=None):
         """视频过滤"""
         # 预曝光过滤
         st_pre = time.time()
@@ -572,12 +649,29 @@ class FilterVideos(object):
                     return filtered_shield_video_ids
                 else:
                     return filtered_viewed_videos
-        else:
-            return filtered_viewed_videos
+
 
 if __name__ == '__main__':
-    # filter_ = FilterVideos(app_type=1, mid='22', uid='www', video_ids=[1, 2, 3, 55])
-    # filter_.filter_videos()
+    user = [
+        ('weixin_openid_o0w175fDc8pNnywrYN49E341tKfI', ''),
+        ('weixin_openid_o0w175YwC3hStzcR5DAQdbgzdMeI', ''),
+        ('weixin_openid_o0w175ftZDl6VJVDx9la3WVPh7mU', '15900461'),
+        ('weixin_openid_o0w175SPqpCVRcp7x1XvnX4qpIvI', '19659040'),
+        ('weixin_openid_o0w175cOnguapyWIrDrHkOWl4oFQ', '31210128'),
+        ('weixin_openid_o0w175UXYId-o71e1Q3SOheYNteQ', '33099722'),
+        ('weixin_openid_o0w175QQ5b42AtOe50bchrFgcttA', ''),
+        ('weixin_openid_o0w175bgaPlfLsp3YLDKWqLWtXX8', '35371534'),
+        ('weixin_openid_o0w175eRpvbmV6nOhM1VTyyLICWA', '30488803'),
+        ('weixin_openid_o0w175TZYvG47pQkOjyJFoxQuqsw', '')
+    ]
+    video_df = pd.read_csv('./data/videoids.csv')
+    videoid_list = video_df['videoid'].tolist()
+    for mid, uid in user:
+        video_ids = random.sample(videoid_list, 1000)
+        start_time = time.time()
+        filter_ = FilterVideos(request_id=f'{mid} - {uid}', app_type=0, mid=mid, uid=uid, video_ids=video_ids)
+        res = filter_.filter_videos_new()
+        print(f"res: {res}\nexecute_time: {(time.time() - start_time) * 1000}")
     # filter_.filter_video_status(video_ids=[1, 3, 5])
 
     # videos = [{'videoId': 9034659, 'flowPool': '3#11#3#1637824188547'}, {'videoId': 9035052, 'flowPool': '3#11#3#1637824172827'}]
@@ -589,9 +683,11 @@ if __name__ == '__main__':
 
     # update_video_w_h_rate(video_id=113, key_name='')
 
-    mid = "weixin_openid_obHDW5c4g3aULfCWh-68LcUSxCB"
+    # mid = "weixin_openid_obHDW5c4g3aULfCWh-68LcUSxCB"
     # request_url = f"{config_.GET_USER_30DayReturnCnt_URL}{mid}"
     # res = request_get(request_url=request_url, timeout=100)
-    res = get_user_has30day_return(mid=mid)
-    print(res, type(res))
+    # res = get_user_has30day_return(mid=mid)
+    # print(res, type(res))
+
+