Browse Source

Merge branch 'drop_base_20230608' of algorithm/rov-server into master

linfan 1 year ago
parent
commit
ebb9adb0c4
3 changed files with 197 additions and 47 deletions
  1. 31 31
      recommend.py
  2. 116 0
      utils.py
  3. 50 16
      video_recall.py

+ 31 - 31
recommend.py

@@ -211,21 +211,21 @@ def video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type, al
     if app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
         t = [gevent.spawn(pool_recall.rov_pool_recall_with_region, size, expire_time, ab_code, exp_config)]
         if ab_code==60058:
-            t.append(gevent.spawn(pool_recall.get_U2I_reall, mid))
+            t.append(gevent.spawn(pool_recall.get_U2I_reall, mid, exp_config))
         elif  ab_code==60059:
-            t.append(gevent.spawn(pool_recall.get_word2vec_item_reall))
+            t.append(gevent.spawn(pool_recall.get_word2vec_item_reall, exp_config))
         elif  ab_code==60061:
-            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
+            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter, exp_config))
     else:
         t = [gevent.spawn(pool_recall.rov_pool_recall_with_region, size, expire_time, ab_code, exp_config),
              gevent.spawn(pool_recall.flow_pool_recall, size, config_.QUICK_FLOW_POOL_ID),
              gevent.spawn(pool_recall.flow_pool_recall, size)]
         if ab_code==60058:
-            t.append(gevent.spawn(pool_recall.get_U2I_reall, mid))
+            t.append(gevent.spawn(pool_recall.get_U2I_reall, mid, exp_config))
         elif ab_code == 60059:
-            t.append(gevent.spawn(pool_recall.get_word2vec_item_reall))
+            t.append(gevent.spawn(pool_recall.get_word2vec_item_reall, exp_config))
         elif ab_code == 60061:
-            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
+            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter, exp_config))
 
     # 最惊奇相关推荐实验
     # elif ab_code == config_.AB_CODE['top_video_relevant_appType_19']:
@@ -432,11 +432,11 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
     if app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
         t = [gevent.spawn(pool_recall.rov_pool_recall_with_region, size, expire_time, ab_code, exp_config)]
         if ab_code ==60054:
-            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
+            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter, exp_config))
         if ab_code == 60055:
-            t.append(gevent.spawn(pool_recall.get_3days_hot_item_reall))
+            t.append(gevent.spawn(pool_recall.get_3days_hot_item_reall, exp_config))
         if ab_code == 60056:
-            t.append(gevent.spawn(pool_recall.get_hot_item_reall))
+            t.append(gevent.spawn(pool_recall.get_hot_item_reall, exp_config))
     else:
         t = [gevent.spawn(pool_recall.rov_pool_recall_with_region, size, expire_time, ab_code, exp_config),
              gevent.spawn(pool_recall.flow_pool_recall, size, config_.QUICK_FLOW_POOL_ID),
@@ -1600,15 +1600,15 @@ def video_homepage_recommend(request_id, mid, uid, size, app_type, algo_type,
                              no_op_flag=no_op_flag, old_video_index=old_video_index,
                              params=params, rule_key_30day=rule_key_30day, shield_config=shield_config)
     # old base (test4, test5)
-    elif ab_code == 60050 or ab_code == 60051:
-        result = video_recommend(request_id=request_id,
-                                 mid=mid, uid=uid, app_type=app_type,
-                                 size=size, top_K=top_K, flow_pool_P=flow_pool_P,
-                                 algo_type=algo_type, client_info=client_info,
-                                 ab_code=ab_code, expire_time=expire_time,
-                                 rule_key=rule_key, data_key=data_key,
-                                 no_op_flag=no_op_flag, old_video_index=old_video_index,
-                                 params=params, rule_key_30day=rule_key_30day, shield_config=shield_config)
+    # elif ab_code == 60050 or ab_code == 60051:
+    #     result = video_recommend(request_id=request_id,
+    #                              mid=mid, uid=uid, app_type=app_type,
+    #                              size=size, top_K=top_K, flow_pool_P=flow_pool_P,
+    #                              algo_type=algo_type, client_info=client_info,
+    #                              ab_code=ab_code, expire_time=expire_time,
+    #                              rule_key=rule_key, data_key=data_key,
+    #                              no_op_flag=no_op_flag, old_video_index=old_video_index,
+    #                              params=params, rule_key_30day=rule_key_30day, shield_config=shield_config)
     # 60052,60053,60057(test7, test8, test6)
     # simrecal: 60054
     # 3 days , 7days recall: 60005, test1
@@ -1653,8 +1653,8 @@ def video_homepage_recommend(request_id, mid, uid, size, app_type, algo_type,
     update_redis_st = time.time()
     if ab_code == 60047 or  ab_code == 60048 or  ab_code == 60049:
         update_flow_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
-    elif ab_code == 60050 or  ab_code == 60051:
-        update_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
+    # elif ab_code == 60050 or  ab_code == 60051:
+    #     update_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
     elif ab_code == 60052 or ab_code == 60053 or ab_code == 60054 or ab_code == 60055 \
             or ab_code == 60056 or ab_code==60057:
         update_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
@@ -1731,15 +1731,15 @@ def video_relevant_recommend(request_id, video_id, mid, uid, size, app_type, ab_
                                  old_video_index=old_video_index, video_id=video_id,
                                  params=params, rule_key_30day=rule_key_30day, shield_config=shield_config)
         # log_.info({
-    elif ab_code == 60050 or ab_code == 60051:
-        result = video_recommend(request_id=request_id,
-                                 mid=mid, uid=uid, app_type=app_type,
-                                 size=size, top_K=top_K, flow_pool_P=flow_pool_P,
-                                 algo_type='', client_info=client_info,
-                                 ab_code=ab_code, expire_time=expire_time,
-                                 rule_key=rule_key, data_key=data_key, no_op_flag=no_op_flag,
-                                 old_video_index=old_video_index, video_id=video_id,
-                                 params=params, rule_key_30day=rule_key_30day, shield_config=shield_config)
+    # elif ab_code == 60050 or ab_code == 60051:
+    #     result = video_recommend(request_id=request_id,
+    #                              mid=mid, uid=uid, app_type=app_type,
+    #                              size=size, top_K=top_K, flow_pool_P=flow_pool_P,
+    #                              algo_type='', client_info=client_info,
+    #                              ab_code=ab_code, expire_time=expire_time,
+    #                              rule_key=rule_key, data_key=data_key, no_op_flag=no_op_flag,
+    #                              old_video_index=old_video_index, video_id=video_id,
+    #                              params=params, rule_key_30day=rule_key_30day, shield_config=shield_config)
     elif ab_code == 60052 or ab_code == 60053 or ab_code == 60054 or ab_code == 60055 or \
             ab_code == 60056 or ab_code==60057:
         result = video_old_recommend(request_id=request_id,
@@ -1782,8 +1782,8 @@ def video_relevant_recommend(request_id, video_id, mid, uid, size, app_type, ab_
     update_redis_st = time.time()
     if ab_code == 60047 or ab_code == 60048 or  ab_code == 60049:
          update_flow_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
-    elif ab_code == 60050 or ab_code == 60051:
-         update_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
+    # elif ab_code == 60050 or ab_code == 60051:
+    #      update_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
     elif ab_code == 60052 or ab_code == 60053 or ab_code == 60054 or ab_code == 60055 or \
             ab_code == 60056 or ab_code==60057:
          update_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)

+ 116 - 0
utils.py

@@ -665,6 +665,122 @@ class FilterVideos(object):
                     return filtered_viewed_videos
 
 
+
+    def filter_videos_status(self, pool_type='rov', region_code=None, shield_config=None):
+        """Filter self.video_ids: pre-exposure filter, then viewed/status filter, then an optional shield filter."""
+        # Pre-exposure filter
+        st_pre = time.time()  # only referenced by the commented-out timing/log code below
+        filtered_pre_result = self.filter_video_previewed(self.video_ids)
+        # print("filtered_pre:", (time.time()-st_pre)*1000)
+        # et_pre = time.time()
+        # log_.info({
+        #     'logTimestamp': int(time.time() * 1000),
+        #     'request_id': self.request_id,
+        #     'app_type': self.app_type,
+        #     'mid': self.mid,
+        #     'uid': self.uid,
+        #     'operation': 'preview_filter',
+        #     'request_videos': self.video_ids,
+        #     'preview_filter_result': filtered_pre_result,
+        #     'executeTime': (time.time() - st_pre) * 1000
+        # })
+        if not filtered_pre_result:  # nothing survived the pre-exposure filter
+            return None
+
+        # Video-status filtering uses an offline, scheduled filtering scheme
+        # Video-status filter (standalone step, currently disabled)
+        # st_status = time.time()
+        # filtered_status_result = self.filter_video_status(video_ids=filtered_pre_result)
+        # et_status = time.time()
+        # log_.info('filter by video status: result = {}, execute time = {}ms'.format(
+        #     filtered_status_result, (et_status - st_status) * 1000))
+        # if not filtered_status_result:
+        #     return None
+
+        # Already-viewed (exposed) video filter
+        st_viewed = time.time()
+        filtered_viewed_result = self.filter_video_viewed_status(video_ids=filtered_pre_result)
+        # print("filtered_pre:", (time.time() - st_viewed) * 1000)
+        # et_viewed = time.time()
+        # log_.info({
+        #     'logTimestamp': int(time.time() * 1000),
+        #     'pool_type': pool_type,
+        #     'request_id': self.request_id,
+        #     'app_type': self.app_type,
+        #     'mid': self.mid,
+        #     'uid': self.uid,
+        #     'operation': 'view_filter',
+        #     'request_videos': filtered_pre_result,
+        #     'view_filter_result': filtered_viewed_result,
+        #     'executeTime': (time.time() - st_viewed) * 1000
+        # })
+        if not filtered_viewed_result:  # nothing survived the viewed filter
+            return None
+        filtered_viewed_videos = [int(video_id) for video_id in filtered_viewed_result]  # normalize ids to int
+        if pool_type == 'flow' or pool_type=='normal':
+            # Flow-pool videos also need shielded videos filtered out (this branch covers 'normal' as well)
+            if region_code is None or shield_config is None:
+                return filtered_viewed_videos  # shield filtering needs both a region code and a shield config
+            else:
+                shield_key_name_list = shield_config.get(region_code, None)
+                if shield_key_name_list is not None:
+                    filtered_shield_video_ids = self.filter_shield_video(
+                        video_ids=filtered_viewed_videos, shield_key_name_list=shield_key_name_list
+                    )
+                    log_.info({
+                        'logTimestamp': int(time.time() * 1000),
+                        'pool_type': pool_type,
+                        'request_id': self.request_id,
+                        'app_type': self.app_type,
+                        'mid': self.mid,
+                        'uid': self.uid,
+                        'operation': 'shield_filter',
+                        'request_videos': filtered_viewed_videos,
+                        'shield_filter_result': filtered_shield_video_ids,
+                        'executeTime': (time.time() - st_viewed) * 1000  # measured from st_viewed, so it includes the viewed-filter call
+                    })
+                    # print("filtered_pre flow:", (time.time() - st_viewed) * 1000)
+                    return filtered_shield_video_ids  # whatever filter_shield_video returned
+                else:
+                    return filtered_viewed_videos  # no shield keys configured for this region
+        else:
+            return filtered_viewed_videos  # other pool types skip the shield filter
+    def filter_video_viewed_status(self, video_ids, types=(1, 6,)):
+        """
+        Call the backend filter endpoint to filter out videos the user has already viewed.
+        :param video_ids: list of video ids to filter
+        :param types: filter types, tuple; signature default is (1, 6). NOTE(review): this argument is overwritten from config below - confirm that is intended
+        Type codes: 1-viewed 2-video status 3-entered senior community 4-topic status 5-recommendation status 6-whitelist filter 7-politics-related video filter
+        :return: result['data'] on success; [] when the request returned None or result['code'] != 0
+        """
+        # Look up the filter types configured for this app type (replaces the `types` argument)
+        types = config_.FILTER_VIEWED_TYPES_CONFIG.get(self.app_type, None)
+        if types is None:
+            types = config_.FILTER_VIEWED_TYPES_CONFIG.get('other')  # fall back to the 'other' entry
+        types = list(types)  # copy into a new list before appending
+        types.append(2)  # always request type 2 as well (video status, per the type codes in the docstring)
+        request_data = {"appType": self.app_type,
+                        "mid": self.mid,
+                        "uid": self.uid,
+                        "types": types,
+                        "videoIds": video_ids}
+        # print(request_data)
+        # Call the HTTP endpoint
+        result = request_post(request_url=config_.VIDEO_FILTER_URL, request_data=request_data, timeout=(0.1, 1))
+
+        # print("result:", result)
+        if result is None:  # request_post returned nothing: treat as "no videos pass"
+            # print("result is None")
+            # log_.info('过滤失败,types: {}'.format(types))
+            return []
+
+        if result['code'] != 0:  # non-zero code is treated as a failed filter call
+            # log_.info('过滤失败,types: {}'.format(types))
+            return []
+
+        filtered_videos = result['data']
+        return filtered_videos
+
 if __name__ == '__main__':
     user = [
         ('weixin_openid_o0w175fDc8pNnywrYN49E341tKfI', ''),

+ 50 - 16
video_recall.py

@@ -2137,7 +2137,7 @@ class PoolRecall(object):
                 )
         return recall_result[:200]
 
-    def get_sim_hot_item_reall_filter(self):
+    def get_sim_hot_item_reall_filter(self, exp_config=None):
         if self.video_id is None:
             return  []
         recall_key = "sim_hot_" + str(self.video_id)
@@ -2162,18 +2162,25 @@ class PoolRecall(object):
                     continue
         if len(video_ids)<=0:
             return  recall_result
-        video_ids = video_ids[:50]
+        recall_num = 20
+        try:
+            if exp_config and exp_config['recall_get_num']:
+                recall_num = int(exp_config['recall_get_num'])
+        except:
+            recall_num = 20
+        #print("recall_num:", recall_num)
+        video_ids = video_ids[:recall_num]
         #print(video_ids)
         filter_ = FilterVideos(request_id=self.request_id,
                                app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=video_ids)
-        filtered_viewed_videos = filter_.filter_videos(pool_type='normal')
+        filtered_viewed_videos = filter_.filter_videos_status(pool_type='normal')
         if filtered_viewed_videos is None:
             return recall_result
-        print("filtered_viewed_videos:", filtered_viewed_videos)
+        #print("filtered_viewed_videos:", filtered_viewed_videos)
         for vid in filtered_viewed_videos:
             if vid in recall_dict:
                 recall_result.append(recall_dict[vid])
-        return recall_result[:30]
+        return recall_result
     # get region_hour_recall
     def get_region_hour_recall(self, size=4, region_code='-1'):
         pool_key_prefix = config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H
@@ -2377,7 +2384,7 @@ class PoolRecall(object):
 
         return flow_pool_recall_result[:size]
 
-    def get_3days_hot_item_reall(self):
+    def get_3days_hot_item_reall(self, exp_config=None):
         recall_key = "hot_3day:"
         #print("recall_key:", recall_key)
         data = self.redis_helper.get_data_from_redis(key_name=recall_key)
@@ -2401,11 +2408,17 @@ class PoolRecall(object):
         #print("vid len:", len(video_ids))
         if len(video_ids)<=0:
             return  recall_result
-        video_ids = video_ids[:30]
+        recall_num = 20
+        try:
+            if exp_config and exp_config['recall_get_num']:
+                recall_num = int(exp_config['recall_get_num'])
+        except:
+            recall_num = 20
+        video_ids = video_ids[:recall_num]
         #print(video_ids)
         filter_ = FilterVideos(request_id=self.request_id,
                                app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=video_ids)
-        filtered_viewed_videos = filter_.filter_videos(pool_type='normal')
+        filtered_viewed_videos = filter_.filter_videos_status(pool_type='normal')
         if filtered_viewed_videos is None:
             return recall_result
         #print("filtered_viewed_videos:", filtered_viewed_videos)
@@ -2416,7 +2429,7 @@ class PoolRecall(object):
         #print("recall_dict:", recall_dict)
         return recall_result
 
-    def get_hot_item_reall(self):
+    def get_hot_item_reall(self,exp_config=None):
         #recall_key = "hot_video:"
         recall_key = "hot_video:"
         #print("recall_key:", recall_key)
@@ -2440,7 +2453,14 @@ class PoolRecall(object):
                     continue
         if len(video_ids)<=0:
             return  recall_result
-        video_ids = video_ids[:30]
+        recall_num = 20
+        try:
+            if exp_config and exp_config['recall_get_num']:
+                recall_num = int(exp_config['recall_get_num'])
+        except:
+            recall_num = 20
+        #print("recall_num:", recall_num)
+        video_ids = video_ids[:recall_num]
         #print(video_ids)
         filter_ = FilterVideos(request_id=self.request_id,
                                app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=video_ids)
@@ -2492,7 +2512,7 @@ class PoolRecall(object):
                 recall_result.append(recall_dict[vid])
         return recall_result
 
-    def get_word2vec_item_reall(self):
+    def get_word2vec_item_reall(self, exp_config=None):
         if self.video_id is None:
             return  []
         recall_key = "w2v:" + str(self.video_id)
@@ -2517,11 +2537,18 @@ class PoolRecall(object):
                     continue
         if len(video_ids)<=0:
             return  recall_result
-        video_ids = video_ids[:50]
+        recall_num = 20
+        try:
+            if exp_config and exp_config['recall_get_num']:
+                recall_num = int(exp_config['recall_get_num'])
+        except:
+            recall_num = 20
+        #print("recall_num:", recall_num)
+        video_ids = video_ids[:recall_num]
         #print(video_ids)
         filter_ = FilterVideos(request_id=self.request_id,
                                app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=video_ids)
-        filtered_viewed_videos = filter_.filter_videos(pool_type='normal')
+        filtered_viewed_videos = filter_.filter_videos_status(pool_type='normal')
         if filtered_viewed_videos is None:
             return recall_result
         #print("filtered_viewed_videos:", filtered_viewed_videos)
@@ -2665,7 +2692,7 @@ class PoolRecall(object):
             return None
 
 
-    def get_U2I_reall(self, mid):
+    def get_U2I_reall(self, mid, exp_config=None):
         #recall_key = "hot_video:"
         if not mid:
             return  []
@@ -2691,11 +2718,18 @@ class PoolRecall(object):
                     continue
         if len(video_ids)<=0:
             return  recall_result
-        video_ids = video_ids[:50]
+        recall_num = 20
+        try:
+            if exp_config and exp_config['recall_get_num']:
+                recall_num = int(exp_config['recall_get_num'])
+        except:
+            recall_num = 20
+        #print("recall_num:", recall_num)
+        video_ids = video_ids[:recall_num]
         #print(video_ids)
         filter_ = FilterVideos(request_id=self.request_id,
                                app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=video_ids)
-        filtered_viewed_videos = filter_.filter_videos(pool_type='normal')
+        filtered_viewed_videos = filter_.filter_videos_status(pool_type='normal')
         if filtered_viewed_videos is None:
             return recall_result
         #print("filtered_viewed_videos:", filtered_viewed_videos)