zhangbo 1 år sedan
förälder
incheckning
61f37d020a
4 ändrade filer med 121 tillägg och 5 borttagningar
  1. 7 0
      config.py
  2. 54 4
      recommend.py
  3. 12 0
      utils.py
  4. 48 1
      video_recall.py

+ 7 - 0
config.py

@@ -196,6 +196,8 @@ class BaseConfig(object):
             'abtest_518': 60096,
             # 流量池召回子策略,ab实验号523,推荐服务内实验号60097
             'abtest_523': 60097,
+            # 趋势性召回子策略,ab实验号533,推荐服务内实验号60098
+            'abtest_533': 60098,
         },  # 地域分组小时级规则实验
 
         'rank_by_24h': {
@@ -573,6 +575,10 @@ class BaseConfig(object):
             'data_key': 'data66', 'rule_key': 'rule66',
             'ab_code': AB_CODE['region_rank_by_h'].get('abtest_523'),
         },  #
+        '533': {
+            'data_key': 'data66', 'rule_key': 'rule66',
+            'ab_code': AB_CODE['region_rank_by_h'].get('abtest_533'),
+        },  # Adds the trend recall path (ab experiment 533 -> in-service code 60098)
 
     }
 
@@ -660,6 +666,7 @@ class BaseConfig(object):
         'return_video_recall': 'return_video_recall',  # return_video_recall
         'u2i_tag_play_recall': 'u2i_tag_play_recall',  # u2i_tag_recall
         'rov_recall_h_h_without_dup': 'rov_recall_h_h_without_dup',  # 不区分地域小时级更新列表(不做离线去重)
+        'recall_strategy_trend_v1': 'recall_strategy_trend_v1', # 趋势召回策略
     }
 
     # category id mapping

+ 54 - 4
recommend.py

@@ -539,7 +539,12 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
             or ab_code == 60092 or ab_code == 60093 or ab_code == 60094 or ab_code == 60095 or ab_code == 60096\
             or ab_code == 60097:
         t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
-        t.append(gevent.spawn(pool_recall.get_return_video_reall, 'rv2:'))
+        t.append(gevent.spawn(pool_recall.recall_strategy_trend_v1, 'rv2:'))
+    elif ab_code == 60098:
+        t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
+        t.append(gevent.spawn(pool_recall.recall_strategy_trend_v1, 'rv2:'))
+        # todo:zhangbo
+        t.append(gevent.spawn(pool_recall.recall_strategy_trend_v1))
 
     gevent.joinall(t)
     recall_result_list = [i.get() for i in t]
@@ -661,6 +666,52 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
                         now_video_ids.add(video_id)
             if len(rov_pool_recall)>0:
                 recall_result_list[0] = rov_pool_recall
+    # merge新增的recall_strategy_trend_v1 60098
+    if ab_code == 60098:
+        rov_pool_recall = []
+        if len(recall_result_list) >= 2:
+            region_recall = recall_result_list[0]
+            return_video_reall = []
+            sim_recall = []
+            trend_recall = []
+            if app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
+                sim_recall = recall_result_list[1]
+                return_video_reall = recall_result_list[2] if len(recall_result_list) >= 3 else []
+                trend_recall = recall_result_list[3] if len(recall_result_list) >= 4 else []
+            else:
+                if len(recall_result_list) >= 4:
+                    sim_recall = recall_result_list[3]
+                if len(recall_result_list) >= 5:
+                    return_video_reall = recall_result_list[4]
+                trend_recall = recall_result_list[5] if len(recall_result_list) >= 6 else []
+            now_video_ids = set('')
+            if len(region_recall) > 0:
+                for video in region_recall:
+                    video_id = video.get('videoId')
+                    if video_id not in now_video_ids:
+                        rov_pool_recall.append(video)
+                        now_video_ids.add(video_id)
+            if len(sim_recall) > 0:
+                for video in sim_recall:
+                    video_id = video.get('videoId')
+                    if video_id not in now_video_ids:
+                        rov_pool_recall.append(video)
+                        now_video_ids.add(video_id)
+            if len(return_video_reall) > 0:
+                for video in return_video_reall:
+                    video_id = video.get('videoId')
+                    if video_id not in now_video_ids:
+                        rov_pool_recall.append(video)
+                        now_video_ids.add(video_id)
+            if len(trend_recall) > 0:
+                for video in trend_recall:
+                    video_id = video.get('videoId')
+                    if video_id not in now_video_ids:
+                        rov_pool_recall.append(video)
+                        now_video_ids.add(video_id)
+            if len(rov_pool_recall) > 0:
+                recall_result_list[0] = rov_pool_recall
+
     result['recallResult'] = recall_result_list
     result['recallTime'] = (time.time() - start_recall) * 1000
 
@@ -691,7 +742,6 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
     else:
         # add_flow_pool_recall_log
         if recall_result_list[1][0]:
-        # if recall_result_list[1]:
             redis_helper = RedisHelper()
             quick_flow_pool_P = redis_helper.get_data_from_redis(
                 key_name=f"{config_.QUICK_FLOWPOOL_DISTRIBUTE_RATE_KEY_NAME_PREFIX}{config_.QUICK_FLOW_POOL_ID}"
@@ -2024,7 +2074,7 @@ def video_homepage_recommend(request_id, mid, uid, size, app_type, algo_type,
             or ab_code == 60083 or ab_code == 60084 or ab_code == 60085 or ab_code == 60086 \
             or ab_code == 60087 or ab_code == 60088 or ab_code == 60089 or ab_code == 60090 \
             or ab_code == 60091 or ab_code == 60092 or ab_code == 60093 or ab_code == 60094 or ab_code == 60095 \
-            or ab_code == 60096 or ab_code == 60097:
+            or ab_code == 60096 or ab_code == 60097 or ab_code == 60098:
         result, fea_info = video_old_recommend(request_id=request_id, mid=mid, uid=uid, app_type=app_type, size=size,
                                                top_K=top_K, flow_pool_P=flow_pool_P, algo_type='',
                                                client_info=client_info, ab_code=ab_code, expire_time=expire_time,
@@ -2152,7 +2202,7 @@ def video_relevant_recommend(request_id, video_id, mid, uid, size, app_type, ab_
             or ab_code == 60083 or ab_code == 60084 or ab_code == 60085 or ab_code == 60086 \
             or ab_code == 60087 or ab_code == 60088 or ab_code == 60089 or ab_code == 60090 \
             or ab_code == 60091 or ab_code == 60092 or ab_code == 60093 or ab_code == 60094 or ab_code == 60095 \
-            or ab_code == 60096 or ab_code == 60097:
+            or ab_code == 60096 or ab_code == 60097 or ab_code == 60098:
         result, fea_info = video_old_recommend(request_id=request_id, mid=mid, uid=uid, app_type=app_type, size=size,
                                                top_K=top_K, flow_pool_P=flow_pool_P, algo_type='',
                                                client_info=client_info, ab_code=ab_code, expire_time=expire_time,

+ 12 - 0
utils.py

@@ -892,6 +892,18 @@ class FilterVideos(object):
         else:
             return video_ids[:min(self.force_truncation, len(video_ids))]
 
+    def filter_videos_for_group(self, region_code=None, videos=None):
+        """Chain the risk-video, previewed and viewed-status filters over one group of videos.
+
+        Returns the remaining ids cast to int, or None when the previewed or viewed stage is empty.
+        """
+        risk_checked = self.filter_videos_with_risk_video(videos, self.app_type, region_code)
+        not_previewed = self.filter_video_previewed(risk_checked)
+        not_viewed = self.filter_video_viewed_status(video_ids=not_previewed) if not_previewed else None
+        if not not_viewed:
+            return None
+        return [int(vid) for vid in not_viewed]
+
 if __name__ == '__main__':
     user = [
         ('weixin_openid_o0w175fDc8pNnywrYN49E341tKfI', ''),

+ 48 - 1
video_recall.py

@@ -3932,4 +3932,51 @@ class PoolRecall(object):
             region_code = province_code
         if region_code == '':
             region_code = '-1'
-        return region_code
+        return region_code
+
+    def recall_strategy_trend_v1(self):
+        """Recall trend videos for the client's province from two redis id lists.
+
+        Reads the comma-separated "sum" and "avg" 4h trend lists keyed by provinceCode,
+        drops "avg" ids already in "sum", filters the ids in gevent greenlets in groups
+        of 20 through FilterVideos.filter_videos_for_group, and returns the surviving
+        ids as recall dicts ("sum" groups first, then "avg" groups).
+        """
+        group_size = 20
+
+        def parse_ids(raw):
+            # Skip blank / non-integer tokens so one bad redis value cannot fail the request.
+            ids = []
+            for token in (raw or "").split(","):
+                try:
+                    ids.append(int(token))
+                except ValueError:
+                    continue
+            return ids
+
+        def chunked(ids):
+            return [ids[i:i + group_size] for i in range(0, len(ids), group_size)]
+
+        # 1. trigger info; 2. redis keys; 3. fetch and parse both lists
+        region_code_province = self.client_info.get('provinceCode', '-1')
+        key1 = "alg_recsys_recall_4h_region_trend_sum_" + region_code_province
+        key2 = "alg_recsys_recall_4h_region_trend_avg_" + region_code_province
+        sum_ids = parse_ids(self.redis_helper.get_data_from_redis(key_name=key1))
+        sum_seen = set(sum_ids)  # set lookup instead of rescanning the list per avg id
+        avg_ids = [v for v in parse_ids(self.redis_helper.get_data_from_redis(key_name=key2)) if v not in sum_seen]
+        data_for_filter = chunked(sum_ids) + chunked(avg_ids)
+        # 4. filter every group in its own greenlet
+        filter_ = FilterVideos(request_id=self.request_id,
+                               app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=None,
+                               expansion_factor=self.expansion_factor,
+                               risk_filter_flag=self.risk_filter_flag,
+                               app_region_filtered=self.app_region_filtered,
+                               videos_with_risk=self.videos_with_risk)
+        region_code = self.get_region_code()
+        t = [gevent.spawn(filter_.filter_videos_for_group, region_code, videos) for videos in data_for_filter]
+        gevent.joinall(t)
+        # 5. flatten; a group whose filter returned None or [] contributes nothing
+        return [{'videoId': v, 'flowPool': '',
+                 'rovScore': 0.0, 'pushFrom': config_.PUSH_FROM['recall_strategy_trend_v1'],
+                 'abCode': self.ab_code}
+                for job in t for v in (job.get() or [])]