|
@@ -10,7 +10,7 @@ import config
|
|
|
from log import Log
|
|
|
from config import set_config
|
|
|
from video_recall import PoolRecall
|
|
|
-from video_rank import video_new_rank,video_rank, bottom_strategy, video_rank_by_w_h_rate, video_rank_with_old_video, bottom_strategy2
|
|
|
+from video_rank import video_new_rank,video_rank,refactor_video_rank, bottom_strategy, video_rank_by_w_h_rate, video_rank_with_old_video, bottom_strategy2
|
|
|
from db_helper import RedisHelper
|
|
|
import gevent
|
|
|
from utils import FilterVideos, get_user_has30day_return
|
|
@@ -201,7 +201,7 @@ def video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type, al
|
|
|
t = [gevent.spawn(pool_recall.rov_pool_recall_with_region, size, expire_time),
|
|
|
gevent.spawn(pool_recall.flow_pool_recall, size, config_.QUICK_FLOW_POOL_ID),
|
|
|
gevent.spawn(pool_recall.flow_pool_recall, size),
|
|
|
- gevent.spawn(pool_recall.get_sim_hot_item_reall, size)]
|
|
|
+ gevent.spawn(pool_recall.get_sim_hot_item_reall)]
|
|
|
|
|
|
# 最惊奇相关推荐实验
|
|
|
# elif ab_code == config_.AB_CODE['top_video_relevant_appType_19']:
|
|
@@ -340,6 +340,176 @@ def video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type, al
|
|
|
return result
|
|
|
# return rank_result, last_rov_recall_key
|
|
|
|
|
|
def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type, algo_type, client_info,
                        expire_time=24*3600, ab_code=config_.AB_CODE['initial'], rule_key='', data_key='',
                        no_op_flag=False, old_video_index=-1, video_id=None, params=None, rule_key_30day=None,
                        shield_config=None):
    """
    Homepage online recommendation pipeline: recall -> de-duplicate -> filter -> rank.

    :param request_id: request_id
    :param mid: mid type-string
    :param uid: uid type-string
    :param size: number of requested videos type-int (overridden below, see NOTE(review))
    :param top_K: guarantee that the top K videos come from the recall pool type-int
    :param flow_pool_P: probability that the remaining size-top_K videos come from the flow pool type-float
    :param app_type: product identifier type-int
    :param algo_type: algorithm type type-string (not used in this function body)
    :param client_info: user location info; the 'provinceCode' and 'cityCode' keys are read here
    :param expire_time: redis expiration time of the last-video record (not used in this function body)
    :param ab_code: AB experiment code
    :param rule_key: forwarded to PoolRecall
    :param data_key: forwarded to PoolRecall
    :param no_op_flag: forwarded to PoolRecall
    :param old_video_index: not used in this function body
    :param video_id: head video id for related recommendation, forwarded to PoolRecall
    :param params: forwarded to PoolRecall
    :param rule_key_30day: forwarded to PoolRecall
    :param shield_config: forwarded to PoolRecall and to FilterVideos.new_flow_video
    :return: dict with 'recallTime', 'rankResult' and 'rankTime' (times in milliseconds), or None when
             the exposure filter returns None or no video is left after filtering
    """
    result = {}

    # ---- 1. recall (all recall sources run concurrently as greenlets) ----
    start_recall = time.time()

    # Resolve region_code: use the city code when that city has its own data, otherwise the province code.
    city_code_list = list(config_.CITY_CODE.values())
    province_code = client_info.get('provinceCode', '-1')
    city_code = client_info.get('cityCode', '-1')
    region_code = city_code if city_code in city_code_list else province_code
    if region_code == '':
        region_code = '-1'

    # NOTE(review): the caller-supplied size is replaced by a fixed 1000 and this value is also
    # what the rank functions receive below - confirm this is intended and not recall-only.
    size = 1000
    pool_recall = PoolRecall(request_id=request_id,
                             app_type=app_type, mid=mid, uid=uid, ab_code=ab_code,
                             client_info=client_info, rule_key=rule_key, data_key=data_key, no_op_flag=no_op_flag,
                             params=params, rule_key_30day=rule_key_30day, shield_config=shield_config,
                             video_id=video_id)
    if app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
        t = [gevent.spawn(pool_recall.get_region_hour_recall, size, region_code),
             gevent.spawn(pool_recall.get_region_day_recall, size, region_code),
             gevent.spawn(pool_recall.get_selected_recall, size),
             gevent.spawn(pool_recall.get_no_selected_recall, size)]
    else:
        # NOTE(review): unlike the branch above, region_code is not passed to the region recalls
        # here - verify against the PoolRecall method signatures.
        t = [gevent.spawn(pool_recall.get_region_hour_recall, size),
             gevent.spawn(pool_recall.get_region_day_recall, size),
             gevent.spawn(pool_recall.get_selected_recall, size),
             gevent.spawn(pool_recall.get_no_selected_recall, size),
             gevent.spawn(pool_recall.get_fast_flow_pool_recall, size),
             gevent.spawn(pool_recall.get_flow_pool_recall, size),
             gevent.spawn(pool_recall.get_sim_hot_item_reall)]
    gevent.joinall(t)
    # Each greenlet yields the output of one recall source; flatten them into a single item list
    # so that the loop below really iterates over video items.
    all_recall_result_list = _flatten_recall_results([task.get() for task in t])
    result['recallTime'] = (time.time() - start_recall) * 1000

    # ---- 2. de-duplicate ----
    recall_dict = {}  # videoId -> comma-joined names of every recall source that returned it
    fast_flow_set = set()
    flow_flow_set = set()
    region_h_recall = []
    region_day_recall = []
    select_day_recall = []
    no_selected_recall = []
    for per_item in all_recall_result_list:
        vId = per_item.get("videoId", '0')
        if vId == '0':
            continue
        recall_name = per_item.get("pushFrom", '')
        if recall_name == 'fast_flow_recall':
            fast_flow_set.add(vId)
        if recall_name == 'flow_recall':
            flow_flow_set.add(vId)
        if vId not in recall_dict:
            # First occurrence of this video: keep the item in the bucket of its recall source.
            if recall_name == config_.PUSH_FROM['rov_recall_region_h']:
                region_h_recall.append(per_item)
            elif recall_name == config_.PUSH_FROM['rov_recall_region_24h']:
                region_day_recall.append(per_item)
            elif recall_name == config_.PUSH_FROM['rov_recall_24h']:
                select_day_recall.append(per_item)
            elif recall_name == config_.PUSH_FROM['rov_recall_24h_dup']:
                no_selected_recall.append(per_item)
            recall_dict[vId] = recall_name
        else:
            # Repeated video: only record the additional recall source.
            recall_dict[vId] = recall_dict[vId] + "," + recall_name
    # Union of both flow-pool id sets (set.add() with a set argument would raise TypeError).
    all_flow_set = fast_flow_set | flow_flow_set

    # ---- 3. filter videos: exposure filter first ----
    filter_ = FilterVideos(request_id=request_id,
                           app_type=app_type, mid=mid, uid=uid, video_ids=recall_dict.keys())
    # a) exposure filter
    expose_filterd_videos = filter_.new_filter_video()
    if expose_filterd_videos is None:
        return None
    # b) split the surviving videos into normal / flow-pool videos
    normal_video_list, flow_video_list = filter_.new_flow_video(
        expose_filterd_videos, all_flow_set, region_code, shield_config)
    # Give up only when nothing at all is left; an empty flow list alone is not a reason to stop.
    if not normal_video_list and not flow_video_list:
        return None

    # ---- 4. rank (legacy ranking: flow-pool videos are emitted with probability flow_pool_P) ----
    start_rank = time.time()
    # A quick-flow-pool distribution rate stored in redis, when present, overrides flow_pool_P.
    redis_helper = RedisHelper()
    quick_flow_pool_P = redis_helper.get_data_from_redis(
        key_name=f"{config_.QUICK_FLOWPOOL_DISTRIBUTE_RATE_KEY_NAME_PREFIX}{config_.QUICK_FLOW_POOL_ID}"
    )
    if quick_flow_pool_P:
        flow_pool_P = quick_flow_pool_P

    rank_result = []
    if ab_code == "ab_new_test":
        all_recall_list = normal_video_list + flow_video_list
        rank_ids = video_new_rank(videoIds=all_recall_list, fast_flow_set=fast_flow_set, flow_set=flow_flow_set,
                                  size=size, top_K=top_K, flow_pool_P=float(flow_pool_P))
        # NOTE(review): this appends the recall-source name stored in recall_dict, not the video
        # item itself - confirm that this is the intended shape of rankResult.
        for rank_id in rank_ids:
            if rank_id in recall_dict:
                rank_result.append(recall_dict.get(rank_id))
    else:
        all_dup_recall_result = region_h_recall + region_day_recall + select_day_recall + no_selected_recall
        rank_result = refactor_video_rank(rov_recall_rank=all_dup_recall_result, fast_flow_set=fast_flow_set,
                                          flow_set=flow_flow_set, size=size, top_K=top_K,
                                          flow_pool_P=float(flow_pool_P))

    result['rankResult'] = rank_result
    result['rankTime'] = (time.time() - start_rank) * 1000

    # NOTE: no bottom-strategy fallback is applied here when rank_result is empty.
    return result


def _flatten_recall_results(recall_results):
    """Flatten the per-source recall outputs into one list of item dicts."""
    flat_items = []
    for source_result in recall_results:
        if not source_result:
            # A recall source returned nothing (None / empty container).
            continue
        if isinstance(source_result, dict):
            # A source that yields a single item.
            flat_items.append(source_result)
        else:
            flat_items.extend(item for item in source_result if isinstance(item, dict))
    return flat_items
|
|
|
+
|
|
|
|
|
|
def ab_test_op(rank_result, ab_code_list, app_type, mid, uid, **kwargs):
|
|
|
"""
|