|
@@ -27,7 +27,6 @@ def video_rank(data, size):
|
|
|
flow_recall_rank = sorted(data['flow_pool_recall'], key=lambda k: (k.get('rovScore'), 0), reverse=True)
|
|
|
# Deduplicate the videos returned by the different recall channels
|
|
|
rov_recall_rank, flow_recall_rank = remove_duplicate(rov_recall=rov_recall_rank, flow_recall=flow_recall_rank)
|
|
|
-
|
|
|
# Take the top K from the ROV recall pool
|
|
|
if len(rov_recall_rank) > 0:
|
|
|
rank_result = rov_recall_rank[:config_.K]
|
|
@@ -46,14 +45,14 @@ def video_rank(data, size):
|
|
|
rank_result.append(flow_recall_rank[0])
|
|
|
flow_recall_rank.remove(flow_recall_rank[0])
|
|
|
else:
|
|
|
- rank_result.append(rov_recall_rank[:size - config_.K - i])
|
|
|
+ rank_result.extend(rov_recall_rank[:size - config_.K - i])
|
|
|
return rank_result
|
|
|
else:
|
|
|
if rov_recall_rank:
|
|
|
rank_result.append(rov_recall_rank[0])
|
|
|
rov_recall_rank.remove(rov_recall_rank[0])
|
|
|
else:
|
|
|
- rank_result.append(flow_recall_rank[:size - config_.K - i])
|
|
|
+ rank_result.extend(flow_recall_rank[:size - config_.K - i])
|
|
|
return rank_result
|
|
|
i += 1
|
|
|
return rank_result
|
|
@@ -68,21 +67,27 @@ def remove_duplicate(rov_recall, flow_recall):
|
|
|
:param flow_recall: flow pool recall - already sorted
|
|
|
:return:
|
|
|
"""
|
|
|
- flow_recall_remove = []
|
|
|
- flow_recall_video_ids = [item[0] for item in flow_recall]
|
|
|
+ flow_recall_result = []
|
|
|
+ rov_recall_remove = []
|
|
|
+ flow_recall_video_ids = [item['videoId'] for item in flow_recall]
|
|
|
# rov_recall topK
|
|
|
for item in rov_recall[:config_.K]:
|
|
|
- if item[0] in flow_recall_video_ids:
|
|
|
- flow_recall_remove.append(item[0])
|
|
|
+ if item['videoId'] in flow_recall_video_ids:
|
|
|
+ flow_recall_video_ids.remove(item['videoId'])
|
|
|
# other
|
|
|
for item in rov_recall[config_.K:]:
|
|
|
- if item[0] in flow_recall_video_ids:
|
|
|
- rov_recall.remove(item)
|
|
|
+ if item['videoId'] in flow_recall_video_ids:
|
|
|
+ rov_recall_remove.append(item)
|
|
|
+
|
|
|
+ # rov recall remove
|
|
|
+ for item in rov_recall_remove:
|
|
|
+ rov_recall.remove(item)
|
|
|
# flow recall remove
|
|
|
for item in flow_recall:
|
|
|
- if item[0] in flow_recall_remove:
|
|
|
- flow_recall.remove(item)
|
|
|
- return rov_recall, flow_recall
|
|
|
+ if item['videoId'] not in flow_recall_video_ids:
|
|
|
+ flow_recall_result.append(item)
|
|
|
+
|
|
|
+ return rov_recall, flow_recall_result
|
|
|
|
|
|
|
|
|
def bottom_strategy(size, app_type, ab_code):
|
|
@@ -108,6 +113,9 @@ def bottom_strategy(size, app_type, ab_code):
|
|
|
# Status filtering
|
|
|
filter_videos = FilterVideos(app_type=app_type, video_ids=data)
|
|
|
filtered_data = filter_videos.filter_video_status(video_ids=data)
|
|
|
- random_data = numpy.random.choice(filtered_data, size, False)
|
|
|
+ if len(filtered_data) > size:
|
|
|
+ random_data = numpy.random.choice(filtered_data, size, False)
|
|
|
+ else:
|
|
|
+ random_data = filtered_data
|
|
|
bottom_data = [{'videoId': item, 'pushFrom': 'bottom_strategy', 'abCode': ab_code} for item in random_data]
|
|
|
return bottom_data
|