浏览代码

update sim_recall_new_info

linfan 1 年之前
父节点
当前提交
05a9f00eda
共有 4 个文件被更改，包括 36 次插入和 19 次删除
  1. 14 9
      recommend.py
  2. 13 5
      utils.py
  3. 7 3
      video_rank.py
  4. 2 2
      video_recall.py

+ 14 - 9
recommend.py

@@ -395,10 +395,11 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
              gevent.spawn(pool_recall.get_no_selected_recall, size, region_code)
              ]
     else:
-        t = [gevent.spawn(pool_recall.get_region_hour_recall, size, region_code),
-             gevent.spawn(pool_recall.get_region_day_recall, size, region_code),
-             gevent.spawn(pool_recall.get_selected_recall, size, region_code),
-             gevent.spawn(pool_recall.get_no_selected_recall, size, region_code),
+        t = [
+             #gevent.spawn(pool_recall.get_region_hour_recall, size, region_code),
+             #gevent.spawn(pool_recall.get_region_day_recall, size, region_code),
+             #gevent.spawn(pool_recall.get_selected_recall, size, region_code),
+             #gevent.spawn(pool_recall.get_no_selected_recall, size, region_code),
              gevent.spawn(pool_recall.flow_pool_recall, size, config_.QUICK_FLOW_POOL_ID),
              gevent.spawn(pool_recall.flow_pool_recall, size)]
 
@@ -408,6 +409,7 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
     # all recall_result
     all_recall_result_list = [i.get() for i in t]
     all_recall_result = []
+    print(all_recall_result_list)
     result['recallTime'] = (time.time() - start_recall) * 1000
 
     if not all_recall_result_list or len(all_recall_result_list)==0:
@@ -432,8 +434,8 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
     flowFlag_dict = {}
     for per_item in all_recall_result:
         #print(per_item)
-        vId = per_item.get("videoId",'0')
-        if vId=='0':
+        vId = int(per_item.get("videoId",0))
+        if vId==0:
             continue
         recall_name = per_item.get("pushFrom",'')
         flow_pool = per_item.get("flowPool", '')
@@ -464,14 +466,16 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
         else:
             recall_name = recall_dict[vId] + "," + recall_name
             recall_dict[vId] = recall_name
+    print("recall_dict:", recall_dict)
     #3. filter video, 先过预曝光
     filter_ = FilterVideos(request_id=request_id,
                            app_type=app_type, mid=mid, uid=uid, video_ids=list(recall_dict.keys()))
 
-    print("filer:", list(recall_dict.keys()))
+    #print("filer:", list(recall_dict.keys()))
     #a).expose filter
+    #all_recall_list = list(recall_dict.keys())
     all_recall_list = filter_.filter_videos_new(pool_type='rov', region_code=region_code, shield_config=shield_config)
-    print("filer after:", all_recall_list)
+    #print("filer after:", all_recall_list)
     #4. sort: old sort: flow 按概率出
     start_rank = time.time()
     #quick_flow_pool_P get from redis
@@ -485,11 +489,12 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
     rank_result= []
     if ab_code==60048 or ab_code==60049:
         rank_ids, add_flow_set = video_new_rank(videoIds=all_recall_list,fast_flow_set=fast_flow_set, flow_set=flow_flow_set,size=size, top_K=top_K, flow_pool_P=float(flow_pool_P))
-        #print("rank_ids:", rank_ids)
+        print("rank_ids:", rank_ids)
         for rank_item in rank_ids:
             rank_id = rank_item[0]
             rank_score = rank_item[1]
             pushFrom = recall_dict.get(rank_id, '')
+            print(pushFrom, rank_id)
             flowPoolFlag = ''
             if rank_id in add_flow_set:
                 flowPoolFlag = flowFlag_dict.get(rank_id,'') 

+ 13 - 5
utils.py

@@ -610,11 +610,12 @@ class FilterVideos(object):
         """视频过滤"""
         # 预曝光过滤
         st_pre = time.time()
-        #print("self.video_ids:", len(self.video_ids))
+        print("self.video_ids:", len(self.video_ids))
         filtered_pre_result = self.filter_video_previewed(self.video_ids)
         if not filtered_pre_result:
             return None
-        #print("filtered_pre_result:", len(filtered_pre_result))
+        print("filtered_pre_result:", len(filtered_pre_result))
+        print(filtered_pre_result)
         # 视频已曝光过滤/白名单过滤
         st_viewed = time.time()
         t = [
@@ -622,14 +623,21 @@ class FilterVideos(object):
             gevent.spawn(self.filter_movie_religion_video, filtered_pre_result)]
         gevent.joinall(t)
         filtered_result_list = [i.get() for i in t]
-        #print("filtered_result_list:",len(filtered_result_list))
-        filtered_viewed_result = list(set(filtered_result_list[0]) & set(filtered_result_list[1]))
+        print("filtered_result_list1:",filtered_result_list[0])
+        print("filtered_result_list2:",filtered_result_list[1])
+        filtered_viewed_set = set('')
+        for i in filtered_result_list[0]:
+            filtered_viewed_set.add(int(i))
+        filter_video_set =set('')
+        for j in filtered_result_list[1]:
+            filter_video_set.add(int(j))
+        filtered_viewed_result = list(filtered_viewed_set & filter_video_set)
         #print(f"view&m_r res: {filtered_viewed_result}\nexecute_time: {(time.time() - st_viewed) * 1000}")
         #print("filtered:",len(filtered_viewed_result))
         if not filtered_viewed_result:
             return None
         filtered_viewed_videos = [int(video_id) for video_id in filtered_viewed_result]
-        #print("result:", len(filtered_viewed_videos))
+        print("result:", filtered_viewed_videos)
         if pool_type != 'flow':
             return  filtered_viewed_videos
         else:

+ 7 - 3
video_rank.py

@@ -184,7 +184,7 @@ def video_new_rank(videoIds, fast_flow_set, flow_set, size, top_K, flow_pool_P):
         except Exception:
             video_items.append((videoIds[i], 0.0))
     sort_items = sorted(video_items, key=lambda k: k[1], reverse=True)
-    #print("sort_items:", sort_items)
+    print("sort_items:", sort_items)
     rov_recall_rank = sort_items
     fast_flow_recall_rank = []
     flow_recall_rank = []
@@ -212,13 +212,15 @@ def video_new_rank(videoIds, fast_flow_set, flow_set, size, top_K, flow_pool_P):
     # 按概率 p 及score排序获取 size - k 个视频, 第4个位置按概率取流量池
     i = 0
     left_quato = size - top_K
-
-    while i < left_quato:
+    j = 0
+    jj = 0
+    while i < left_quato and j<len(all_flow_recall_rank) and jj<len(rov_recall_rank):
         # 随机生成[0, 1)浮点数
         rand = random.random()
         # log_.info('rand: {}'.format(rand))
         if rand < flow_pool_P:
             for flow_item in all_flow_recall_rank:
+                j+=1
                 if flow_item[0] in rank_set:
                     continue
                 else:
@@ -228,8 +230,10 @@ def video_new_rank(videoIds, fast_flow_set, flow_set, size, top_K, flow_pool_P):
                 i += 1
                 if i>= left_quato:
                     break
+                
         else:
             for recall_item in rov_recall_rank:
+                jj+=1
                 if recall_item[0] in rank_set:
                     continue
                 else:

+ 2 - 2
video_recall.py

@@ -2109,9 +2109,9 @@ class PoolRecall(object):
     #linfan
     def get_sim_hot_item_reall(self):
         recall_key = "sim_hot_"+str(self.video_id)
-        #print("recall_key:", recall_key)
+        print("recall_key:", recall_key)
         data = self.redis_helper.get_data_from_redis(key_name=recall_key)
-        #print(data)
+        print(data)
         recall_result = []
         if data is not  None:
             json_result =json.loads(data)