linfan vor 2 Jahren
Ursprung
Commit
f79a8c3fd8
4 geänderte Dateien mit 46 neuen und 32 gelöschten Zeilen
  1. recommend.py (+26 -19)
  2. utils.py (+6 -6)
  3. video_rank.py (+1 -1)
  4. video_recall.py (+13 -6)

+ 26 - 19
recommend.py

@@ -396,11 +396,11 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
              ]
     else:
         t = [
-             #gevent.spawn(pool_recall.get_region_hour_recall, size, region_code),
-             #gevent.spawn(pool_recall.get_region_day_recall, size, region_code),
-             #gevent.spawn(pool_recall.get_selected_recall, size, region_code),
-             #gevent.spawn(pool_recall.get_no_selected_recall, size, region_code),
-             gevent.spawn(pool_recall.flow_pool_recall, size, config_.QUICK_FLOW_POOL_ID),
+             gevent.spawn(pool_recall.get_region_hour_recall, size, region_code),
+             gevent.spawn(pool_recall.get_region_day_recall, size, region_code),
+             gevent.spawn(pool_recall.get_selected_recall, size, region_code),
+             gevent.spawn(pool_recall.get_no_selected_recall, size, region_code),
+             #gevent.spawn(pool_recall.flow_pool_recall, size, config_.QUICK_FLOW_POOL_ID),
              gevent.spawn(pool_recall.flow_pool_recall, size)]
 
         if ab_code ==60049:
@@ -409,9 +409,9 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
     # all recall_result
     all_recall_result_list = [i.get() for i in t]
     all_recall_result = []
-    print(all_recall_result_list)
+    #print(all_recall_result_list)
     result['recallTime'] = (time.time() - start_recall) * 1000
-
+    print("all recall time:", result['recallTime'])
     if not all_recall_result_list or len(all_recall_result_list)==0:
         return result
     for recall_item in all_recall_result_list:
@@ -423,6 +423,7 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
     #print("all_recall_result:", all_recall_result)
     #2. duplicate
     recall_dict = {}
+    duplicat_time  = time.time()
     fast_flow_set = set('')
     flow_flow_set = set('')
     region_h_recall = []
@@ -466,7 +467,9 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
         else:
             recall_name = recall_dict[vId] + "," + recall_name
             recall_dict[vId] = recall_name
-    print("recall_dict:", recall_dict)
+    #print("recall_dict:", recall_dict)
+    end_duplicat_time  = time.time()
+    print("duplicate time:", (end_duplicat_time-duplicat_time)*1000)
     #3. filter video, 先过预曝光
     filter_ = FilterVideos(request_id=request_id,
                            app_type=app_type, mid=mid, uid=uid, video_ids=list(recall_dict.keys()))
@@ -477,6 +480,7 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
     all_recall_list = filter_.filter_videos_new(pool_type='rov', region_code=region_code, shield_config=shield_config)
     #print("filer after:", all_recall_list)
     #4. sort: old sort: flow 按概率出
+    print("filter time:", (time.time()-end_duplicat_time)*1000)
     start_rank = time.time()
     #quick_flow_pool_P get from redis
     redis_helper = RedisHelper()
@@ -489,7 +493,7 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
     rank_result= []
     if ab_code==60048 or ab_code==60049:
         rank_ids, add_flow_set = video_new_rank(videoIds=all_recall_list,fast_flow_set=fast_flow_set, flow_set=flow_flow_set,size=size, top_K=top_K, flow_pool_P=float(flow_pool_P))
-        print("rank_ids:", rank_ids)
+        #print("rank_ids:", rank_ids)
         for rank_item in rank_ids:
             rank_id = rank_item[0]
             rank_score = rank_item[1]
@@ -509,6 +513,7 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
 
     result['rankResult'] = rank_result
     result['rankTime'] = (time.time() - start_rank) * 1000
+    print("rank time:", result['rankTime'])
     return result
     # return rank_result, last_rov_recall_key
 
@@ -1414,7 +1419,7 @@ def video_relevant_recommend(request_id, video_id, mid, uid, size, app_type, ab_
     no_op_flag, old_video_index, rule_key_30day, shield_config = \
         get_recommend_params(recommend_type=1, ab_exp_info=ab_exp_info, ab_info_data=ab_info_data, page_type=page_type,
                              mid=mid, app_type=app_type)
-    # log_.info({
+    #log_.info({
     #     'logTimestamp': int(time.time() * 1000),
     #     'request_id': request_id,
     #     'app_type': app_type,
@@ -1423,6 +1428,7 @@ def video_relevant_recommend(request_id, video_id, mid, uid, size, app_type, ab_
     #     'operation': 'get_recommend_params',
     #     'executeTime': (time.time() - param_st) * 1000
     # })
+    print("get params:", (time.time() - param_st) * 1000)
     recommend_result['getRecommendParamsTime'] = (time.time() - param_st) * 1000
 
     # 简单召回 - 排序 - 兜底
@@ -1447,15 +1453,16 @@ def video_relevant_recommend(request_id, video_id, mid, uid, size, app_type, ab_
                              rule_key=rule_key, data_key=data_key, no_op_flag=no_op_flag,
                              old_video_index=old_video_index, video_id=video_id,
                              params=params, rule_key_30day=rule_key_30day, shield_config=shield_config)
-    # log_.info({
-    #     'logTimestamp': int(time.time() * 1000),
-    #     'request_id': request_id,
-    #     'app_type': app_type,
-    #     'mid': mid,
-    #     'uid': uid,
-    #     'operation': 'get_recommend_result',
-    #     'executeTime': (time.time() - get_result_st) * 1000
-    # })
+     #log_.info({
+     #    'logTimestamp': int(time.time() * 1000),
+     #    'request_id': request_id,
+     #    'app_type': app_type,
+     #    'mid': mid,
+     #    'uid': uid,
+     #    'operation': 'get_recommend_result',
+     #    'executeTime': (time.time() - get_result_st) * 1000
+    #})
+    print("executeTime:", (time.time() - get_result_st) * 1000)
     recommend_result['recommendOperation'] = result
     rank_result = result.get('rankResult')
     recommend_result['videos'] = rank_result

+ 6 - 6
utils.py

@@ -610,12 +610,12 @@ class FilterVideos(object):
         """视频过滤"""
         # 预曝光过滤
         st_pre = time.time()
-        print("self.video_ids:", len(self.video_ids))
+        #print("self.video_ids:", len(self.video_ids))
         filtered_pre_result = self.filter_video_previewed(self.video_ids)
         if not filtered_pre_result:
             return None
-        print("filtered_pre_result:", len(filtered_pre_result))
-        print(filtered_pre_result)
+        #print("filtered_pre_result:", len(filtered_pre_result))
+        #print(filtered_pre_result)
         # 视频已曝光过滤/白名单过滤
         st_viewed = time.time()
         t = [
@@ -623,8 +623,8 @@ class FilterVideos(object):
             gevent.spawn(self.filter_movie_religion_video, filtered_pre_result)]
         gevent.joinall(t)
         filtered_result_list = [i.get() for i in t]
-        print("filtered_result_list1:",filtered_result_list[0])
-        print("filtered_result_list2:",filtered_result_list[1])
+        #print("filtered_result_list1:",filtered_result_list[0])
+        #print("filtered_result_list2:",filtered_result_list[1])
         filtered_viewed_set = set('')
         for i in filtered_result_list[0]:
             filtered_viewed_set.add(int(i))
@@ -637,7 +637,7 @@ class FilterVideos(object):
         if not filtered_viewed_result:
             return None
         filtered_viewed_videos = [int(video_id) for video_id in filtered_viewed_result]
-        print("result:", filtered_viewed_videos)
+        #print("result:", filtered_viewed_videos)
         if pool_type != 'flow':
             return  filtered_viewed_videos
         else:

+ 1 - 1
video_rank.py

@@ -184,7 +184,7 @@ def video_new_rank(videoIds, fast_flow_set, flow_set, size, top_K, flow_pool_P):
         except Exception:
             video_items.append((videoIds[i], 0.0))
     sort_items = sorted(video_items, key=lambda k: k[1], reverse=True)
-    print("sort_items:", sort_items)
+    #print("sort_items:", sort_items)
     rov_recall_rank = sort_items
     fast_flow_recall_rank = []
     flow_recall_rank = []

+ 13 - 6
video_recall.py

@@ -452,9 +452,11 @@ class PoolRecall(object):
             region_code = province_code
         if region_code == '':
             region_code = '-1'
-
+        print("flow_pool_recall:", (time.time()-start_time)*1000)
+        
         flow_pool_key = self.get_pool_redis_key('flow', flow_pool_id=flow_pool_id)
-        # print(flow_pool_key)
+        print(flow_pool_key)
+        print("flow_pool_recall:", (time.time()-start_time)*1000)
         flow_pool_recall_result = []
         flow_pool_recall_videos = []
         # 每次获取的视频数
@@ -467,11 +469,12 @@ class PoolRecall(object):
             if freq > config_.MAX_FREQ_FROM_FLOW_POOL:
                 break
             # 获取数据
-            # st_get = time.time()
+            st_get = time.time()
             data = self.redis_helper.get_data_zset_with_index(key_name=flow_pool_key,
                                                               start=idx, end=idx + get_size - 1,
                                                               with_scores=True)
-            # et_get = time.time()
+            et_get = time.time()
+            print("per get time:", (et_get-st_get)*1000)
             # log_.info('get data from flow pool redis: freq = {}, data = {}, execute time = {}ms'.format(
             #     freq, data, (et_get - st_get) * 1000))
             if not data:
@@ -500,12 +503,14 @@ class PoolRecall(object):
                 else:
                     video_mapping[video_id].append(flow_pool)
             # 过滤
+            print("per pro time1:", (time.time()-st_get)*1000)
             filter_ = FilterVideos(request_id=self.request_id,
                                    app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=video_ids)
             ge = gevent.spawn(filter_.filter_videos, pool_type='flow',
                               region_code=region_code, shield_config=self.shield_config)
             ge.join()
             filtered_result = ge.get()
+            print("per pro time2:", (time.time()-st_get)*1000)
             # 检查可分发数
             if filtered_result:
                 st_check = time.time()
@@ -521,6 +526,7 @@ class PoolRecall(object):
                 #     'operation': 'check_video_counts',
                 #     'executeTime': (time.time() - st_check) * 1000
                 # })
+                print("per pro time3:", (time.time()-st_get)*1000)
 
                 for item in check_result:
                     video_id = int(item[0])
@@ -543,6 +549,7 @@ class PoolRecall(object):
                 # if error_flag:
                 #     # 结束流量池召回
                 #     break
+                print("per pro time4:", (time.time()-st_get)*1000)
 
             idx += get_size
 
@@ -2109,9 +2116,9 @@ class PoolRecall(object):
     #linfan
     def get_sim_hot_item_reall(self):
         recall_key = "sim_hot_"+str(self.video_id)
-        print("recall_key:", recall_key)
+        #print("recall_key:", recall_key)
         data = self.redis_helper.get_data_from_redis(key_name=recall_key)
-        print(data)
+        #print(data)
         recall_result = []
         if data is not  None:
             json_result =json.loads(data)