Browse Source

Merge branch 'dev' into pre-master

liqian 3 years ago
parent
commit
cc7b9b65ab
3 changed files with 36 additions and 10 deletions
  1. 20 9
      utils.py
  2. 15 1
      video_rank.py
  3. 1 0
      video_recall.py

+ 20 - 9
utils.py

@@ -116,15 +116,26 @@ class FilterVideos(object):
         :param video_ids: 视频id列表 type-list
         :return: filtered_videos
         """
-        sql = "SELECT video_id " \
-              "FROM {} " \
-              "WHERE audit_status = 5 " \
-              "AND applet_rec_status IN (1, -6) " \
-              "AND open_status = 1 " \
-              "AND payment_status = 0 " \
-              "AND encryption_status IS NULL " \
-              "AND transcoding_status = 3 " \
-              "AND video_id IN {};".format(config_.VIDEO_STATUS, tuple(video_ids))
+        if len(video_ids) == 1:
+            sql = "SELECT video_id " \
+                  "FROM {} " \
+                  "WHERE audit_status = 5 " \
+                  "AND applet_rec_status IN (1, -6) " \
+                  "AND open_status = 1 " \
+                  "AND payment_status = 0 " \
+                  "AND encryption_status IS NULL " \
+                  "AND transcoding_status = 3 " \
+                  "AND video_id IN ({});".format(config_.VIDEO_STATUS, video_ids[0])
+        else:
+            sql = "SELECT video_id " \
+                  "FROM {} " \
+                  "WHERE audit_status = 5 " \
+                  "AND applet_rec_status IN (1, -6) " \
+                  "AND open_status = 1 " \
+                  "AND payment_status = 0 " \
+                  "AND encryption_status IS NULL " \
+                  "AND transcoding_status = 3 " \
+                  "AND video_id IN {};".format(config_.VIDEO_STATUS, tuple(video_ids))
 
         hologres_helper = HologresHelper()
         data = hologres_helper.get_data(sql=sql)

+ 15 - 1
video_rank.py

@@ -27,6 +27,8 @@ def video_rank(data, size):
     flow_recall_rank = sorted(data['flow_pool_recall'], key=lambda k: (k.get('rovScore'), 0), reverse=True)
     # 对各路召回的视频进行去重
     rov_recall_rank, flow_recall_rank = remove_duplicate(rov_recall=rov_recall_rank, flow_recall=flow_recall_rank)
+    log_.info('remove_duplicate finished! rov_recall_rank = {}, flow_recall_rank = {}'.format(
+        rov_recall_rank, flow_recall_rank))
     # 从ROV召回池中获取top k
     if len(rov_recall_rank) > 0:
         rank_result = rov_recall_rank[:config_.K]
@@ -40,6 +42,7 @@ def video_rank(data, size):
     while i < size - config_.K:
         # 随机生成[0, 1)浮点数
         rand = random.random()
+        log_.info('rand: {}'.format(rand))
         if rand < config_.P:
             if flow_recall_rank:
                 rank_result.append(flow_recall_rank[0])
@@ -84,7 +87,7 @@ def remove_duplicate(rov_recall, flow_recall):
         rov_recall.remove(item)
     # flow recall remove
     for item in flow_recall:
-        if item['videoId'] not in flow_recall_video_ids:
+        if item['videoId'] in flow_recall_video_ids:
             flow_recall_result.append(item)
 
     return rov_recall, flow_recall_result
@@ -119,3 +122,14 @@ def bottom_strategy(size, app_type, ab_code):
         random_data = filtered_data
     bottom_data = [{'videoId': item, 'pushFrom': 'bottom_strategy', 'abCode': ab_code} for item in random_data]
     return bottom_data
+
+
+if __name__ == '__main__':
+    d_test = [[{'videoId': 3674236, 'rovScore': 99.24105262298141, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 1915009, 'rovScore': 99.248872388032, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 9033859, 'rovScore': 99.21956695197761, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 4258137, 'rovScore': 99.24737622823497, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 9034962, 'rovScore': 99.18993382219318, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 1922051, 'rovScore': 99.2351969813565, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 7829308, 'rovScore': 99.25465474490638, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 3247671, 'rovScore': 99.24601245746983, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 5831941, 'rovScore': 99.16776814766304, 'pushFrom': 'recall_pool', 'abCode': 10000}, {'videoId': 7837973, 'rovScore': 99.253749334822, 'pushFrom': 'recall_pool', 'abCode': 10000}], [{'videoId': 9035245, 'flowPool': '1#1#1#1636085384424', 'rovScore': 1.0, 'pushFrom': 'flow_pool', 'abCode': 10000}, {'videoId': 9034828, 'flowPool': '1#1#1#1636090368461', 'rovScore': 1.0, 'pushFrom': 'flow_pool', 'abCode': 10000}, {'videoId': 9035244, 'flowPool': '1#1#1#1636085467105', 'rovScore': 1.0, 'pushFrom': 'flow_pool', 'abCode': 10000}, {'videoId': 9035237, 'flowPool': '1#1#1#1636086478074', 'rovScore': 1.0, 'pushFrom': 'flow_pool', 'abCode': 10000}]]
+    data = {
+        'rov_pool_recall': d_test[0],
+        'flow_pool_recall': d_test[1]
+    }
+    res = video_rank(data, size=10)
+    for item in res:
+        print(item)

+ 1 - 0
video_recall.py

@@ -149,6 +149,7 @@ class PoolRecall(object):
             for flow_pool in flow_pool_mapping[video_id]:
                 videos.append({'videoId': video_id, 'flowPool': flow_pool})
         view_count_result = get_videos_remain_view_count(app_type=self.app_type, videos=videos)
+        log_.info('view_count_result = {}'.format(view_count_result))
         if not view_count_result:
             return None
         check_result = []