Kaynağa Gözat

add relevant videos filter

liqian 3 yıl önce
ebeveyn
işleme
ab2c3da67c
4 değiştirilmiş dosya ile 91 ekleme ve 8 silme
  1. 2 0
      config.py
  2. 9 2
      db_helper.py
  3. 16 6
      relevant_top_videos.py
  4. 64 0
      videos_filter.py

+ 2 - 0
config.py

@@ -77,6 +77,8 @@ class BaseConfig(object):
 
     # 头部视频对应运营强插的相关视频 redis 存储 key 前缀, 完整key格式:com.weiqu.video.relevant.videos.item.{videoId}
     RELEVANT_VIDEOS_WITH_OP_KEY_NAME = 'com.weiqu.video.relevant.videos.item.'
+    # 有设置运营强插相关视频的头部视频id redis存储key
+    RELEVANT_TOP_VIDEOS_KEY_NAME = 'com.weiqu.video.relevant.top.video_ids'
 
 
 class DevelopmentConfig(BaseConfig):

+ 9 - 2
db_helper.py

@@ -174,8 +174,15 @@ class RedisHelper(object):
         if not conn.exists(key_name):
             # key不存在
             return None
-        data = conn.sscan(key_name)
-        return data[1]
+        data = []
+        cursor = 0
+        while True:
+            cur, temp = conn.sscan(key_name, cursor=cursor, count=2000)
+            data.extend(temp)
+            if cur == 0:
+                break
+            cursor = cur
+        return list(set(data))
 
     def add_data_with_set(self, key_name, values, expire_time=30*60):
         """

+ 16 - 6
relevant_top_videos.py

@@ -3,6 +3,7 @@ import json
 import time
 from config import set_config
 from db_helper import RedisHelper
+from utils import filter_video_status
 
 config_, _ = set_config()
 
@@ -19,12 +20,16 @@ def get_relevant_videos_with_excel():
         head_vid = int(df.iloc[i]['headVid'])
         order = int(df.iloc[i]['order'])
         recommend_vid = int(df.iloc[i]['recommendVid'])
+        # 状态过滤
+        filtered_videos = filter_video_status(video_ids=[recommend_vid])
+        if not filtered_videos or len(filtered_videos) == 0:
+            continue
         # 将时间转换为10位时间戳
         start_time = int(time.mktime(time.strptime(df.iloc[i]['startTime'].split(r'.')[0], '%Y-%m-%d %H:%M:%S')))
         finish_time = int(time.mktime(time.strptime(df.iloc[i]['finishTime'].split(r'.')[0], '%Y-%m-%d %H:%M:%S')))
         item = {
             'order': order,
-            'recommend_vid': recommend_vid,
+            'recommend_vid': filtered_videos[0],
             'start_time': start_time,
             'finish_time': finish_time
         }
@@ -35,12 +40,13 @@ def get_relevant_videos_with_excel():
             head_videos.append(head_vid)
 
     print(head_videos)
-    return relevant_videos
+    return relevant_videos, head_videos
 
 
-def update_relevant_videos_to_redis(relevant_videos):
+def update_relevant_videos_to_redis(relevant_videos, head_videos):
     if not relevant_videos:
         return
+    redis_helper = RedisHelper()
     for head_vid, videos in relevant_videos.items():
         # 拼接key
         key_name = '{}{}'.format(config_.RELEVANT_VIDEOS_WITH_OP_KEY_NAME, head_vid)
@@ -52,11 +58,15 @@ def update_relevant_videos_to_redis(relevant_videos):
         if expire_time <= 0:
             return
         # 存入redis
-        redis_helper = RedisHelper()
         redis_helper.set_data_to_redis(key_name=key_name, value=videos_json, expire_time=expire_time)
         print('head_vid = {} relevant videos update finished!'.format(head_vid))
 
+    # 将头部id存入redis中
+    redis_helper.add_data_with_set(key_name=config_.RELEVANT_TOP_VIDEOS_KEY_NAME,
+                                   values=tuple(head_videos), expire_time=24*3600)
+    print('relevant top videos update finished!')
+
 
 if __name__ == '__main__':
-    relevant_videos = get_relevant_videos_with_excel()
-    update_relevant_videos_to_redis(relevant_videos=relevant_videos)
+    relevant_videos, head_videos = get_relevant_videos_with_excel()
+    update_relevant_videos_to_redis(relevant_videos=relevant_videos, head_videos=head_videos)

+ 64 - 0
videos_filter.py

@@ -1,4 +1,5 @@
 import time
+import json
 import traceback
 from datetime import date, timedelta, datetime
 
@@ -11,6 +12,67 @@ config_, env = set_config()
 log_ = Log()
 
 
+def filter_relevant_videos():
+    """Re-filter operator-pinned relevant videos stored in redis, dropping unavailable or finished items."""
+    log_.info("relevant videos with op filter start...")
+    # Read the head video ids to filter; the result may be None (presumably a missing key) or empty
+    redis_helper = RedisHelper()
+    head_videos = redis_helper.get_data_from_set(key_name=config_.RELEVANT_TOP_VIDEOS_KEY_NAME)
+    if not head_videos:
+        return
+
+    # Filter each head video's relevant list, collecting head ids that no longer have one
+    remove_head_vids = []
+    for head_vid in head_videos:
+        key_name = '{}{}'.format(config_.RELEVANT_VIDEOS_WITH_OP_KEY_NAME, head_vid)
+        # The per-head key no longer exists: schedule head_vid for removal from the head-id set
+        if not redis_helper.key_exists(key_name=key_name):
+            remove_head_vids.append(head_vid)
+            log_.info('head_vid = {} relevant redis key not exist!'.format(head_vid))
+            continue
+
+        # Fetch the relevant videos configured for this head video
+        relevant_videos = redis_helper.get_data_from_redis(key_name=key_name)
+        # No relevant videos stored for this head video: schedule head_vid for removal
+        if relevant_videos is None:
+            remove_head_vids.append(head_vid)
+            log_.info('head_vid = {} not have relevant videos!'.format(head_vid))
+            continue
+        # Status filter; fall back to an empty list if the helper returns a falsy value
+        relevant_videos = json.loads(relevant_videos)
+        relevant_video_ids = [int(item['recommend_vid']) for item in relevant_videos]
+        filtered_videos = filter_video_status(video_ids=relevant_video_ids) or []
+        # Keep items that passed the status filter and have not finished yet (finish_time in the future)
+        relevant_videos_new = [
+            item for item in relevant_videos
+            if int(item['recommend_vid']) in filtered_videos and int(item['finish_time']) > int(time.time())
+        ]
+
+        # Nothing left after filtering: schedule head_vid for removal and delete its relevant-videos key
+        if len(relevant_videos_new) == 0:
+            remove_head_vids.append(head_vid)
+            redis_helper.del_keys(key_name=key_name)
+            log_.info('head_vid = {} filtered finished! new relevant videos count = {}'.format(
+                head_vid, len(relevant_videos_new)))
+            continue
+
+        # Write the filtered list back to redis
+        # Key expiry = latest finish time among kept items - now + 5s (positive, since kept items end later)
+        finish_time_list = [item['finish_time'] for item in relevant_videos_new]
+        expire_time = max(finish_time_list) - int(time.time()) + 5
+        # Store in redis
+        redis_helper.set_data_to_redis(key_name=key_name,
+                                       value=json.dumps(relevant_videos_new),
+                                       expire_time=expire_time)
+        log_.info('head_vid = {} filtered finished! new relevant videos count = {}'.format(
+                head_vid, len(relevant_videos_new)))
+
+    # Remove the collected head video ids from the head-id set
+    redis_helper.remove_value_from_set(key_name=config_.RELEVANT_TOP_VIDEOS_KEY_NAME, values=tuple(remove_head_vids))
+    log_.info('head videos remove finished! remove_head_vids = {}'.format(remove_head_vids))
+    log_.info("relevant videos with op filter end!")
+
+
 def filter_rov_pool(app_type=None):
     """ROV召回池视频过滤"""
     log_.info("rov recall pool filter start ...")
@@ -198,6 +260,8 @@ def main():
         filter_bottom()
         # 修改过ROV的视频过滤
         filter_rov_updated()
+        # 运营强插相关推荐视频过滤
+        filter_relevant_videos()
     except Exception as e:
         log_.error(traceback.format_exc())
         send_msg_to_feishu('{} - 过滤失败 \n {}'.format(config_.ENV_TEXT, traceback.format_exc()))