Browse Source

add filter_whole_movies

liqian 2 years ago
parent
commit
b34b5cde3e
1 changed file with 35 additions and 0 deletions
  1. 35 0
      videos_filter.py

+ 35 - 0
videos_filter.py

@@ -801,6 +801,39 @@ def filter_data(videos, region):
     filter_df.to_csv(file, index=False)
 
 
+def filter_whole_movies():
+    """Filter the whole-movies data: drop videos that fail filter_video_status from the current hour's Redis zset."""
+    log_.info("whole movies filter start ...")
+    redis_helper = RedisHelper()
+    # Get the current date (YYYYMMDD) and the current hour
+    now_date = date.today().strftime('%Y%m%d')
+    now_h = datetime.now().hour
+    log_.info(f'now_date = {now_date}, now_h = {now_h}.')
+    # Build the key: configured prefix + date + '.' + hour (hour is not zero-padded)
+    key_name = f'{config_.RECALL_KEY_NAME_PREFIX_WHOLE_MOVIES}{now_date}.{now_h}'
+    # Fetch the videos: the full range (0 to -1) of the zset stored under key_name
+    data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1)
+    if data is None:  # NOTE(review): presumably returned when the key is missing or empty - confirm in RedisHelper
+        log_.info("data is None")
+        log_.info("whole movies filter end!")
+        return
+    # Filter: cast every member to an int video id, then run the ids through filter_video_status
+    video_ids = [int(video_id) for video_id in data]
+    filtered_result = filter_video_status(video_ids=video_ids)
+    # Take the set difference to get the videos that must be filtered out, and remove them from redis
+    filter_videos = set(video_ids) - set(filtered_result)
+    log_.info("video_ids size = {}, filtered size = {}, filter sizer = {}".format(len(video_ids),
+                                                                                  len(filtered_result),
+                                                                                  len(filter_videos)))
+    log_.info({'key_name': key_name, 'filter_videos': filter_videos})
+
+    if len(filter_videos) == 0:  # nothing to remove, so the redis call below is skipped
+        log_.info("whole movies filter end!")
+        return
+    redis_helper.remove_value_from_zset(key_name=key_name, value=list(filter_videos))
+    log_.info("whole movies filter end!")
+
+
 def main():
     try:
         # ROV召回池视频过滤
@@ -840,6 +873,8 @@ def main():
         filter_rov_h_24h()
         # 过滤地域分组24h规则视频
         filter_region_videos_24h()
+        # 过滤完整电影数据
+        filter_whole_movies()
     except Exception as e:
         log_.error(traceback.format_exc())
         send_msg_to_feishu(