|
@@ -801,6 +801,39 @@ def filter_data(videos, region):
|
|
|
filter_df.to_csv(file, index=False)
|
|
|
|
|
|
|
|
|
def filter_whole_movies():
    """Filter the whole-movies recall data stored in Redis for the current hour.

    Builds the zset key from ``config_.RECALL_KEY_NAME_PREFIX_WHOLE_MOVIES``
    followed by ``<YYYYMMDD>.<hour>``, reads every member of that zset, passes
    the ids through ``filter_video_status`` and removes from the zset every id
    that is absent from the filter result.

    Returns:
        None. The function only logs and updates Redis.
    """
    log_.info("whole movies filter start ...")
    redis_helper = RedisHelper()

    # Take a single clock snapshot so the date and the hour always belong to
    # the same instant; reading them separately can pair yesterday's date with
    # hour 0 when the call straddles midnight, producing a wrong key.
    now = datetime.now()
    now_date = now.strftime('%Y%m%d')
    now_h = now.hour
    log_.info(f'now_date = {now_date}, now_h = {now_h}.')

    # Build the key.
    key_name = f'{config_.RECALL_KEY_NAME_PREFIX_WHOLE_MOVIES}{now_date}.{now_h}'

    # Fetch the videos (the whole range of the zset).
    data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1)
    if data is None:
        log_.info("data is None")
        log_.info("whole movies filter end!")
        return

    # Filter.
    video_ids = [int(video_id) for video_id in data]
    filtered_result = filter_video_status(video_ids=video_ids)

    # Set difference: the videos that must be filtered out and removed from redis.
    filter_videos = set(video_ids) - set(filtered_result)
    log_.info("video_ids size = {}, filtered size = {}, filter sizer = {}".format(len(video_ids),
                                                                                  len(filtered_result),
                                                                                  len(filter_videos)))
    log_.info({'key_name': key_name, 'filter_videos': filter_videos})

    # Only touch Redis when there is something to remove.
    if filter_videos:
        redis_helper.remove_value_from_zset(key_name=key_name, value=list(filter_videos))
    log_.info("whole movies filter end!")
|
|
|
+
|
|
|
+
|
|
|
def main():
|
|
|
try:
|
|
|
# ROV召回池视频过滤
|
|
@@ -840,6 +873,8 @@ def main():
|
|
|
filter_rov_h_24h()
|
|
|
# 过滤地域分组24h规则视频
|
|
|
filter_region_videos_24h()
|
|
|
+ # 过滤完整电影数据
|
|
|
+ filter_whole_movies()
|
|
|
except Exception as e:
|
|
|
log_.error(traceback.format_exc())
|
|
|
send_msg_to_feishu(
|