|
@@ -17,7 +17,7 @@ from functools import reduce
|
|
|
from odps import ODPS
|
|
|
from threading import Timer, Thread
|
|
|
from utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, \
|
|
|
- check_table_partition_exits, filter_video_status_app, send_msg_to_feishu
|
|
|
+ check_table_partition_exits, filter_video_status_app, send_msg_to_feishu, filter_political_videos
|
|
|
from config import set_config
|
|
|
from log import Log
|
|
|
from check_video_limit_distribute import update_limit_video_score
|
|
@@ -249,6 +249,13 @@ def video_rank(df, now_date, now_h, rule_key, param, region, data_key, rule_rank
|
|
|
filtered_videos = filter_shield_video(video_ids=filtered_videos, shield_key_name_list=shield_key_name_list)
|
|
|
# log_.info(f"shield filtered_videos count = {len(filtered_videos)}")
|
|
|
|
|
|
+ # 涉政视频过滤
|
|
|
+ political_filter = param.get('political_filter', None)
|
|
|
+ if political_filter is True:
|
|
|
+ log_.info(f"political filter videos count = {len(filtered_videos)}")
|
|
|
+ filtered_videos = filter_political_videos(video_ids=filtered_videos)
|
|
|
+ log_.info(f"political filtered videos count = {len(filtered_videos)}")
|
|
|
+
|
|
|
# 写入对应的redis
|
|
|
h_video_ids = []
|
|
|
by_30day_rule_key = param.get('30day_rule_key', None)
|
|
@@ -283,10 +290,10 @@ def video_rank(df, now_date, now_h, rule_key, param, region, data_key, rule_rank
|
|
|
# 与其他召回视频池去重,存入对应的redis
|
|
|
dup_to_redis(h_video_ids=h_video_ids, now_date=now_date, now_h=now_h, rule_key=rule_key,
|
|
|
region_24h_rule_key=region_24h_rule_key, by_24h_rule_key=by_24h_rule_key, by_48h_rule_key=by_48h_rule_key,
|
|
|
- region=region, data_key=data_key, rule_rank_h_flag=rule_rank_h_flag)
|
|
|
+ region=region, data_key=data_key, rule_rank_h_flag=rule_rank_h_flag, political_filter=political_filter)
|
|
|
|
|
|
|
|
|
-def dup_data(h_video_ids, initial_key_name, dup_key_name, region):
|
|
|
+def dup_data(h_video_ids, initial_key_name, dup_key_name, region, political_filter):
|
|
|
redis_helper = RedisHelper()
|
|
|
if redis_helper.key_exists(key_name=initial_key_name):
|
|
|
initial_data = redis_helper.get_all_data_from_zset(key_name=initial_key_name, with_scores=True)
|
|
@@ -294,7 +301,11 @@ def dup_data(h_video_ids, initial_key_name, dup_key_name, region):
|
|
|
initial_video_ids = [int(video_id) for video_id, _ in initial_data]
|
|
|
shield_key_name_list = config_.SHIELD_CONFIG.get(region, None)
|
|
|
if shield_key_name_list is not None:
|
|
|
- initial_video_ids = filter_shield_video(video_ids=initial_video_ids, shield_key_name_list=shield_key_name_list)
|
|
|
+ initial_video_ids = filter_shield_video(video_ids=initial_video_ids,
|
|
|
+ shield_key_name_list=shield_key_name_list)
|
|
|
+ # 涉政视频过滤
|
|
|
+ if political_filter is True:
|
|
|
+ initial_video_ids = filter_political_videos(video_ids=initial_video_ids)
|
|
|
|
|
|
dup_data = {}
|
|
|
for video_id, score in initial_data:
|
|
@@ -309,7 +320,8 @@ def dup_data(h_video_ids, initial_key_name, dup_key_name, region):
|
|
|
return h_video_ids
|
|
|
|
|
|
|
|
|
-def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by_24h_rule_key, by_48h_rule_key, region, data_key, rule_rank_h_flag):
|
|
|
+def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by_24h_rule_key, by_48h_rule_key,
|
|
|
+ region, data_key, rule_rank_h_flag, political_filter):
|
|
|
"""将地域分组小时级数据与其他召回视频池去重,存入对应的redis"""
|
|
|
# ##### 去重更新地域分组小时级24h列表,并另存为redis中
|
|
|
region_24h_key_name = \
|
|
@@ -319,7 +331,7 @@ def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by
|
|
|
f"{config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
|
|
|
f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=region_24h_key_name,
|
|
|
- dup_key_name=region_24h_dup_key_name, region=region)
|
|
|
+ dup_key_name=region_24h_dup_key_name, region=region, political_filter=political_filter)
|
|
|
|
|
|
if rule_rank_h_flag == '48h':
|
|
|
|
|
@@ -330,7 +342,7 @@ def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by
|
|
|
f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_48H_H}{region}:{data_key}:{rule_key}:" \
|
|
|
f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_48h_key_name,
|
|
|
- dup_key_name=h_48h_dup_key_name, region=region)
|
|
|
+ dup_key_name=h_48h_dup_key_name, region=region, political_filter=political_filter)
|
|
|
|
|
|
# ##### 去重小程序相对48h 筛选后剩余数据 更新结果,并另存为redis中
|
|
|
if by_48h_rule_key == 'rule1':
|
|
@@ -340,7 +352,8 @@ def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by
|
|
|
f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_48H_H}{region}:{data_key}:{rule_key}:" \
|
|
|
f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=other_h_48h_key_name,
|
|
|
- dup_key_name=other_h_48h_dup_key_name, region=region)
|
|
|
+ dup_key_name=other_h_48h_dup_key_name, region=region,
|
|
|
+ political_filter=political_filter)
|
|
|
|
|
|
else:
|
|
|
# ##### 去重小程序相对24h更新结果,并另存为redis中
|
|
@@ -350,7 +363,7 @@ def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by
|
|
|
f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
|
|
|
f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_24h_key_name,
|
|
|
- dup_key_name=h_24h_dup_key_name, region=region)
|
|
|
+ dup_key_name=h_24h_dup_key_name, region=region, political_filter=political_filter)
|
|
|
|
|
|
# ##### 去重小程序相对24h 筛选后剩余数据 更新结果,并另存为redis中
|
|
|
# if by_24h_rule_key in ['rule3', 'rule4']:
|
|
@@ -360,7 +373,7 @@ def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by
|
|
|
f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
|
|
|
f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=other_h_24h_key_name,
|
|
|
- dup_key_name=other_h_24h_dup_key_name, region=region)
|
|
|
+ dup_key_name=other_h_24h_dup_key_name, region=region, political_filter=political_filter)
|
|
|
|
|
|
# ##### 去重小程序模型更新结果,并另存为redis中
|
|
|
# model_key_name = get_rov_redis_key(now_date=now_date)
|
|
@@ -681,6 +694,8 @@ def h_rank_bottom(now_date, now_h, rule_params, region_code_list, rule_rank_h_fl
|
|
|
region_24h_rule_key = rule_param.get('region_24h_rule_key', 'rule1')
|
|
|
by_24h_rule_key = rule_param.get('24h_rule_key', None)
|
|
|
by_48h_rule_key = rule_param.get('48h_rule_key', None)
|
|
|
+ # 涉政视频过滤
|
|
|
+ political_filter = param.get('political_filter', None)
|
|
|
for region in region_code_list:
|
|
|
log_.info(f"region = {region}")
|
|
|
key_name = f"{key_prefix}{region}:{data_key}:{rule_key}:{redis_dt}:{redis_h}"
|
|
@@ -701,7 +716,8 @@ def h_rank_bottom(now_date, now_h, rule_params, region_code_list, rule_rank_h_fl
|
|
|
dup_to_redis(h_video_ids=h_video_ids, now_date=now_date, now_h=now_h, rule_key=rule_key,
|
|
|
region_24h_rule_key=region_24h_rule_key, region=region,
|
|
|
data_key=data_key, by_24h_rule_key=by_24h_rule_key,
|
|
|
- by_48h_rule_key=by_48h_rule_key, rule_rank_h_flag=rule_rank_h_flag)
|
|
|
+ by_48h_rule_key=by_48h_rule_key, rule_rank_h_flag=rule_rank_h_flag,
|
|
|
+ political_filter=political_filter)
|
|
|
# 特殊城市视频数据准备
|
|
|
for region, city_list in config_.REGION_CITY_MAPPING.items():
|
|
|
t = [
|