|
@@ -243,7 +243,8 @@ def add_videos(initial_df, now_date, rule_key, region, data_key, hour_count, top
|
|
|
return df
|
|
|
|
|
|
|
|
|
-def video_rank(df, now_date, now_h, rule_key, param, region, data_key, add_videos_with_pre_h=False, hour_count=0):
|
|
|
+def video_rank(df, now_date, now_h, rule_key, param, region, data_key, add_videos_with_pre_h=False, hour_count=0,
|
|
|
+ videos_count=40):
|
|
|
"""
|
|
|
获取符合进入召回源条件的视频,与每日更新的rov模型结果视频列表进行合并
|
|
|
:param hour_count:
|
|
@@ -319,7 +320,7 @@ def video_rank(df, now_date, now_h, rule_key, param, region, data_key, add_video
|
|
|
log_.info(f"h_recall_result count = {len(h_recall_result)}")
|
|
|
# 写入对应的redis
|
|
|
redis_helper.set_data_to_redis(
|
|
|
- key_name=h_recall_key_name, value=json.dumps(h_recall_result[:10000]), expire_time=30 * 24 * 3600
|
|
|
+ key_name=h_recall_key_name, value=json.dumps(h_recall_result[:videos_count]), expire_time=30 * 24 * 3600
|
|
|
)
|
|
|
# 写入本地文件
|
|
|
filename = f"{region}_{data_key}_{rule_key}_{datetime.datetime.strftime(now_date, '%Y%m%d%H')}.txt"
|
|
@@ -334,7 +335,7 @@ def video_rank(df, now_date, now_h, rule_key, param, region, data_key, add_video
|
|
|
shield_config=shield_config)
|
|
|
|
|
|
|
|
|
-def dup_data(initial_key_name, dup_key_name, region, political_filter, shield_config, filepath):
|
|
|
+def dup_data(initial_key_name, dup_key_name, region, political_filter, shield_config, filepath, videos_count):
|
|
|
redis_helper = RedisHelper()
|
|
|
if redis_helper.key_exists(key_name=initial_key_name):
|
|
|
initial_data = redis_helper.get_all_data_from_zset(key_name=initial_key_name, with_scores=True)
|
|
@@ -363,7 +364,7 @@ def dup_data(initial_key_name, dup_key_name, region, political_filter, shield_co
|
|
|
log_.info(f"data count = {len(data)}")
|
|
|
# 写入对应的redis
|
|
|
redis_helper.set_data_to_redis(
|
|
|
- key_name=dup_key_name, value=json.dumps(data[:10000]), expire_time=30 * 24 * 3600
|
|
|
+ key_name=dup_key_name, value=json.dumps(data[:videos_count]), expire_time=30 * 24 * 3600
|
|
|
)
|
|
|
# 写入本地文件
|
|
|
data2file(data=json.dumps(data), filepath=filepath)
|
|
@@ -379,7 +380,8 @@ def dup_to_redis(now_date, now_h, rule_key, region_24h_rule_key, by_24h_rule_key
|
|
|
region_24h_dup_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H}{region}:{data_key}:{rule_key}"
|
|
|
filename = f"{region}_{data_key}_{rule_key}_{datetime.datetime.strftime(now_date, '%Y%m%d%H')}.txt"
|
|
|
dup_data(initial_key_name=region_24h_key_name, dup_key_name=region_24h_dup_key_name, region=region,
|
|
|
- political_filter=political_filter, shield_config=shield_config, filepath=f"./data/region24h/{filename}")
|
|
|
+ political_filter=political_filter, shield_config=shield_config, filepath=f"./data/region24h/{filename}",
|
|
|
+ videos_count=100)
|
|
|
|
|
|
# ##### 小程序相对24h更新结果,并另存为redis中
|
|
|
h_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H}{data_key}:{by_24h_rule_key}:" \
|
|
@@ -387,7 +389,8 @@ def dup_to_redis(now_date, now_h, rule_key, region_24h_rule_key, by_24h_rule_key
|
|
|
h_24h_dup_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H}{region}:{data_key}:{rule_key}"
|
|
|
filename = f"{region}_{data_key}_{rule_key}_{datetime.datetime.strftime(now_date, '%Y%m%d%H')}.txt"
|
|
|
dup_data(initial_key_name=h_24h_key_name, dup_key_name=h_24h_dup_key_name, region=region,
|
|
|
- political_filter=political_filter, shield_config=shield_config, filepath=f"./data/24h/{filename}")
|
|
|
+ political_filter=political_filter, shield_config=shield_config, filepath=f"./data/24h/{filename}",
|
|
|
+ videos_count=100)
|
|
|
|
|
|
# ##### 去重小程序相对24h 筛选后剩余数据 更新结果,并另存为redis中
|
|
|
other_h_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H_OTHER}{data_key}:" \
|
|
@@ -395,7 +398,8 @@ def dup_to_redis(now_date, now_h, rule_key, region_24h_rule_key, by_24h_rule_key
|
|
|
other_h_24h_dup_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H}{region}:{data_key}:{rule_key}"
|
|
|
filename = f"{region}_{data_key}_{rule_key}_{datetime.datetime.strftime(now_date, '%Y%m%d%H')}.txt"
|
|
|
dup_data(initial_key_name=other_h_24h_key_name, dup_key_name=other_h_24h_dup_key_name, region=region,
|
|
|
- political_filter=political_filter, shield_config=shield_config, filepath=f"./data/24h_other/{filename}")
|
|
|
+ political_filter=political_filter, shield_config=shield_config, filepath=f"./data/24h_other/{filename}",
|
|
|
+ videos_count=200)
|
|
|
|
|
|
|
|
|
def merge_df(df_left, df_right):
|