
update region_rule_rank_h_new

liqian 1 year ago
parent
commit b4b0f6f3dd
1 changed file with 11 additions and 7 deletions

region_rule_rank_h_new.py  (+11 −7)

@@ -243,7 +243,8 @@ def add_videos(initial_df, now_date, rule_key, region, data_key, hour_count, top
     return df
 
 
-def video_rank(df, now_date, now_h, rule_key, param, region, data_key, add_videos_with_pre_h=False, hour_count=0):
+def video_rank(df, now_date, now_h, rule_key, param, region, data_key, add_videos_with_pre_h=False, hour_count=0,
+               videos_count=40):
     """
     Fetch videos that meet the conditions for entering the recall source and merge them with the daily-updated rov model result video list
     :param hour_count:
@@ -319,7 +320,7 @@ def video_rank(df, now_date, now_h, rule_key, param, region, data_key, add_video
         log_.info(f"h_recall_result count = {len(h_recall_result)}")
         # write to the corresponding redis key
         redis_helper.set_data_to_redis(
-            key_name=h_recall_key_name, value=json.dumps(h_recall_result[:10000]), expire_time=30 * 24 * 3600
+            key_name=h_recall_key_name, value=json.dumps(h_recall_result[:videos_count]), expire_time=30 * 24 * 3600
         )
         # write to a local file
         filename = f"{region}_{data_key}_{rule_key}_{datetime.datetime.strftime(now_date, '%Y%m%d%H')}.txt"
@@ -334,7 +335,7 @@ def video_rank(df, now_date, now_h, rule_key, param, region, data_key, add_video
                  shield_config=shield_config)
 
 
-def dup_data(initial_key_name, dup_key_name, region, political_filter, shield_config, filepath):
+def dup_data(initial_key_name, dup_key_name, region, political_filter, shield_config, filepath, videos_count):
     redis_helper = RedisHelper()
     if redis_helper.key_exists(key_name=initial_key_name):
         initial_data = redis_helper.get_all_data_from_zset(key_name=initial_key_name, with_scores=True)
@@ -363,7 +364,7 @@ def dup_data(initial_key_name, dup_key_name, region, political_filter, shield_co
             log_.info(f"data count = {len(data)}")
             # write to the corresponding redis key
             redis_helper.set_data_to_redis(
-                key_name=dup_key_name, value=json.dumps(data[:10000]), expire_time=30 * 24 * 3600
+                key_name=dup_key_name, value=json.dumps(data[:videos_count]), expire_time=30 * 24 * 3600
             )
             # write to a local file
             data2file(data=json.dumps(data), filepath=filepath)
@@ -379,7 +380,8 @@ def dup_to_redis(now_date, now_h, rule_key, region_24h_rule_key, by_24h_rule_key
     region_24h_dup_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H}{region}:{data_key}:{rule_key}"
     filename = f"{region}_{data_key}_{rule_key}_{datetime.datetime.strftime(now_date, '%Y%m%d%H')}.txt"
     dup_data(initial_key_name=region_24h_key_name, dup_key_name=region_24h_dup_key_name, region=region,
-             political_filter=political_filter, shield_config=shield_config, filepath=f"./data/region24h/{filename}")
+             political_filter=political_filter, shield_config=shield_config, filepath=f"./data/region24h/{filename}",
+             videos_count=100)
 
     # ##### mini-program relative-24h update results, saved separately to redis
     h_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H}{data_key}:{by_24h_rule_key}:" \
@@ -387,7 +389,8 @@ def dup_to_redis(now_date, now_h, rule_key, region_24h_rule_key, by_24h_rule_key
     h_24h_dup_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H}{region}:{data_key}:{rule_key}"
     filename = f"{region}_{data_key}_{rule_key}_{datetime.datetime.strftime(now_date, '%Y%m%d%H')}.txt"
     dup_data(initial_key_name=h_24h_key_name, dup_key_name=h_24h_dup_key_name, region=region,
-             political_filter=political_filter, shield_config=shield_config, filepath=f"./data/24h/{filename}")
+             political_filter=political_filter, shield_config=shield_config, filepath=f"./data/24h/{filename}",
+             videos_count=100)
 
     # ##### deduplicated mini-program relative-24h: update results for the data remaining after filtering, saved separately to redis
     other_h_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H_OTHER}{data_key}:" \
@@ -395,7 +398,8 @@ def dup_to_redis(now_date, now_h, rule_key, region_24h_rule_key, by_24h_rule_key
     other_h_24h_dup_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H}{region}:{data_key}:{rule_key}"
     filename = f"{region}_{data_key}_{rule_key}_{datetime.datetime.strftime(now_date, '%Y%m%d%H')}.txt"
     dup_data(initial_key_name=other_h_24h_key_name, dup_key_name=other_h_24h_dup_key_name, region=region,
-             political_filter=political_filter, shield_config=shield_config, filepath=f"./data/24h_other/{filename}")
+             political_filter=political_filter, shield_config=shield_config, filepath=f"./data/24h_other/{filename}",
+             videos_count=200)
 
 
 def merge_df(df_left, df_right):
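
The commit replaces the fixed 10000-item slice with a configurable videos_count when writing each recall list to Redis (40 by default in video_rank, 100/100/200 for the three dup_data calls). A minimal sketch of that write pattern, assuming a RedisHelper exposing set_data_to_redis(key_name, value, expire_time) as used in the diff above; the helper name write_recall_to_redis is hypothetical:

import json

def write_recall_to_redis(redis_helper, key_name, items, videos_count=40,
                          expire_time=30 * 24 * 3600):
    # Hypothetical helper illustrating the truncation introduced in this
    # commit: only the first `videos_count` items are serialized and stored.
    # `redis_helper` is assumed to provide set_data_to_redis() with the same
    # signature used in region_rule_rank_h_new.py.
    redis_helper.set_data_to_redis(
        key_name=key_name,
        value=json.dumps(items[:videos_count]),
        expire_time=expire_time,
    )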