|
@@ -196,15 +196,15 @@ def video_rank(df, now_date, now_h, rule_key, param, region):
|
|
redis_helper.add_data_with_zset(key_name=initial_key_name, data=initial_data_dup, expire_time=23 * 3600)
|
|
redis_helper.add_data_with_zset(key_name=initial_key_name, data=initial_data_dup, expire_time=23 * 3600)
|
|
|
|
|
|
|
|
|
|
-def rank_by_h(project, table, now_date, now_h, rule_params):
|
|
|
|
|
|
+def rank_by_h(project, table, now_date, now_h, rule_params, region_code_list):
|
|
# 获取特征数据
|
|
# 获取特征数据
|
|
feature_df = get_feature_data(project=project, table=table, now_date=now_date)
|
|
feature_df = get_feature_data(project=project, table=table, now_date=now_date)
|
|
# 获取所有的region
|
|
# 获取所有的region
|
|
- region_list = list(set(feature_df[''].to_list()))
|
|
|
|
|
|
+ # region_code_list = list(set(feature_df[''].to_list()))
|
|
# rank
|
|
# rank
|
|
for key, value in rule_params.items():
|
|
for key, value in rule_params.items():
|
|
log_.info(f"rule = {key}, param = {value}")
|
|
log_.info(f"rule = {key}, param = {value}")
|
|
- for region in region_list:
|
|
|
|
|
|
+ for region in region_code_list:
|
|
log_.info(f"region = {region}")
|
|
log_.info(f"region = {region}")
|
|
# 计算score
|
|
# 计算score
|
|
score_df = cal_score(df=feature_df)
|
|
score_df = cal_score(df=feature_df)
|
|
@@ -214,7 +214,7 @@ def rank_by_h(project, table, now_date, now_h, rule_params):
|
|
score_df.to_csv(f'./data/{score_filename}')
|
|
score_df.to_csv(f'./data/{score_filename}')
|
|
|
|
|
|
|
|
|
|
-def h_rank_bottom(now_date, now_h, rule_key, project, table):
|
|
|
|
|
|
+def h_rank_bottom(now_date, now_h, rule_key, region_code_list):
|
|
"""未按时更新数据,用上一小时结果作为当前小时的数据"""
|
|
"""未按时更新数据,用上一小时结果作为当前小时的数据"""
|
|
log_.info(f"rule_key = {rule_key}")
|
|
log_.info(f"rule_key = {rule_key}")
|
|
# 获取rov模型结果
|
|
# 获取rov模型结果
|
|
@@ -227,9 +227,9 @@ def h_rank_bottom(now_date, now_h, rule_key, project, table):
|
|
redis_h = now_h - 1
|
|
redis_h = now_h - 1
|
|
|
|
|
|
key_prefix_list = [config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H, config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H]
|
|
key_prefix_list = [config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H, config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H]
|
|
- fea_df = get_feature_data(project=project, table=table, now_date=now_date - datetime.timedelta(hours=1))
|
|
|
|
- region_list = list(set(fea_df[''].to_list()))
|
|
|
|
- for region in region_list:
|
|
|
|
|
|
+ # fea_df = get_feature_data(project=project, table=table, now_date=now_date - datetime.timedelta(hours=1))
|
|
|
|
+ # region_list = list(set(fea_df[''].to_list()))
|
|
|
|
+ for region in region_code_list:
|
|
log_.info(f"region = {region}")
|
|
log_.info(f"region = {region}")
|
|
for key_prefix in key_prefix_list:
|
|
for key_prefix in key_prefix_list:
|
|
key_name = f"{key_prefix}{region}.{rule_key}.{redis_dt}.{redis_h}"
|
|
key_name = f"{key_prefix}{region}.{rule_key}.{redis_dt}.{redis_h}"
|
|
@@ -250,24 +250,26 @@ def h_timer_check():
|
|
rule_params = config_.RULE_PARAMS_REGION
|
|
rule_params = config_.RULE_PARAMS_REGION
|
|
project = config_.PROJECT_REGION
|
|
project = config_.PROJECT_REGION
|
|
table = config_.TABLE_REGION
|
|
table = config_.TABLE_REGION
|
|
|
|
+ region_code_list = [code for region, code in region_code.items()]
|
|
now_date = datetime.datetime.today()
|
|
now_date = datetime.datetime.today()
|
|
log_.info(f"now_date: {datetime.datetime.strftime(now_date, '%Y%m%d%H')}")
|
|
log_.info(f"now_date: {datetime.datetime.strftime(now_date, '%Y%m%d%H')}")
|
|
now_h = datetime.datetime.now().hour
|
|
now_h = datetime.datetime.now().hour
|
|
now_min = datetime.datetime.now().minute
|
|
now_min = datetime.datetime.now().minute
|
|
if now_h == 0:
|
|
if now_h == 0:
|
|
for key, _ in rule_params.items():
|
|
for key, _ in rule_params.items():
|
|
- h_rank_bottom(now_date=now_date, now_h=now_h, rule_key=key, project=project, table=table)
|
|
|
|
|
|
+ h_rank_bottom(now_date=now_date, now_h=now_h, rule_key=key, region_code_list=region_code_list)
|
|
return
|
|
return
|
|
# 查看当前小时更新的数据是否已准备好
|
|
# 查看当前小时更新的数据是否已准备好
|
|
h_data_count = h_data_check(project=project, table=table, now_date=now_date)
|
|
h_data_count = h_data_check(project=project, table=table, now_date=now_date)
|
|
if h_data_count > 0:
|
|
if h_data_count > 0:
|
|
log_.info(f'h_data_count = {h_data_count}')
|
|
log_.info(f'h_data_count = {h_data_count}')
|
|
# 数据准备好,进行更新
|
|
# 数据准备好,进行更新
|
|
- rank_by_h(now_date=now_date, now_h=now_h, rule_params=rule_params, project=project, table=table)
|
|
|
|
|
|
+ rank_by_h(now_date=now_date, now_h=now_h, rule_params=rule_params,
|
|
|
|
+ project=project, table=table, region_code_list=region_code_list)
|
|
elif now_min > 50:
|
|
elif now_min > 50:
|
|
log_.info('h_recall data is None, use bottom data!')
|
|
log_.info('h_recall data is None, use bottom data!')
|
|
for key, _ in rule_params.items():
|
|
for key, _ in rule_params.items():
|
|
- h_rank_bottom(now_date=now_date, now_h=now_h, rule_key=key, project=project, table=table)
|
|
|
|
|
|
+ h_rank_bottom(now_date=now_date, now_h=now_h, rule_key=key, region_code_list=region_code_list)
|
|
else:
|
|
else:
|
|
# 数据没准备好,1分钟后重新检查
|
|
# 数据没准备好,1分钟后重新检查
|
|
Timer(60, h_timer_check).start()
|
|
Timer(60, h_timer_check).start()
|