|
@@ -0,0 +1,1272 @@
|
|
|
|
+
|
|
|
|
+import multiprocessing
|
|
|
|
+import sys
|
|
|
|
+import traceback
|
|
|
|
+import gevent
|
|
|
|
+import datetime
|
|
|
|
+import pandas as pd
|
|
|
|
+import math
|
|
|
|
+from functools import reduce
|
|
|
|
+from odps import ODPS
|
|
|
|
+from threading import Timer, Thread
|
|
|
|
+from utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, \
|
|
|
|
+ check_table_partition_exits, filter_video_status_app, send_msg_to_feishu, filter_political_videos
|
|
|
|
+from config import set_config
|
|
|
|
+from log import Log
|
|
|
|
+from check_video_limit_distribute import update_limit_video_score
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
# set_config() returns a pair; only its first element is used in this module.
config_, _ = set_config()
log_ = Log()

region_code = config_.REGION_CODE

# Rule configuration for this job:
#   rule_params - thresholds and related rule keys, per rule name
#   data_params - taken as-is from the project config
#   params_list - the (data, rule) combinations listed for this job
RULE_PARAMS = {
    'rule_params': {
        'rule66': {
            'view_type': 'video-show-region',
            'platform_return_rate': 0.001,
            'region_24h_rule_key': 'rule66',
            '24h_rule_key': 'rule66',
        },
    },
    'data_params': config_.DATA_PARAMS,
    'params_list': [
        {'data': 'data66', 'rule': 'rule66'},
    ],
}
|
|
|
|
+
|
|
|
|
# Column names copied from every ODPS record into the feature DataFrame.
features = [
    # identifiers
    'apptype', 'code', 'videoid',
    # last-hour counters
    'lastonehour_preview', 'lastonehour_view', 'lastonehour_play',
    'lastonehour_share', 'lastonehour_return',
    # last-hour totals
    'lastonehour_preview_total', 'lastonehour_view_total',
    'lastonehour_play_total', 'lastonehour_share_total',
    'platform_return',
    'lastonehour_show', 'lastonehour_show_region',
    # two-hour and three-hour windows
    'lasttwohour_share', 'lasttwohour_return_now', 'lasttwohour_return',
    'lastthreehour_share', 'lastthreehour_return_now', 'lastthreehour_return',
    # "*_new" return counters
    'lastonehour_return_new',
    'lasttwohour_return_now_new', 'lasttwohour_return_new',
    'lastthreehour_return_now_new', 'lastthreehour_return_new',
    'platform_return_new',
]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def get_region_code(region):
    """
    Get the ad_code corresponding to a province.

    Queries ``region_adcode`` for rows with ``parent_id = 0`` whose ``region``
    starts with the given text and returns the first column of the first row.

    :param region: region name prefix to match
    :return: first column of the first matching row
    """
    db = MysqlHelper(mysql_info=config_.MYSQL_INFO)
    # NOTE(review): `region` is interpolated directly into the SQL text.
    query = f"SELECT ad_code FROM region_adcode WHERE parent_id = 0 AND region LIKE '{region}%';"
    rows = db.get_data(sql=query)
    first_row = rows[0]
    return first_row[0]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def h_data_check(project, table, now_date):
    """
    Check whether the data for the given hour is ready.

    :param project: ODPS project name
    :param table: table name inside the project
    :param now_date: datetime identifying the hourly partition (formatted as %Y%m%d%H)
    :return: the reader's row count for the partition; 0 when the partition
             check fails or any exception is raised along the way
    """
    odps_client = ODPS(
        access_id=config_.ODPS_CONFIG['ACCESSID'],
        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
        project=project,
        endpoint=config_.ODPS_CONFIG['ENDPOINT'],
        connect_timeout=3000,
        read_timeout=500000,
        pool_maxsize=1000,
        pool_connections=1000
    )

    row_count = 0
    try:
        partition = datetime.datetime.strftime(now_date, '%Y%m%d%H')
        if check_table_partition_exits(date=partition, project=project, table=table):
            query = f'select * from {project}.{table} where dt = {partition}'
            with odps_client.execute_sql(sql=query).open_reader() as reader:
                row_count = reader.count
    except Exception:
        # Best effort: any failure is reported as "no data yet".
        row_count = 0
    return row_count
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def get_rov_redis_key(now_date):
    """
    Get the Redis key under which the rov model result is stored.

    :param now_date: reference datetime
    :return: the key for ``now_date``'s day when it exists in Redis,
             otherwise the key for the previous day (returned without an existence check)
    """
    redis_client = RedisHelper()
    today = datetime.datetime.strftime(now_date, '%Y%m%d')
    today_key = f'{config_.RECALL_KEY_NAME_PREFIX}{today}'
    if redis_client.key_exists(key_name=today_key):
        return today_key

    yesterday = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
    return f'{config_.RECALL_KEY_NAME_PREFIX}{yesterday}'
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def get_day_30day_videos(now_date, data_key, rule_key):
    """
    Get the video ids of the daily-updated "relative 30 days" list.

    :param now_date: reference datetime
    :param data_key: data key used in the Redis key name
    :param rule_key: rule key used in the Redis key name
    :return: list of int video ids read from the zset, or None when the read returns None
    """
    redis_client = RedisHelper()
    prefix = config_.RECALL_KEY_NAME_PREFIX_30DAY

    day = datetime.datetime.strftime(now_date, '%Y%m%d')
    key_name = f"{prefix}{data_key}:{rule_key}:{day}"
    if not redis_client.key_exists(key_name=key_name):
        # Today's list is not there yet: fall back to the previous day's key.
        day = datetime.datetime.strftime((now_date - datetime.timedelta(days=1)), '%Y%m%d')
        key_name = f"{prefix}{data_key}:{rule_key}:{day}"

    members = redis_client.get_all_data_from_zset(key_name=key_name, with_scores=True)
    if members is None:
        return None
    return [int(member) for member, _ in members]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def get_feature_data(project, table, now_date):
    """
    Get the feature data.

    :param project: ODPS project name
    :param table: table name
    :param now_date: datetime identifying the hourly partition (formatted as %Y%m%d%H)
    :return: DataFrame with one row per record, holding the columns named in ``features``
    """
    partition = datetime.datetime.strftime(now_date, '%Y%m%d%H')
    rows = get_data_from_odps(date=partition, project=project, table=table)
    return pd.DataFrame([{name: row[name] for name in features} for row in rows])
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def cal_score_initial(df, param):
    """
    Compute the baseline score.

    Derived columns (missing values are first replaced by 0):
        share_rate = lastonehour_share / (lastonehour_play + 1000)
        back_rate  = lastonehour_return / (lastonehour_share + 10)
        log_back   = ln(lastonehour_return + 1)
        ctr        = lastonehour_play / (exposure + 1000), where exposure is
                     lastonehour_show        for view_type 'video-show',
                     lastonehour_show_region for view_type 'video-show-region',
                     lastonehour_preview     otherwise
        K2         = ctr capped at 0.6
        platform_return_rate = platform_return / lastonehour_return
        score1     = share_rate * back_rate * log_back * K2

    Final score:
        click_score_rate set: (1 - click_score_rate) * score1 + click_score_rate * K2
        back_score_rate set:  (1 - back_score_rate) * score1 + back_score_rate * back_rate
        neither set:          score1

    :param df: feature data
    :param param: rule parameters ('view_type', 'click_score_rate', 'back_score_rate')
    :return: a new DataFrame with the derived columns, sorted by score descending
    """
    df = df.fillna(0)
    df['share_rate'] = df['lastonehour_share'] / (df['lastonehour_play'] + 1000)
    df['back_rate'] = df['lastonehour_return'] / (df['lastonehour_share'] + 10)
    df['log_back'] = (df['lastonehour_return'] + 1).apply(math.log)

    view_type = param.get('view_type', None)
    if view_type == 'video-show':
        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_show'] + 1000)
    elif view_type == 'video-show-region':
        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_show_region'] + 1000)
    else:
        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_preview'] + 1000)
    df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)

    df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']

    df['score1'] = df['share_rate'] * df['back_rate'] * df['log_back'] * df['K2']

    click_score_rate = param.get('click_score_rate', None)
    # Read the back-rate weight from its own key. It used to re-read
    # 'click_score_rate', which made the back_rate blend below unreachable.
    back_score_rate = param.get('back_score_rate', None)
    if click_score_rate is not None:
        df['score'] = (1 - click_score_rate) * df['score1'] + click_score_rate * df['K2']
    elif back_score_rate is not None:
        df['score'] = (1 - back_score_rate) * df['score1'] + back_score_rate * df['back_rate']
    else:
        df['score'] = df['score1']

    df = df.sort_values(by=['score'], ascending=False)
    return df
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def cal_score_add_return(df, param):
    """
    Compute the score with the two-hour and three-hour return terms added in.

    score = K2 * share_rate * (back_rate * log_back
                               + back_rate2 * log_back2
                               + back_rate3 * log_back3)
    where back_rateN = <window>_return_now / (<window>_share + 10) and
    log_backN = ln(<window>_return_now + 1) for the two-hour (N=2) and
    three-hour (N=3) windows; the one-hour terms use lastonehour_return.

    :param df: feature data
    :param param: rule parameters ('view_type' selects the ctr denominator)
    :return: a new DataFrame with the derived columns, sorted by score descending
    """
    data = df.fillna(0)
    hour_return = data['lastonehour_return']

    data['share_rate'] = data['lastonehour_share'] / (data['lastonehour_play'] + 1000)
    data['back_rate'] = hour_return / (data['lastonehour_share'] + 10)
    data['log_back'] = (hour_return + 1).apply(math.log)
    for suffix, window in (('2', 'lasttwohour'), ('3', 'lastthreehour')):
        returns_now = data[f'{window}_return_now']
        data[f'back_rate{suffix}'] = returns_now / (data[f'{window}_share'] + 10)
        data[f'log_back{suffix}'] = (returns_now + 1).apply(math.log)

    exposure_column = {
        'video-show': 'lastonehour_show',
        'video-show-region': 'lastonehour_show_region',
    }.get(param.get('view_type', None), 'lastonehour_preview')
    data['ctr'] = data['lastonehour_play'] / (data[exposure_column] + 1000)
    data['K2'] = data['ctr'].clip(upper=0.6)  # cap ctr at 0.6
    data['platform_return_rate'] = data['platform_return'] / hour_return

    data['score'] = data['K2'] * data['share_rate'] * (
        data['back_rate'] * data['log_back'] +
        data['back_rate2'] * data['log_back2'] +
        data['back_rate3'] * data['log_back3']
    )
    return data.sort_values(by=['score'], ascending=False)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def cal_score_multiply_return_retention(df, param):
    """
    Compute the score multiplied by a return-retention factor.

    return_retention_initial =
        (lasttwohour_return_now + lastthreehour_return_now)
        / (lasttwohour_return + lastthreehour_return + 1)
    return_retention = 0.5 where the initial value is 0, otherwise the initial value
    score = K2 * share_rate * back_rate * log_back * return_retention

    :param df: feature data
    :param param: rule parameters ('view_type' selects the ctr denominator)
    :return: a new DataFrame with the derived columns, sorted by score descending
    """
    data = df.fillna(0)
    hour_return = data['lastonehour_return']

    data['share_rate'] = data['lastonehour_share'] / (data['lastonehour_play'] + 1000)
    data['back_rate'] = hour_return / (data['lastonehour_share'] + 10)
    data['log_back'] = (hour_return + 1).apply(math.log)

    view_type = param.get('view_type', None)
    exposure_column = 'lastonehour_preview'
    if view_type == 'video-show':
        exposure_column = 'lastonehour_show'
    elif view_type == 'video-show-region':
        exposure_column = 'lastonehour_show_region'
    data['ctr'] = data['lastonehour_play'] / (data[exposure_column] + 1000)
    data['K2'] = data['ctr'].clip(upper=0.6)  # cap ctr at 0.6

    returns_now = data['lasttwohour_return_now'] + data['lastthreehour_return_now']
    returns_all = data['lasttwohour_return'] + data['lastthreehour_return'] + 1
    data['return_retention_initial'] = returns_now / returns_all
    # A retention of exactly 0 is replaced by 0.5.
    data['return_retention'] = data['return_retention_initial'].mask(
        data['return_retention_initial'] == 0, 0.5)

    data['platform_return_rate'] = data['platform_return'] / hour_return

    data['score'] = (data['K2'] * data['share_rate'] * data['back_rate']
                     * data['log_back'] * data['return_retention'])
    return data.sort_values(by=['score'], ascending=False)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def cal_score_update_backrate(df, param):
    """
    Compute the score with an adjusted back-rate term.

    backrate1_3_initial = back_rate * back_rate2 * back_rate3
    backrate1_3 = 0.02 where the initial product is 0, otherwise the product
    score = K2 * share_rate * (back_rate + backrate1_3) * log_back

    :param df: feature data
    :param param: rule parameters ('view_type' selects the ctr denominator)
    :return: a new DataFrame with the derived columns, sorted by score descending
    """
    data = df.fillna(0)
    hour_return = data['lastonehour_return']

    data['share_rate'] = data['lastonehour_share'] / (data['lastonehour_play'] + 1000)
    data['back_rate'] = hour_return / (data['lastonehour_share'] + 10)
    data['back_rate2'] = data['lasttwohour_return_now'] / (data['lasttwohour_share'] + 10)
    data['back_rate3'] = data['lastthreehour_return_now'] / (data['lastthreehour_share'] + 10)
    data['log_back'] = (hour_return + 1).apply(math.log)

    exposure_column = {
        'video-show': 'lastonehour_show',
        'video-show-region': 'lastonehour_show_region',
    }.get(param.get('view_type', None), 'lastonehour_preview')
    data['ctr'] = data['lastonehour_play'] / (data[exposure_column] + 1000)
    data['K2'] = data['ctr'].clip(upper=0.6)  # cap ctr at 0.6

    product = data['back_rate'] * data['back_rate2'] * data['back_rate3']
    data['backrate1_3_initial'] = product
    # A product of exactly 0 is replaced by 0.02.
    data['backrate1_3'] = product.mask(product == 0, 0.02)
    data['platform_return_rate'] = data['platform_return'] / hour_return

    data['score'] = (data['K2'] * data['share_rate']
                     * (data['back_rate'] + data['backrate1_3']) * data['log_back'])
    return data.sort_values(by=['score'], ascending=False)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def cal_score_with_new_return(df, param):
    """
    Compute the baseline score from the "*_new" return columns.

    Same shape as the baseline formula, but back_rate and log_back use
    lastonehour_return_new, and platform_return_rate is
    platform_return_new / lastonehour_return_new.
    score = share_rate * back_rate * log_back * K2

    :param df: feature data
    :param param: rule parameters ('view_type' selects the ctr denominator)
    :return: a new DataFrame with the derived columns, sorted by score descending
    """
    data = df.fillna(0)
    new_return = data['lastonehour_return_new']

    data['share_rate'] = data['lastonehour_share'] / (data['lastonehour_play'] + 1000)
    data['back_rate'] = new_return / (data['lastonehour_share'] + 10)
    data['log_back'] = (new_return + 1).apply(math.log)

    exposure_column = {
        'video-show': 'lastonehour_show',
        'video-show-region': 'lastonehour_show_region',
    }.get(param.get('view_type', None), 'lastonehour_preview')
    data['ctr'] = data['lastonehour_play'] / (data[exposure_column] + 1000)
    data['K2'] = data['ctr'].clip(upper=0.6)  # cap ctr at 0.6
    data['platform_return_rate'] = data['platform_return_new'] / new_return

    data['score'] = data['share_rate'] * data['back_rate'] * data['log_back'] * data['K2']
    return data.sort_values(by=['score'], ascending=False)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def cal_score_multiply_return_retention_with_new_return(df, param):
    """
    Compute the return-retention score from the "*_new" return columns.

    return_retention_initial =
        (lasttwohour_return_now_new + lastthreehour_return_now_new)
        / (lasttwohour_return_new + lastthreehour_return_new + 1)
    return_retention = 0.5 where the initial value is 0, otherwise the initial value
    score = K2 * share_rate * back_rate * log_back * return_retention,
    with back_rate and log_back based on lastonehour_return_new.

    :param df: feature data
    :param param: rule parameters ('view_type' selects the ctr denominator)
    :return: a new DataFrame with the derived columns, sorted by score descending
    """
    data = df.fillna(0)
    new_return = data['lastonehour_return_new']

    data['share_rate'] = data['lastonehour_share'] / (data['lastonehour_play'] + 1000)
    data['back_rate'] = new_return / (data['lastonehour_share'] + 10)
    data['log_back'] = (new_return + 1).apply(math.log)

    view_type = param.get('view_type', None)
    exposure_column = 'lastonehour_preview'
    if view_type == 'video-show':
        exposure_column = 'lastonehour_show'
    elif view_type == 'video-show-region':
        exposure_column = 'lastonehour_show_region'
    data['ctr'] = data['lastonehour_play'] / (data[exposure_column] + 1000)
    data['K2'] = data['ctr'].clip(upper=0.6)  # cap ctr at 0.6

    returns_now = data['lasttwohour_return_now_new'] + data['lastthreehour_return_now_new']
    returns_all = data['lasttwohour_return_new'] + data['lastthreehour_return_new'] + 1
    data['return_retention_initial'] = returns_now / returns_all
    # A retention of exactly 0 is replaced by 0.5.
    data['return_retention'] = data['return_retention_initial'].mask(
        data['return_retention_initial'] == 0, 0.5)

    data['platform_return_rate'] = data['platform_return_new'] / new_return

    data['score'] = (data['K2'] * data['share_rate'] * data['back_rate']
                     * data['log_back'] * data['return_retention'])
    return data.sort_values(by=['score'], ascending=False)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def cal_score_with_back_view0(df, param):
    """
    Compute the score with +1 smoothing and a view-based ctr.

    share_rate = (lastonehour_share + 1) / (lastonehour_play + 1000)
    back_rate  = (lastonehour_return + 1) / (lastonehour_share + 10)
    log_back   = ln(lastonehour_return + 1)
    ctr        = (lastonehour_play + 1) / (lastonehour_view + 100)
    score      = share_rate * back_rate * log_back * ctr

    :param df: feature data
    :param param: rule parameters (not read by this function)
    :return: a new DataFrame with the derived columns, sorted by score descending
    """
    data = df.fillna(0)
    shares = data['lastonehour_share']
    plays = data['lastonehour_play']
    smoothed_return = data['lastonehour_return'] + 1

    data['share_rate'] = (shares + 1) / (plays + 1000)
    data['back_rate'] = smoothed_return / (shares + 10)
    data['log_back'] = smoothed_return.apply(math.log)
    data['ctr'] = (plays + 1) / (data['lastonehour_view'] + 100)
    data['platform_return_rate'] = data['platform_return'] / data['lastonehour_return']
    data['score'] = data['share_rate'] * data['back_rate'] * data['log_back'] * data['ctr']
    return data.sort_values(by=['score'], ascending=False)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def cal_score_with_back_view1(df, param):
    """
    Compute the score from the return-per-play rate and a view-based ctr.

    back_play_rate = (lastonehour_return + 1) / (lastonehour_play + 1000)
    log_back       = ln(lastonehour_return + 1)
    ctr            = (lastonehour_play + 1) / (lastonehour_view + 100)
    score          = back_play_rate * log_back * ctr

    :param df: feature data
    :param param: rule parameters (not read by this function)
    :return: a new DataFrame with the derived columns, sorted by score descending
    """
    data = df.fillna(0)
    plays = data['lastonehour_play']
    smoothed_return = data['lastonehour_return'] + 1

    data['back_play_rate'] = smoothed_return / (plays + 1000)
    data['log_back'] = smoothed_return.apply(math.log)
    data['ctr'] = (plays + 1) / (data['lastonehour_view'] + 100)
    data['platform_return_rate'] = data['platform_return'] / data['lastonehour_return']
    data['score'] = data['back_play_rate'] * data['log_back'] * data['ctr']
    return data.sort_values(by=['score'], ascending=False)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def cal_score_with_back_rate_exponential_weighting1(df, param):
    """
    Compute the score with the back rate squared.

    score = share_rate * back_rate ** 2 * log_back * K2

    :param df: feature data
    :param param: rule parameters ('view_type' selects the ctr denominator)
    :return: a new DataFrame with the derived columns, sorted by score descending
    """
    data = df.fillna(0)
    hour_return = data['lastonehour_return']

    data['share_rate'] = data['lastonehour_share'] / (data['lastonehour_play'] + 1000)
    data['back_rate'] = hour_return / (data['lastonehour_share'] + 10)
    data['log_back'] = (hour_return + 1).apply(math.log)

    exposure_column = {
        'video-show': 'lastonehour_show',
        'video-show-region': 'lastonehour_show_region',
    }.get(param.get('view_type', None), 'lastonehour_preview')
    data['ctr'] = data['lastonehour_play'] / (data[exposure_column] + 1000)
    data['K2'] = data['ctr'].clip(upper=0.6)  # cap ctr at 0.6

    data['platform_return_rate'] = data['platform_return'] / hour_return

    data['score'] = data['share_rate'] * data['back_rate'] ** 2 * data['log_back'] * data['K2']
    return data.sort_values(by=['score'], ascending=False)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def cal_score_with_back_rate_exponential_weighting2(df, param):
    """
    Compute the score with exponent weights on each factor.

    score = share_rate ** 0.5 * back_rate ** 2 * log_back * K2 ** 0.5

    :param df: feature data
    :param param: rule parameters ('view_type' selects the ctr denominator)
    :return: a new DataFrame with the derived columns, sorted by score descending
    """
    data = df.fillna(0)
    hour_return = data['lastonehour_return']

    data['share_rate'] = data['lastonehour_share'] / (data['lastonehour_play'] + 1000)
    data['back_rate'] = hour_return / (data['lastonehour_share'] + 10)
    data['log_back'] = (hour_return + 1).apply(math.log)

    view_type = param.get('view_type', None)
    exposure_column = 'lastonehour_preview'
    if view_type == 'video-show':
        exposure_column = 'lastonehour_show'
    elif view_type == 'video-show-region':
        exposure_column = 'lastonehour_show_region'
    data['ctr'] = data['lastonehour_play'] / (data[exposure_column] + 1000)
    data['K2'] = data['ctr'].clip(upper=0.6)  # cap ctr at 0.6

    data['platform_return_rate'] = data['platform_return'] / hour_return

    data['score'] = (data['share_rate'] ** 0.5 * data['back_rate'] ** 2
                     * data['log_back'] * data['K2'] ** 0.5)
    return data.sort_values(by=['score'], ascending=False)
|
|
|
|
+
|
|
|
|
def cal_score_with_back_rate_by_rank_weighting(df, param):
    """
    add by sunmingze 20231123
    Compute the score from the dense descending ranks of the individual factors.

    score = 1 / (rank_by_sharerate + 10) + 5 / (rank_by_backrate + 10)
            + 5 / (rank_by_logback + 10) + 1 / (rank_by_K2 + 10)

    :param df: feature data
    :param param: rule parameters ('view_type' selects the ctr denominator)
    :return: a new DataFrame with the derived columns, sorted by score descending
    """
    data = df.fillna(0)
    hour_return = data['lastonehour_return']

    data['share_rate'] = data['lastonehour_share'] / (data['lastonehour_play'] + 1000)
    data['back_rate'] = hour_return / (data['lastonehour_share'] + 10)
    data['log_back'] = (hour_return + 1).apply(math.log)

    exposure_column = {
        'video-show': 'lastonehour_show',
        'video-show-region': 'lastonehour_show_region',
    }.get(param.get('view_type', None), 'lastonehour_preview')
    data['ctr'] = data['lastonehour_play'] / (data[exposure_column] + 1000)
    data['K2'] = data['ctr'].clip(upper=0.6)  # cap ctr at 0.6

    data['platform_return_rate'] = data['platform_return'] / hour_return

    # Rank 1 is the largest value; ties share a rank and ranks have no gaps.
    for rank_column, value_column in (
            ('rank_by_sharerate', 'share_rate'),
            ('rank_by_backrate', 'back_rate'),
            ('rank_by_K2', 'K2'),
            ('rank_by_logback', 'log_back'),
    ):
        data[rank_column] = data[value_column].rank(ascending=False, method='dense')

    data['score'] = (1 / (data['rank_by_sharerate'] + 10) + 5 / (data['rank_by_backrate'] + 10)
                     + 5 / (data['rank_by_logback'] + 10) + 1 / (data['rank_by_K2'] + 10))
    return data.sort_values(by=['score'], ascending=False)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def cal_score(df, param):
    """
    Dispatch to the scoring function selected by the rule parameters.

    When param['return_data'] is 'share_region_return' the "*_new" return
    variants are used; otherwise param['score_func'] picks the scorer and any
    unrecognised or missing value falls back to cal_score_initial.

    :param df: feature data
    :param param: rule parameters ('return_data', 'score_func', plus whatever the scorer reads)
    :return: the DataFrame returned by the selected scoring function
    """
    score_func = param.get('score_func', None)

    if param.get('return_data', None) == 'share_region_return':
        if score_func == 'multiply_return_retention':
            return cal_score_multiply_return_retention_with_new_return(df=df, param=param)
        return cal_score_with_new_return(df=df, param=param)

    scorers = {
        'add_backrate*log(return+1)': cal_score_add_return,
        'multiply_return_retention': cal_score_multiply_return_retention,
        'update_backrate': cal_score_update_backrate,
        'back_view0': cal_score_with_back_view0,
        'back_view1': cal_score_with_back_view1,
        'back_rate_exponential_weighting1': cal_score_with_back_rate_exponential_weighting1,
        'back_rate_exponential_weighting2': cal_score_with_back_rate_exponential_weighting2,
        'back_rate_rank_weighting': cal_score_with_back_rate_by_rank_weighting,
    }
    scorer = scorers.get(score_func, cal_score_initial)
    return scorer(df=df, param=param)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def add_func1(initial_df, pre_h_df):
    """
    Merge the current-hour data with the data of the previous hours.

    Earlier-hour rows are kept only when their score is strictly above the
    lowest current-hour score (0 when there is no current-hour row). After the
    merge, each video keeps its highest-scoring row.

    :param initial_df: current-hour rows, with 'videoid' and 'score' columns
    :param pre_h_df: earlier-hour rows, with 'videoid' and 'score' columns
    :return: merged DataFrame sorted by score descending, one row per videoid
    """
    current_scores = initial_df['score'].to_list()
    floor_score = min(current_scores) if len(current_scores) > 0 else 0

    stronger_earlier = pre_h_df[pre_h_df['score'] > floor_score]
    merged = pd.concat([initial_df, stronger_earlier], ignore_index=True)

    merged['videoid'] = merged['videoid'].astype(int)
    ranked = merged.sort_values(by=['score'], ascending=False)
    return ranked.drop_duplicates(subset=['videoid'], keep="first")
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def add_func2(initial_df, pre_h_df):
    """
    Merge the current-hour data with the data of the previous hours.

    A video present in the current hour always keeps its current-hour row;
    other videos are taken from the earlier hours, highest score first.
    Earlier-hour rows are kept only when their score is strictly above the
    lowest current-hour score (0 when there is no current-hour row).

    :param initial_df: current-hour rows, with 'videoid' and 'score' columns
    :param pre_h_df: earlier-hour rows, with 'videoid' and 'score' columns
    :return: merged DataFrame sorted by score descending, one row per videoid
    """
    score_list = initial_df['score'].to_list()
    min_score = min(score_list) if score_list else 0

    # Compare ids as ints on both sides. The merged frame is cast to int below
    # anyway, but the exclusion used to run on the raw values, so an
    # earlier-hour id held as text never matched a current-hour int id and
    # could override the current-hour row with its higher score.
    initial_video_id_list = initial_df['videoid'].astype(int).to_list()
    pre_h_df = pre_h_df[pre_h_df['score'] > min_score]
    pre_h_df = pre_h_df[~pre_h_df['videoid'].astype(int).isin(initial_video_id_list)]

    df = pd.concat([initial_df, pre_h_df], ignore_index=True)

    df['videoid'] = df['videoid'].astype(int)
    df = df.sort_values(by=['score'], ascending=False)
    df = df.drop_duplicates(subset=['videoid'], keep="first")
    return df
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def add_videos(initial_df, now_date, rule_key, region, data_key, hour_count, top, add_func):
    """
    Add the top videos of the preceding hours to the regional hourly list.

    :param initial_df: regional hourly selection result
    :param now_date: reference datetime; the preceding hours are counted back from it
    :param data_key: data key used in the Redis key name
    :param region: region used in the Redis key name
    :param rule_key: rule key used in the Redis key name
    :param hour_count: how many preceding hours to read, type-int
    :param top: how many top entries to read per hour, type-int
    :param add_func: 'func2' merges with add_func2, anything else with add_func1
    :return: df
    """
    redis_client = RedisHelper()
    collected = []
    for hours_back in range(1, hour_count + 1):
        earlier = now_date - datetime.timedelta(hours=hours_back)
        day_part = datetime.datetime.strftime(earlier, '%Y%m%d')
        key_name = (f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H}{region}:{data_key}:{rule_key}:"
                    f"{day_part}:{earlier.hour}")
        top_rows = redis_client.get_data_zset_with_index(
            key_name=key_name, start=0, end=top - 1, desc=True, with_scores=True)
        if top_rows is not None:
            collected.extend(top_rows)

    earlier_df = pd.DataFrame(data=collected, columns=['videoid', 'score'])
    merge = add_func2 if add_func == 'func2' else add_func1
    return merge(initial_df=initial_df, pre_h_df=earlier_df)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def video_rank(df, now_date, now_h, rule_key, param, region, data_key, rule_rank_h_flag,
               add_videos_with_pre_h=False, hour_count=0):
    """
    Select the videos that qualify for the regional hourly recall pool, store
    them in Redis, then de-duplicate the other recall pools against them.

    :param df: scored feature data ('videoid', 'score', 'lastonehour_return',
               'platform_return_rate' columns are read)
    :param now_date: reference datetime used in the Redis key names
    :param now_h: hour used in the Redis key names
    :param rule_key: entry rule for the hourly data
    :param param: parameters of the entry rule
    :param region: region the data belongs to
    :param data_key: data key used in the Redis key names
    :param rule_rank_h_flag: forwarded to dup_to_redis
    :param add_videos_with_pre_h: when True, merge in the top videos of the preceding hours
    :param hour_count: number of preceding hours to merge in
    :return: None
    """
    redis_helper = RedisHelper()

    # Entry thresholds of the hourly recall pool.
    return_count = param.get('return_count', 1)
    score_value = param.get('score_rule', 0)
    platform_return_rate = param.get('platform_return_rate', 0)
    h_recall_df = df[(df['lastonehour_return'] >= return_count) & (df['score'] >= score_value)
                     & (df['platform_return_rate'] >= platform_return_rate)]

    # One row per video: the highest-scoring one.
    h_recall_df = h_recall_df.sort_values(by=['score'], ascending=False)
    h_recall_df = h_recall_df.drop_duplicates(subset=['videoid'], keep='first')
    h_recall_df['videoid'] = h_recall_df['videoid'].astype(int)

    if add_videos_with_pre_h is True:
        add_func = param.get('add_func', None)
        h_recall_df = add_videos(initial_df=h_recall_df, now_date=now_date, rule_key=rule_key,
                                 region=region, data_key=data_key, hour_count=hour_count, top=10,
                                 add_func=add_func)

    h_recall_videos = h_recall_df['videoid'].to_list()

    # Video status filter.
    if data_key in ['data7', ]:
        filtered_videos = filter_video_status_app(h_recall_videos)
    else:
        filtered_videos = filter_video_status(h_recall_videos)

    # Shielded video filter for this region.
    shield_config = param.get('shield_config', config_.SHIELD_CONFIG)
    shield_key_name_list = shield_config.get(region, None)
    if shield_key_name_list is not None:
        filtered_videos = filter_shield_video(video_ids=filtered_videos, shield_key_name_list=shield_key_name_list)

    # Political video filter.
    political_filter = param.get('political_filter', None)
    if political_filter is True:
        filtered_videos = filter_political_videos(video_ids=filtered_videos)

    # De-duplicate against the "relative 30 days" list.
    h_video_ids = []
    by_30day_rule_key = param.get('30day_rule_key', None)
    if by_30day_rule_key is not None:
        day_30day_video_ids = get_day_30day_videos(now_date=now_date, data_key=data_key,
                                                   rule_key=by_30day_rule_key)
        # The lookup may return None. Keep the empty list in that case so the
        # append below and the de-duplication in dup_to_redis keep working.
        if day_30day_video_ids is not None:
            h_video_ids = day_30day_video_ids
            excluded_ids = set(h_video_ids)
            filtered_videos = [video_id for video_id in filtered_videos if int(video_id) not in excluded_ids]

    # videoid -> score, built once instead of scanning the frame per video.
    score_by_video_id = dict(zip(h_recall_df['videoid'].to_list(), h_recall_df['score'].to_list()))
    h_recall_result = {}
    for video_id in filtered_videos:
        video_id = int(video_id)
        h_recall_result[video_id] = float(score_by_video_id[video_id])
        h_video_ids.append(video_id)

    h_recall_key_name = \
        f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H}{region}:{data_key}:{rule_key}:" \
        f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
    if len(h_recall_result) > 0:
        redis_helper.add_data_with_zset(key_name=h_recall_key_name, data=h_recall_result, expire_time=2 * 24 * 3600)
        # Adjust the scores of rate-limited videos.
        update_limit_video_score(initial_videos=h_recall_result, key_name=h_recall_key_name)

    # De-duplicate the other recall pools and store them under their own keys.
    h_rule_key = param.get('h_rule_key', None)
    region_24h_rule_key = param.get('region_24h_rule_key', 'rule1')
    by_24h_rule_key = param.get('24h_rule_key', None)
    by_48h_rule_key = param.get('48h_rule_key', None)
    dup_remove = param.get('dup_remove', True)

    dup_to_redis(h_video_ids=h_video_ids, now_date=now_date, now_h=now_h, rule_key=rule_key, h_rule_key=h_rule_key,
                 region_24h_rule_key=region_24h_rule_key, by_24h_rule_key=by_24h_rule_key,
                 by_48h_rule_key=by_48h_rule_key, region=region, data_key=data_key,
                 rule_rank_h_flag=rule_rank_h_flag, political_filter=political_filter,
                 shield_config=shield_config, dup_remove=dup_remove)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def dup_data(h_video_ids, initial_key_name, dup_key_name, region, political_filter, shield_config, dup_remove):
    """
    Copy one recall pool to its de-duplicated key.

    Reads the zset at ``initial_key_name``, drops shielded (and optionally
    political) videos, optionally drops videos already listed in
    ``h_video_ids``, and writes what is left to ``dup_key_name``.

    :param h_video_ids: int video ids already used by earlier pools; extended
                        in place when dup_remove is True
    :param initial_key_name: Redis key of the source pool
    :param dup_key_name: Redis key the result is written to
    :param region: region used to look up the shield configuration
    :param political_filter: True to apply the political video filter
    :param shield_config: mapping region -> shield key name list
    :param dup_remove: True to skip videos already in h_video_ids
    :return: h_video_ids
    """
    if h_video_ids is None:
        h_video_ids = []

    redis_helper = RedisHelper()
    if not redis_helper.key_exists(key_name=initial_key_name):
        return h_video_ids

    initial_data = redis_helper.get_all_data_from_zset(key_name=initial_key_name, with_scores=True)
    if not initial_data:
        # Nothing was read: there is nothing to copy.
        return h_video_ids

    # Shielded video filter.
    initial_video_ids = [int(video_id) for video_id, _ in initial_data]
    shield_key_name_list = shield_config.get(region, None)
    if shield_key_name_list is not None:
        initial_video_ids = filter_shield_video(video_ids=initial_video_ids,
                                                shield_key_name_list=shield_key_name_list)
    # Political video filter.
    if political_filter is True:
        initial_video_ids = filter_political_videos(video_ids=initial_video_ids)

    # Sets keep the membership tests below constant-time.
    allowed_ids = {int(video_id) for video_id in initial_video_ids}
    seen_ids = set(h_video_ids)

    deduped = {}
    for raw_video_id, score in initial_data:
        video_id = int(raw_video_id)
        if video_id not in allowed_ids:
            continue
        if dup_remove is True:
            if video_id in seen_ids:
                continue
            seen_ids.add(video_id)
            h_video_ids.append(video_id)
        deduped[video_id] = score

    if len(deduped) > 0:
        redis_helper.add_data_with_zset(key_name=dup_key_name, data=deduped, expire_time=2 * 24 * 3600)
        # Adjust the scores of rate-limited videos.
        update_limit_video_score(initial_videos=deduped, key_name=dup_key_name)
    return h_video_ids
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def dup_to_redis(h_video_ids, now_date, now_h, rule_key, h_rule_key, region_24h_rule_key, by_24h_rule_key, by_48h_rule_key,
                 region, data_key, rule_rank_h_flag, political_filter, shield_config, dup_remove):
    """De-duplicate the region hourly videos against the other recall pools and store each result in redis.

    Every step hands the running id list to ``dup_data`` (defined earlier in this file) together
    with the key of the pool to read and the key under which the de-duplicated pool is stored,
    then continues with the id list ``dup_data`` returns. Steps, in order:

    1. the non-region hourly pool (only when ``h_rule_key`` is not None);
    2. the region 24h pool;
    3. for ``rule_rank_h_flag == '48h'``: the 48h pool, plus the "other" 48h pool when
       ``by_48h_rule_key == 'rule1'``; otherwise the 24h pool followed by the "other" 24h pool.

    :param h_video_ids: video ids already selected for the region hourly pool
    :param now_date: datetime used for the ``%Y%m%d`` part of every key
    :param now_h: hour used as the last part of every key
    :param rule_key: rule key used in the de-duplicated (target) keys
    :param h_rule_key: rule key of the non-region hourly pool, or None to skip that step
    :param region_24h_rule_key: rule key of the region 24h pool
    :param by_24h_rule_key: rule key of the 24h pools
    :param by_48h_rule_key: rule key of the 48h pools
    :param region: region code used in the keys and forwarded to ``dup_data``
    :param data_key: data key used in the keys
    :param rule_rank_h_flag: '48h' selects the 48h pools, anything else the 24h pools
    :param political_filter: forwarded unchanged to ``dup_data``
    :param shield_config: forwarded unchanged to ``dup_data``
    :param dup_remove: forwarded unchanged to ``dup_data``
    :return: None
    """
    # All keys end with the same "<yyyymmdd>:<hour>" suffix.
    date_hour = f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"

    def dedup_against(source_key, target_prefix, remaining_ids):
        """Run dup_data for one source pool and return the updated id list."""
        return dup_data(h_video_ids=remaining_ids, initial_key_name=source_key,
                        dup_key_name=f"{target_prefix}{region}:{data_key}:{rule_key}:{date_hour}",
                        region=region, political_filter=political_filter,
                        shield_config=shield_config, dup_remove=dup_remove)

    # Non-region hourly pool (optional).
    if h_rule_key is not None:
        h_video_ids = dedup_against(
            f"{config_.RECALL_KEY_NAME_PREFIX_BY_H_H}{data_key}:{h_rule_key}:{date_hour}",
            config_.RECALL_KEY_NAME_PREFIX_DUP_H_H,
            h_video_ids,
        )

    # Region 24h pool.
    h_video_ids = dedup_against(
        f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_24H}{region}:{data_key}:{region_24h_rule_key}:{date_hour}",
        config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H,
        h_video_ids,
    )

    if rule_rank_h_flag == '48h':
        # 48h pool.
        h_video_ids = dedup_against(
            f"{config_.RECALL_KEY_NAME_PREFIX_BY_48H}{data_key}:{by_48h_rule_key}:{date_hour}",
            config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_48H_H,
            h_video_ids,
        )
        # "Other" 48h pool, only maintained for rule1.
        if by_48h_rule_key == 'rule1':
            h_video_ids = dedup_against(
                f"{config_.RECALL_KEY_NAME_PREFIX_BY_48H_OTHER}{data_key}:{by_48h_rule_key}:{date_hour}",
                config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_48H_H,
                h_video_ids,
            )
        return

    # 24h pool.
    h_video_ids = dedup_against(
        f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H}{data_key}:{by_24h_rule_key}:{date_hour}",
        config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H,
        h_video_ids,
    )
    # "Other" 24h pool.
    h_video_ids = dedup_against(
        f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H_OTHER}{data_key}:{by_24h_rule_key}:{date_hour}",
        config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H,
        h_video_ids,
    )
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def merge_df(df_left, df_right):
    """
    Outer-join two feature frames on (videoid, code) and add up the matching feature columns.

    Values missing on either side are treated as 0. Every name in the module-level
    ``features`` list except 'apptype', 'videoid' and 'code' is summed.

    :param df_left: left feature DataFrame
    :param df_right: right feature DataFrame
    :return: DataFrame with 'videoid', 'code' and one summed column per feature
    """
    join_keys = ['videoid', 'code']
    combined = df_left.merge(df_right, on=join_keys, how='outer', suffixes=['_x', '_y'])
    combined = combined.fillna(0)
    # Identifier columns are join keys / grouping info, not additive features.
    summed_features = [name for name in features if name not in ('apptype', 'videoid', 'code')]
    for name in summed_features:
        combined[name] = combined[f'{name}_x'] + combined[f'{name}_y']
    return combined[join_keys + summed_features]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def merge_df_with_score(df_left, df_right):
    """
    Outer-join two scored frames on (videoid, code) and sum 'lastonehour_return',
    'platform_return' and 'score'.

    Values missing on either side are treated as 0.

    :param df_left: left scored DataFrame
    :param df_right: right scored DataFrame
    :return: DataFrame with columns videoid, code, lastonehour_return, platform_return, score
    """
    join_keys = ['videoid', 'code']
    summed_columns = ['lastonehour_return', 'platform_return', 'score']
    combined = df_left.merge(df_right, on=join_keys, how='outer', suffixes=['_x', '_y'])
    combined = combined.fillna(0)
    for column in summed_columns:
        combined[column] = combined[f'{column}_x'] + combined[f'{column}_y']
    return combined[join_keys + summed_columns]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def process_with_region(region, df_merged, data_key, rule_key, rule_param, now_date, now_h,
                        rule_rank_h_flag, add_videos_with_pre_h, hour_count):
    """Score the rows of one region and pass the scored frame to ``video_rank``.

    :param region: region code; rows are selected where ``df_merged['code'] == region``
    :param df_merged: merged feature DataFrame covering all regions
    :param data_key: data key forwarded to ``video_rank``
    :param rule_key: rule key forwarded to ``video_rank``
    :param rule_param: rule parameters used by ``cal_score`` and ``video_rank``
    :param now_date: current date, forwarded to ``video_rank``
    :param now_h: current hour, forwarded to ``video_rank``
    :param rule_rank_h_flag: forwarded to ``video_rank``
    :param add_videos_with_pre_h: forwarded to ``video_rank``
    :param hour_count: forwarded to ``video_rank``
    :return: None
    """
    log_.info(f"region = {region} start...")

    in_region = df_merged['code'] == region
    region_df = df_merged[in_region]
    log_.info(f'region = {region}, region_df count = {len(region_df)}')

    # Scoring happens per region, after the rows have been narrowed down.
    scored = cal_score(df=region_df, param=rule_param)
    rank_kwargs = dict(
        df=scored, now_date=now_date, now_h=now_h, rule_key=rule_key, param=rule_param,
        region=region, data_key=data_key, rule_rank_h_flag=rule_rank_h_flag,
        add_videos_with_pre_h=add_videos_with_pre_h, hour_count=hour_count,
    )
    video_rank(**rank_kwargs)
    log_.info(f"region = {region} end!")
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def process_with_region2(region, df_merged, data_key, rule_key, rule_param, now_date, now_h,
                         rule_rank_h_flag, add_videos_with_pre_h, hour_count):
    """Pass the rows of one region from an already scored frame to ``video_rank``.

    Unlike ``process_with_region`` this variant does not call ``cal_score``; the frame it
    receives is forwarded as-is after filtering by region.

    :param region: region code; rows are selected where ``df_merged['code'] == region``
    :param df_merged: merged DataFrame covering all regions
    :param data_key: data key forwarded to ``video_rank``
    :param rule_key: rule key forwarded to ``video_rank``
    :param rule_param: rule parameters forwarded to ``video_rank``
    :param now_date: current date, forwarded to ``video_rank``
    :param now_h: current hour, forwarded to ``video_rank``
    :param rule_rank_h_flag: forwarded to ``video_rank``
    :param add_videos_with_pre_h: forwarded to ``video_rank``
    :param hour_count: forwarded to ``video_rank``
    :return: None
    """
    log_.info(f"region = {region} start...")

    in_region = df_merged['code'] == region
    region_score_df = df_merged[in_region]
    log_.info(f'region = {region}, region_score_df count = {len(region_score_df)}')

    rank_kwargs = dict(
        df=region_score_df, now_date=now_date, now_h=now_h, region=region,
        rule_key=rule_key, param=rule_param, data_key=data_key, rule_rank_h_flag=rule_rank_h_flag,
        add_videos_with_pre_h=add_videos_with_pre_h, hour_count=hour_count,
    )
    video_rank(**rank_kwargs)
    log_.info(f"region = {region} end!")
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def process_with_app_type(app_type, params, region_code_list, feature_df, now_date, now_h, rule_rank_h_flag):
    """Rank every region for every (data, rule) pair configured for one app type.

    For each entry of ``params['params_list']`` the per-apptype feature frames are merged with
    ``merge_df`` and one ``process_with_region`` greenlet is spawned per region. All greenlets
    are joined before the function returns.

    :param app_type: app type being processed (used for logging only)
    :param params: dict with 'data_params', 'rule_params' and 'params_list'
    :param region_code_list: region codes to process
    :param feature_df: feature DataFrame with an 'apptype' column
    :param now_date: current date, forwarded to ``process_with_region``
    :param now_h: current hour, forwarded to ``process_with_region``
    :param rule_rank_h_flag: forwarded to ``process_with_region``
    :return: None
    """
    log_.info(f"app_type = {app_type} start...")
    data_params_item = params.get('data_params')
    rule_params_item = params.get('rule_params')
    task_list = []
    for param in params.get('params_list'):
        data_key = param.get('data')
        data_param = data_params_item.get(data_key)
        log_.info(f"data_key = {data_key}, data_param = {data_param}")
        df_list = [feature_df[feature_df['apptype'] == apptype] for apptype in data_param]
        df_merged = reduce(merge_df, df_list)

        rule_key = param.get('rule')
        rule_param = rule_params_item.get(rule_key)
        log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
        # Same defaults as process_with_param uses for these two rule options.
        add_videos_with_pre_h = rule_param.get('add_videos_with_pre_h', False)
        hour_count = rule_param.get('hour_count', 0)
        # process_with_region takes (region, df_merged, data_key, rule_key, rule_param, now_date,
        # now_h, rule_rank_h_flag, add_videos_with_pre_h, hour_count). The previous call passed
        # app_type as an extra third argument and omitted the last two, which shifted every
        # argument by one and made each greenlet fail with a missing-argument TypeError.
        task_list.extend(
            gevent.spawn(process_with_region, region, df_merged, data_key, rule_key, rule_param,
                         now_date, now_h, rule_rank_h_flag, add_videos_with_pre_h, hour_count)
            for region in region_code_list
        )
    gevent.joinall(task_list)
    log_.info(f"app_type = {app_type} end!")
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def copy_data_for_city(region, city_code, data_key, rule_key, now_date, now_h, shield_config):
    """Copy the region's recall zsets to the city's redis keys, filtering shielded videos.

    For each key prefix in the list below, the zset stored under the region key is read and
    written to the same key with ``city_code`` in place of ``region``. When ``shield_config``
    has an entry for ``city_code``, the ids are first passed through ``filter_shield_video``
    and only the ids it returns are copied. Missing or empty region keys are skipped. City keys
    are written with a 2-day expiry.

    :param region: region code whose data is copied
    :param city_code: city code that receives the copy
    :param data_key: data key part of the redis keys
    :param rule_key: rule key part of the redis keys
    :param now_date: datetime used for the ``%Y%m%d`` part of the keys
    :param now_h: hour part of the keys
    :param shield_config: mapping from city code to the shield key names to apply
    :return: None
    """
    log_.info(f"city_code = {city_code} start ...")
    redis_helper = RedisHelper()
    # NOTE(review): dup_to_redis also writes DUP2/DUP3_REGION_48H_H keys for the '48h' flag;
    # they are not copied here - confirm whether cities need them.
    key_prefix_list = [
        config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H,
        config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H,
        config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H,
        config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H,
        config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H,
    ]
    # The part of the key after the region/city code is identical for every prefix.
    key_suffix = f"{data_key}:{rule_key}:{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
    for key_prefix in key_prefix_list:
        region_key = f"{key_prefix}{region}:{key_suffix}"
        city_key = f"{key_prefix}{city_code}:{key_suffix}"
        if not redis_helper.key_exists(key_name=region_key):
            continue
        region_data = redis_helper.get_all_data_from_zset(key_name=region_key, with_scores=True)
        if not region_data:
            continue

        region_video_ids = [int(video_id) for video_id, _ in region_data]
        shield_key_name_list = shield_config.get(city_code, None)
        if shield_key_name_list is not None:
            filtered_video_ids = filter_shield_video(video_ids=region_video_ids,
                                                     shield_key_name_list=shield_key_name_list)
        else:
            filtered_video_ids = region_video_ids

        # Set membership keeps the filter linear; the list lookup was quadratic in pool size.
        allowed_ids = set(filtered_video_ids)
        city_data = {
            int(video_id): score
            for video_id, score in region_data
            if int(video_id) in allowed_ids
        }
        if city_data:
            redis_helper.add_data_with_zset(key_name=city_key, data=city_data, expire_time=2 * 24 * 3600)

    log_.info(f"city_code = {city_code} end!")
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def process_with_param(param, data_params_item, rule_params_item, region_code_list, feature_df, now_date, now_h, rule_rank_h_flag):
    """Rank all regions for one (data, rule) pair, then copy the results to the mapped cities.

    With ``merge_func == 2`` in the rule parameters, each apptype is scored separately with
    ``cal_score``, its score is multiplied by the apptype weight, the frames are combined with
    ``merge_df_with_score`` and regions are handled by ``process_with_region2``. Otherwise the
    raw per-apptype features are combined with ``merge_df`` and regions are handled by
    ``process_with_region`` (which scores per region).

    :param param: dict with the 'data' and 'rule' keys to use
    :param data_params_item: mapping from data key to the apptypes to merge (a mapping
        apptype -> weight when ``merge_func == 2``)
    :param rule_params_item: mapping from rule key to rule parameters
    :param region_code_list: region codes to process
    :param feature_df: feature DataFrame with an 'apptype' column
    :param now_date: current date
    :param now_h: current hour
    :param rule_rank_h_flag: forwarded to the region workers
    :return: None
    """
    log_.info(f"param = {param} start...")

    data_key = param.get('data')
    data_param = data_params_item.get(data_key)
    log_.info(f"data_key = {data_key}, data_param = {data_param}")
    rule_key = param.get('rule')
    rule_param = rule_params_item.get(rule_key)
    log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
    merge_func = rule_param.get('merge_func', None)

    add_videos_with_pre_h = rule_param.get('add_videos_with_pre_h', False)
    hour_count = rule_param.get('hour_count', 0)

    if merge_func == 2:
        # Score every apptype on its own, weight the score, then sum the scored frames.
        weighted_scores = []
        for apptype, weight in data_param.items():
            apptype_df = feature_df[feature_df['apptype'] == apptype]
            scored = cal_score(df=apptype_df, param=rule_param)
            scored['score'] = scored['score'] * weight
            weighted_scores.append(scored)
        df_merged = reduce(merge_df_with_score, weighted_scores)
        df_merged['platform_return_rate'] = df_merged['platform_return'] / df_merged['lastonehour_return']
        region_worker = process_with_region2
    else:
        # Sum the raw features first; scoring happens inside the region worker.
        per_apptype_frames = [feature_df[feature_df['apptype'] == apptype] for apptype in data_param]
        df_merged = reduce(merge_df, per_apptype_frames)
        region_worker = process_with_region

    region_jobs = [
        gevent.spawn(region_worker,
                     region, df_merged, data_key, rule_key, rule_param, now_date, now_h, rule_rank_h_flag,
                     add_videos_with_pre_h, hour_count)
        for region in region_code_list
    ]
    gevent.joinall(region_jobs)

    # Cities reuse their region's data; copy region by region, all cities of a region together.
    shield_config = rule_param.get('shield_config', config_.SHIELD_CONFIG)
    for region, city_list in config_.REGION_CITY_MAPPING.items():
        city_jobs = [
            gevent.spawn(copy_data_for_city,
                         region, city_code, data_key, rule_key, now_date, now_h, shield_config)
            for city_code in city_list
        ]
        gevent.joinall(city_jobs)

    log_.info(f"param = {param} end!")
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def rank_by_h(project, table, now_date, now_h, rule_params, region_code_list, rule_rank_h_flag):
    """Load the hourly feature data and run ``process_with_param`` for every configured pair.

    One worker process is started per entry of ``rule_params['params_list']``. The function
    waits for all workers to finish. A worker that raises is logged through ``log_.error``;
    the remaining workers are not affected.

    :param project: project name passed to ``get_feature_data``
    :param table: table name passed to ``get_feature_data``
    :param now_date: current date
    :param now_h: current hour
    :param rule_params: dict with 'data_params', 'rule_params' and 'params_list'
    :param region_code_list: region codes to process
    :param rule_rank_h_flag: forwarded to ``process_with_param``
    :return: None
    """
    feature_df = get_feature_data(project=project, table=table, now_date=now_date)
    feature_df['apptype'] = feature_df['apptype'].astype(int)
    data_params_item = rule_params.get('data_params')
    rule_params_item = rule_params.get('rule_params')
    params_list = rule_params.get('params_list')

    def report_failure(exc, failed_param):
        """Log an exception raised inside a worker process."""
        detail = ''.join(traceback.format_exception(type(exc), exc, exc.__traceback__))
        log_.error(f"process_with_param failed, param = {failed_param}, exception: {exc}, traceback: {detail}")

    pool = multiprocessing.Pool(processes=len(params_list))
    for param in params_list:
        pool.apply_async(
            func=process_with_param,
            args=(param, data_params_item, rule_params_item, region_code_list, feature_df, now_date, now_h, rule_rank_h_flag),
            # Without a callback (or .get()) an exception in the worker vanishes silently.
            # The default argument binds the current param to this callback.
            error_callback=lambda exc, failed_param=param: report_failure(exc, failed_param),
        )
    pool.close()
    pool.join()
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def h_bottom_process(param, rule_params_item, region_code_list, key_prefix, redis_dt, redis_h,
                     now_date, now_h, rule_rank_h_flag):
    """Fallback for one (data, rule) pair: reuse the previous slot's region data for the current hour.

    For every region the zset stored under the ``redis_dt``/``redis_h`` key is copied to the
    key of the current date and hour (2-day expiry), and ``dup_to_redis`` is run on the copied
    ids. Afterwards the region data is copied to the mapped cities via ``copy_data_for_city``.

    :param param: dict with the 'data' and 'rule' keys to use; may also carry
        'political_filter', 'shield_config' and 'dup_remove'
    :param rule_params_item: mapping from rule key to rule parameters
    :param region_code_list: region codes to process
    :param key_prefix: redis key prefix of the region hourly pool
    :param redis_dt: ``%Y%m%d`` date string of the slot to copy from
    :param redis_h: hour of the slot to copy from
    :param now_date: current date
    :param now_h: current hour
    :param rule_rank_h_flag: forwarded to ``dup_to_redis``
    :return: None
    """
    redis_helper = RedisHelper()
    data_key = param.get('data')
    rule_key = param.get('rule')
    rule_param = rule_params_item.get(rule_key)
    log_.info(f"data_key = {data_key}, rule_key = {rule_key}, rule_param = {rule_param}")
    h_rule_key = rule_param.get('h_rule_key', None)
    region_24h_rule_key = rule_param.get('region_24h_rule_key', 'rule1')
    by_24h_rule_key = rule_param.get('24h_rule_key', None)
    by_48h_rule_key = rule_param.get('48h_rule_key', None)

    def option(name, default):
        """Read an option from param, falling back to rule_param, then to default."""
        if name in param:
            return param.get(name)
        return rule_param.get(name, default)

    # process_with_param reads shield_config from the rule parameters, while this function only
    # looked at the params_list item and so ignored a rule-level setting. The item keeps
    # precedence; the rule parameters are now the fallback.
    # NOTE(review): presumably political_filter / dup_remove are rule-level options as well -
    # confirm against video_rank.
    political_filter = option('political_filter', None)
    shield_config = option('shield_config', config_.SHIELD_CONFIG)
    dup_remove = option('dup_remove', True)

    date_text = datetime.datetime.strftime(now_date, '%Y%m%d')
    for region in region_code_list:
        log_.info(f"region = {region}")
        key_name = f"{key_prefix}{region}:{data_key}:{rule_key}:{redis_dt}:{redis_h}"
        initial_data = redis_helper.get_all_data_from_zset(key_name=key_name, with_scores=True)
        if initial_data is None:
            initial_data = []
        final_data = dict()
        h_video_ids = []
        for video_id, score in initial_data:
            final_data[video_id] = score
            h_video_ids.append(int(video_id))

        # Store the previous slot's pool under the current date/hour key.
        final_key_name = f"{key_prefix}{region}:{data_key}:{rule_key}:{date_text}:{now_h}"
        if len(final_data) > 0:
            redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=2 * 24 * 3600)

        # De-duplicate the other recall pools against the copied ids.
        dup_to_redis(h_video_ids=h_video_ids, now_date=now_date, now_h=now_h, rule_key=rule_key, h_rule_key=h_rule_key,
                     region_24h_rule_key=region_24h_rule_key, region=region,
                     data_key=data_key, by_24h_rule_key=by_24h_rule_key,
                     by_48h_rule_key=by_48h_rule_key, rule_rank_h_flag=rule_rank_h_flag,
                     political_filter=political_filter, shield_config=shield_config, dup_remove=dup_remove)

    # Cities reuse their region's data.
    for region, city_list in config_.REGION_CITY_MAPPING.items():
        t = [
            gevent.spawn(
                copy_data_for_city,
                region, city_code, data_key, rule_key, now_date, now_h, shield_config
            )
            for city_code in city_list
        ]
        gevent.joinall(t)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def h_rank_bottom(now_date, now_h, rule_params, region_code_list, rule_rank_h_flag):
    """Use the previous hour's result as the current hour's data when no fresh data is available.

    The slot to copy from is hour 23 of the previous day when ``now_h`` is 0, otherwise
    ``now_h - 1`` of the same day. One worker process running ``h_bottom_process`` is started
    per entry of ``rule_params['params_list']``; the function waits for all of them. A worker
    that raises is logged through ``log_.error``.

    :param now_date: current date
    :param now_h: current hour
    :param rule_params: dict with 'rule_params' and 'params_list'
    :param region_code_list: region codes to process
    :param rule_rank_h_flag: forwarded to ``h_bottom_process``
    :return: None
    """
    # Locate the previous hourly slot, rolling back one day at midnight.
    if now_h == 0:
        redis_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
        redis_h = 23
    else:
        redis_dt = datetime.datetime.strftime(now_date, '%Y%m%d')
        redis_h = now_h - 1

    key_prefix = config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H
    rule_params_item = rule_params.get('rule_params')
    params_list = rule_params.get('params_list')

    def report_failure(exc, failed_param):
        """Log an exception raised inside a worker process."""
        detail = ''.join(traceback.format_exception(type(exc), exc, exc.__traceback__))
        log_.error(f"h_bottom_process failed, param = {failed_param}, exception: {exc}, traceback: {detail}")

    pool = multiprocessing.Pool(processes=len(params_list))
    for param in params_list:
        pool.apply_async(
            func=h_bottom_process,
            args=(param, rule_params_item, region_code_list, key_prefix, redis_dt, redis_h, now_date, now_h, rule_rank_h_flag),
            # Without a callback (or .get()) an exception in the worker vanishes silently.
            # The default argument binds the current param to this callback.
            error_callback=lambda exc, failed_param=param: report_failure(exc, failed_param),
        )
    pool.close()
    pool.join()
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def h_timer_check():
    """Entry point of the hourly region update; re-schedules itself until data is available.

    Reads the rank flag from ``sys.argv[1]`` ('48h' selects
    ``config_.RULE_PARAMS_REGION_APP_TYPE_48H``, anything else the module-level
    ``RULE_PARAMS``) and then:

    * at hour 0, always falls back to the previous hour's data (``h_rank_bottom``);
    * otherwise runs ``rank_by_h`` when ``h_data_check`` reports data;
    * otherwise falls back to ``h_rank_bottom`` once the minute is past 40;
    * otherwise checks again in 60 seconds.

    Any exception is logged and reported to the Feishu robot; it is not re-raised.

    :return: None
    """
    try:
        rule_rank_h_flag = sys.argv[1]
        if rule_rank_h_flag == '48h':
            rule_params = config_.RULE_PARAMS_REGION_APP_TYPE_48H
        else:
            rule_params = RULE_PARAMS
        project = config_.PROJECT_REGION_APP_TYPE
        table = config_.TABLE_REGION_APP_TYPE
        region_code_list = list(region_code.values())

        # Take ONE timestamp and derive date, hour and minute from it. Reading the clock three
        # times could pair the old day with hour 0 (or the old hour with minute 0) when the
        # calls straddle a boundary, producing a wrong redis key or fallback decision.
        now_date = datetime.datetime.today()
        log_.info(f"now_date: {datetime.datetime.strftime(now_date, '%Y%m%d%H')}, rule_rank_h_flag: {rule_rank_h_flag}")
        now_h = now_date.hour
        now_min = now_date.minute

        if now_h == 0:
            # At hour 0 the previous hour's result is always reused.
            h_rank_bottom(now_date=now_date, now_h=now_h, rule_params=rule_params, region_code_list=region_code_list,
                          rule_rank_h_flag=rule_rank_h_flag)
            log_.info("region_h_data end!")
            return

        h_data_count = h_data_check(project=project, table=table, now_date=now_date)
        if h_data_count > 0:
            log_.info(f'region_h_data_count = {h_data_count}')
            rank_by_h(now_date=now_date, now_h=now_h, rule_params=rule_params,
                      project=project, table=table, region_code_list=region_code_list, rule_rank_h_flag=rule_rank_h_flag)
            log_.info("region_h_data end!")
        elif now_min > 40:
            log_.info('h_recall data is None, use bottom data!')
            h_rank_bottom(now_date=now_date, now_h=now_h, rule_params=rule_params, region_code_list=region_code_list,
                          rule_rank_h_flag=rule_rank_h_flag)
            log_.info("region_h_data end!")
        else:
            # Data not ready yet: check again in one minute.
            Timer(60, h_timer_check).start()

    except Exception as e:
        # Format once so the log and the alert carry the same traceback.
        error_trace = traceback.format_exc()
        log_.error(f"地域分组小时级数据更新失败, exception: {e}, traceback: {error_trace}")
        send_msg_to_feishu(
            webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
            key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
            msg_text=f"rov-offline{config_.ENV_TEXT} - 地域分组小时级数据更新失败\n"
                     f"exception: {e}\n"
                     f"traceback: {error_trace}"
        )
|
|
|
|
+
|
|
|
|
+
|
|
|
|
if __name__ == '__main__':
    # Script entry point: log the start, then run the hourly check (which may re-schedule itself).
    log_.info("region_h_data start...")
    h_timer_check()
|