|
@@ -336,9 +336,10 @@ def rank_for_layer1(run_dt, run_hour, gh):
|
|
|
|
|
|
# TODO: 修改权重计算策略
|
|
|
df['score'] = df['ros']
|
|
|
- # 处理每个分类 指定要保留的每个分类的得分最高数量SEND_N
|
|
|
- sampled_df = df.groupby('category1').apply(lambda x: x.nlargest(SEND_N, 'score')).reset_index(drop=True)
|
|
|
- # 添加'sort'列
|
|
|
+ # 按照 category1 分类后进行加权随机抽样
|
|
|
+ sampled_df = df.groupby('category1').apply(
|
|
|
+ lambda x: x.sample(n=SEND_N, weights=x['score'], replace=False)).reset_index(drop=True)
|
|
|
+ # 添加 'sort' 列
|
|
|
sampled_df['sort'] = sampled_df.groupby('category1')['score'].rank(method='first', ascending=False).astype(int)
|
|
|
# 按得分排序
|
|
|
sampled_df = sampled_df.sort_values(by=['category1', 'score'], ascending=[True, False]).reset_index(drop=True)
|
|
@@ -524,10 +525,10 @@ def build_and_transfer_data(run_dt, run_hour, project, **kwargs):
|
|
|
# writer.write(list(final_df.itertuples(index=False)))
|
|
|
|
|
|
# sync to MySQL
|
|
|
- # data_to_insert = [tuple(row) for row in final_df.itertuples(index=False)]
|
|
|
- # data_columns = list(final_df.columns)
|
|
|
- # mysql = MysqlHelper(CONFIG.MYSQL_CRAWLER_INFO)
|
|
|
- # mysql.batch_insert(RDS_RANK_RESULT_TABLE, data_to_insert, data_columns)
|
|
|
+ data_to_insert = [tuple(row) for row in final_df.itertuples(index=False)]
|
|
|
+ data_columns = list(final_df.columns)
|
|
|
+ mysql = MysqlHelper(CONFIG.MYSQL_CRAWLER_INFO)
|
|
|
+ mysql.batch_insert(RDS_RANK_RESULT_TABLE, data_to_insert, data_columns)
|
|
|
|
|
|
|
|
|
def main_loop():
|