|
@@ -336,9 +336,10 @@ def rank_for_layer1(run_dt, run_hour, gh):
|
|
|
|
|
|
|
|
|
df['score'] = df['ros']
|
|
|
-
|
|
|
- sampled_df = df.groupby('category1').apply(lambda x: x.nlargest(SEND_N, 'score')).reset_index(drop=True)
|
|
|
-
|
|
|
+
|
|
|
+ sampled_df = df.groupby('category1').apply(
|
|
|
+ lambda x: x.sample(n=SEND_N, weights=x['score'], replace=False)).reset_index(drop=True)
|
|
|
+
|
|
|
sampled_df['sort'] = sampled_df.groupby('category1')['score'].rank(method='first', ascending=False).astype(int)
|
|
|
|
|
|
sampled_df = sampled_df.sort_values(by=['category1', 'score'], ascending=[True, False]).reset_index(drop=True)
|
|
@@ -524,10 +525,10 @@ def build_and_transfer_data(run_dt, run_hour, project, **kwargs):
|
|
|
|
|
|
|
|
|
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
+ data_to_insert = [tuple(row) for row in final_df.itertuples(index=False)]
|
|
|
+ data_columns = list(final_df.columns)
|
|
|
+ mysql = MysqlHelper(CONFIG.MYSQL_CRAWLER_INFO)
|
|
|
+ mysql.batch_insert(RDS_RANK_RESULT_TABLE, data_to_insert, data_columns)
|
|
|
|
|
|
|
|
|
def main_loop():
|