소스 검색

修改按权重随机分配

xueyiming 6 달 전
부모
커밋
d4198243ec
1개의 파일이 변경되었습니다: 8개의 추가 및 7개의 삭제
  1. 8 7
      alg_growth_3rd_gh_reply_video_v1.py

+ 8 - 7
alg_growth_3rd_gh_reply_video_v1.py

@@ -336,9 +336,10 @@ def rank_for_layer1(run_dt, run_hour, gh):
 
     # TODO: 修改权重计算策略
     df['score'] = df['ros']
-    # 处理每个分类  指定要保留的每个分类的得分最高数量SEND_N
-    sampled_df = df.groupby('category1').apply(lambda x: x.nlargest(SEND_N, 'score')).reset_index(drop=True)
-    # 添加'sort'列
+    # 按照 category1 分类后进行加权随机抽样
+    sampled_df = df.groupby('category1').apply(
+        lambda x: x.sample(n=SEND_N, weights=x['score'], replace=False)).reset_index(drop=True)
+    # 添加 'sort' 列
     sampled_df['sort'] = sampled_df.groupby('category1')['score'].rank(method='first', ascending=False).astype(int)
     # 按得分排序
     sampled_df = sampled_df.sort_values(by=['category1', 'score'], ascending=[True, False]).reset_index(drop=True)
@@ -524,10 +525,10 @@ def build_and_transfer_data(run_dt, run_hour, project, **kwargs):
     #     writer.write(list(final_df.itertuples(index=False)))
 
     # sync to MySQL
-    # data_to_insert = [tuple(row) for row in final_df.itertuples(index=False)]
-    # data_columns = list(final_df.columns)
-    # mysql = MysqlHelper(CONFIG.MYSQL_CRAWLER_INFO)
-    # mysql.batch_insert(RDS_RANK_RESULT_TABLE, data_to_insert, data_columns)
+    data_to_insert = [tuple(row) for row in final_df.itertuples(index=False)]
+    data_columns = list(final_df.columns)
+    mysql = MysqlHelper(CONFIG.MYSQL_CRAWLER_INFO)
+    mysql.batch_insert(RDS_RANK_RESULT_TABLE, data_to_insert, data_columns)
 
 
 def main_loop():