zhaohaipeng 9 hónapja
szülő
commit
50df15bbbc
1 módosított fájl, 7 hozzáadás és 14 törlés
  1. 7 14
      XGB/vov_xgboost_train.py

+ 7 - 14
XGB/vov_xgboost_train.py

@@ -32,20 +32,13 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(level
 
 def get_partition_df(table, dt):
     logger.info(f"开始下载: {table} -- {dt} 的数据")
-    df = pd.DataFrame()
-    try:
-        table_info = odps_client.get_table(table)
-        col_names = [col.name for col in table_info.table_schema.columns]
-        download_session = odps_client.get_download_session(table, dt)
-        logger.info(f"表: {table} 中的分区 {dt}, 共有 {download_session.count} 条数据")
-        with download_session.open_record_reader(0, download_session.count) as reader:
-            records = []
-            for record in reader:
-                records.append(record.values)  # 获取每一行的值
-            # 使用元数据中的列名
-            df = pd.DataFrame(records, columns=col_names)
-    except Exception as e:
-        logger.error(f"下载 {table} -- {dt} 的数据异常: ", e)
+
+    download_session = odps_client.get_download_session(table, dt)
+    logger.info(f"表: {table} 中的分区 {dt}, 共有 {download_session.count} 条数据")
+
+    with download_session.open_arrow_reader(0, download_session.count) as reader:
+        # 将所有数据加载到 DataFrame 中
+        df = pd.concat([batch.to_pandas() for batch in reader])
 
     logger.info(f"下载结束: {table} -- {dt} 的数据, 共计 {df.shape[0]} 条数据")
     return df