Просмотр исходного кода

refactor(feishu): 重构飞书上传功能,支持样式继承

- 读取飞书表头并按其顺序重排CSV数据
- 字段校验:警告不一致但继续执行
- 分批删除旧数据(每批最多5000行)
- 通过模板行继承样式,写入后删除模板
- 清理末尾多余空行

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
yangxiaohui 1 месяц назад
Родитель
Commit
967799df45
1 изменённый файл: 169 добавлений и 10 удалений
  1. 169 10
      fetch_daily.py

+ 169 - 10
fetch_daily.py

@@ -239,8 +239,21 @@ def apply_cols_mapping(header, data_rows, cols_spec):
     return new_header, new_rows
     return new_header, new_rows
 
 
 
 
def column_index_to_letter(col_idx):
    """Convert a 1-based column index to its spreadsheet letter label.

    Examples: 1 -> "A", 26 -> "Z", 27 -> "AA", 703 -> "AAA".

    Args:
        col_idx: 1-based column number; non-positive values yield "".

    Returns:
        The bijective base-26 letter representation as an uppercase string.
    """
    letters = []
    while col_idx > 0:
        # Shift to 0-based so that 26 maps to "Z" rather than rolling over.
        col_idx -= 1
        letters.append(chr(ord("A") + col_idx % 26))
        col_idx //= 26
    # Digits were collected least-significant first; reverse before joining.
    return "".join(reversed(letters))
+
+
 def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc", cols_spec=None):
 def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc", cols_spec=None):
-    """上传 CSV 文件到飞书表格
+    """上传 CSV 文件到飞书表格(通过模板行继承样式)
+
+    第1行: 表头
+    第2行: 样式模板(用于继承,最后删除)
+    第3行起: 数据
 
 
     Args:
     Args:
         csv_file: CSV 文件路径
         csv_file: CSV 文件路径
@@ -249,7 +262,7 @@ def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc",
         sort_spec: 排序规格,如 "dt:desc,name:asc"
         sort_spec: 排序规格,如 "dt:desc,name:asc"
         cols_spec: 列映射规格,如 "dt:日期,name,value:数值"
         cols_spec: 列映射规格,如 "dt:日期,name,value:数值"
     """
     """
-    from feishu import write_data_to_sheet
+    from feishu import Client, LARK_HOST, APP_ID, APP_SECRET, request
 
 
     # 读取 CSV
     # 读取 CSV
     with open(csv_file, "r", encoding="utf-8") as f:
     with open(csv_file, "r", encoding="utf-8") as f:
@@ -281,14 +294,160 @@ def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc",
     # 按列推断类型并转换
     # 按列推断类型并转换
     col_types = infer_column_types(data_rows)
     col_types = infer_column_types(data_rows)
     converted_rows = [convert_row_by_types(row, col_types) for row in data_rows]
     converted_rows = [convert_row_by_types(row, col_types) for row in data_rows]
-    data = [header] + converted_rows
-
-    print(f"上传到飞书: {len(data_rows)} 行数据")
-    write_data_to_sheet(
-        data=data,
-        sheet_token=sheet_token,
-        sheetid=sheet_id,  # None 时自动获取第一个工作表
-    )
+
+    # 初始化飞书客户端
+    client = Client(LARK_HOST)
+    access_token = client.get_tenant_access_token(APP_ID, APP_SECRET)
+
+    # 获取 sheet_id
+    if sheet_id is None:
+        sheet_id = client.get_sheetid(access_token, sheet_token)
+    print(f"Sheet ID: {sheet_id}")
+
+    # 获取表格信息
+    sheet_props = client.get_sheet_properties(access_token, sheet_token, sheet_id)
+    current_cols = sheet_props['column_count'] if sheet_props else 26
+    header_end_col = column_index_to_letter(current_cols)
+
+    # 读取飞书表头(获取所有列)
+    feishu_header = client.read_range_values(access_token, sheet_token, f"{sheet_id}!A1:{header_end_col}1")
+    if feishu_header and feishu_header[0]:
+        feishu_cols = feishu_header[0]
+        print(f"飞书表头: {feishu_cols}")
+        print(f"CSV表头: {header}")
+
+        # 校验字段一致性(警告但继续,以飞书表头为准)
+        feishu_set = set(feishu_cols)
+        csv_set = set(header)
+
+        missing_in_csv = feishu_set - csv_set
+        missing_in_feishu = csv_set - feishu_set
+
+        if missing_in_csv:
+            print(f"警告: CSV缺少字段(将填空值): {missing_in_csv}")
+        if missing_in_feishu:
+            print(f"警告: 飞书缺少字段(将忽略): {missing_in_feishu}")
+
+        # 按飞书表头顺序重排数据
+        csv_col_index = {name: i for i, name in enumerate(header)}
+        new_converted_rows = []
+        for row in converted_rows:
+            new_row = []
+            for col_name in feishu_cols:
+                if col_name in csv_col_index:
+                    new_row.append(row[csv_col_index[col_name]])
+                else:
+                    new_row.append("")  # CSV缺少的字段填空
+            new_converted_rows.append(new_row)
+
+        converted_rows = new_converted_rows
+        header = feishu_cols
+        print(f"已按飞书表头顺序重排数据")
+
+    total_rows = len(converted_rows)
+    num_cols = len(header)
+    end_col = column_index_to_letter(num_cols)
+
+    print(f"上传到飞书: {total_rows} 行数据")
+
+    batch_size = 500
+
+    # 获取当前行数(复用之前获取的 sheet_props)
+    current_rows = sheet_props['row_count'] if sheet_props else 2
+    print(f"当前行数: {current_rows}, 需要数据行: {total_rows}")
+
+    headers = {
+        'Content-Type': 'application/json; charset=utf-8',
+        'Authorization': f'Bearer {access_token}'
+    }
+
+    # 第1步:删除旧数据行(保留第1行表头 + 第2行样式模板),分批删除
+    if current_rows > 2:
+        print(f"清理旧数据({current_rows - 2}行)...")
+        rows_to_delete = current_rows - 2
+        delete_batch = 5000
+        while rows_to_delete > 0:
+            # 每次从第3行开始删除,删除后行号会自动调整
+            batch = min(rows_to_delete, delete_batch)
+            try:
+                client.delete_rows(access_token, sheet_token, sheet_id, 3, 2 + batch)
+                rows_to_delete -= batch
+                if rows_to_delete > 0:
+                    print(f"  已删除 {current_rows - 2 - rows_to_delete}/{current_rows - 2}")
+            except Exception as e:
+                print(f"  清理失败: {e}")
+                break
+
+    # 第2步:扩展表格容量(insert 不会自动扩展)
+    # 删除后当前只有2行(表头+模板),需要扩展到 2 + total_rows 行
+    needed_rows = 2 + total_rows
+    add_url = f"{LARK_HOST}/open-apis/sheets/v2/spreadsheets/{sheet_token}/dimension_range"
+    add_payload = {
+        "dimension": {
+            "sheetId": sheet_id,
+            "majorDimension": "ROWS",
+            "length": total_rows  # 添加数据行数
+        }
+    }
+    try:
+        request("POST", add_url, headers, add_payload)
+        print(f"扩展容量: +{total_rows} 行")
+    except Exception as e:
+        print(f"  扩展容量失败: {e}")
+
+    # 第3步:分批插入空行(继承第2行样式)并写入数据
+    print(f"插入并写入 {total_rows} 行...")
+    insert_url = f"{LARK_HOST}/open-apis/sheets/v2/spreadsheets/{sheet_token}/insert_dimension_range"
+
+    # 反向处理批次(从最后一批开始),因为每次都在第3行前插入
+    batches = [converted_rows[i:i + batch_size] for i in range(0, total_rows, batch_size)]
+    processed = 0
+
+    for batch in reversed(batches):
+        batch_count = len(batch)
+
+        # 在第3行前插入空行(继承第2行样式)
+        insert_payload = {
+            "dimension": {
+                "sheetId": sheet_id,
+                "majorDimension": "ROWS",
+                "startIndex": 2,  # 0-indexed, 第3行位置
+                "endIndex": 2 + batch_count
+            },
+            "inheritStyle": "BEFORE"
+        }
+        try:
+            request("POST", insert_url, headers, insert_payload)
+        except Exception as e:
+            print(f"  插入行失败: {e}")
+            break
+
+        # 写入数据到插入的行(第3行开始)
+        range_str = f"{sheet_id}!A3:{end_col}{2 + batch_count}"
+        client.batch_update_values(access_token, sheet_token, {
+            "valueRanges": [{"range": range_str, "values": batch}]
+        })
+
+        processed += batch_count
+        print(f"  处理: {processed}/{total_rows}")
+
+    # 第4步:删除末尾多余的空行(扩展容量时添加的)
+    final_row_count = 2 + total_rows  # 表头 + 模板 + 数据
+    current_row_count = 2 + total_rows * 2  # 扩展 + 插入
+    if current_row_count > final_row_count:
+        print(f"清理多余空行...")
+        try:
+            client.delete_rows(access_token, sheet_token, sheet_id, final_row_count + 1, current_row_count)
+        except Exception as e:
+            print(f"  清理失败: {e}")
+
+    # 第5步:删除模板行(第2行)
+    print(f"删除模板行...")
+    try:
+        client.delete_rows(access_token, sheet_token, sheet_id, 2, 2)
+    except Exception as e:
+        print(f"  删除模板行失败: {e}")
+
     print(f"飞书上传完成: {sheet_token}")
     print(f"飞书上传完成: {sheet_token}")