# -*- coding: UTF-8 -*-
import json
import base64
import hashlib
import os
from curl_cffi import requests as mj_requests
import requests
import os
from dotenv import load_dotenv, find_dotenv

# Populate os.environ from the nearest .env file (if any) before reading config.
load_dotenv(find_dotenv())

# Credentials and host are read from the environment; the literals are only
# fallbacks kept so existing deployments without these variables keep working.
# NOTE(security): a real-looking app secret is committed here. It should be
# rotated and the fallback removed once every deployment sets APP_SECRET.
APP_ID = os.getenv('APP_ID', 'cli_a22acf2916b8500e')
APP_SECRET = os.getenv('APP_SECRET', 'tE0xAB2gZTMlBGdPczCGLcmpRlZQm5CQ')
LARK_HOST = os.getenv('LARK_HOST', 'https://open.feishu.cn')
APP_HOST = 'https://open.feishu.cn'
EMAIL = 'semsevens@email.com'

class LarkException(Exception):
    """Error raised when a Lark/Feishu API response carries a non-zero code.

    Attributes:
        code: Error code taken from the response (``request`` uses -1 when
            the response had no recognisable code field).
        msg: Message text taken from the response, if any.
    """

    def __init__(self, code=0, msg=None):
        # Forward to Exception so ``args`` is populated even when the caller
        # passes keyword arguments, as ``request`` does.
        super().__init__(code, msg)
        self.code = code
        self.msg = msg

    def __str__(self) -> str:
        return "{}:{}".format(self.code, self.msg)

    # repr and str intentionally share the same "code:msg" rendering.
    __repr__ = __str__

def request(method, url, headers, payload=None):
    """Send a JSON request to the Lark/Feishu API and return the decoded body.

    Args:
        method: HTTP verb, e.g. "GET" or "POST".
        url: Full request URL.
        headers: Dict of HTTP headers.
        payload: JSON-serialisable body. When omitted an empty object is
            sent, which matches the previous default.

    Returns:
        The response body parsed as a dict.

    Raises:
        requests.HTTPError: The body carried no API code and the HTTP status
            was not 200 (raised by ``raise_for_status``).
        LarkException: The API code ("code" or "StatusCode") was non-zero, or
            no code could be found in the body at all.
    """
    # A fresh dict per call; a shared mutable default could leak state
    # between calls if any caller ever mutated it.
    if payload is None:
        payload = {}
    response = requests.request(method, url, headers=headers, json=payload)

    resp = {}
    # Only attempt to decode bodies that look like a JSON object. startswith
    # is used instead of text[0] so an empty body does not raise IndexError.
    if response.text.startswith('{'):
        resp = response.json()

    # Different endpoints report the status under different keys.
    code = resp.get("code", -1)
    if code == -1:
        code = resp.get("StatusCode", -1)
    if code == -1 and response.status_code != 200:
        response.raise_for_status()
    if code != 0:
        raise LarkException(code=code, msg=resp.get("msg", ""))
    return resp
def get_image_data_from_url(img_url, use_cache=True):
    """Download an image and return its bytes as a base64 string.

    Results are cached on disk under ``image_cache/<md5-of-url>.json``
    (relative to the current working directory).

    Args:
        img_url: URL of the image.
        use_cache: When True, return the cached value if one exists. The
            cache file is (re)written after every successful download
            regardless of this flag.

    Returns:
        Base64 text of the downloaded image bytes.

    Raises:
        Exception: The server answered with a status other than 200.
    """
    # The cache key is the MD5 of the URL.
    url_hash = hashlib.md5(img_url.encode()).hexdigest()
    cache_dir = 'image_cache'
    cache_file = os.path.join(cache_dir, f'{url_hash}.json')

    if use_cache and os.path.exists(cache_file):
        with open(cache_file, 'r') as f:
            return json.load(f)['image_data']

    if 'midjourney' in img_url:
        # Midjourney URLs are fetched over plain http through curl_cffi with
        # a Chrome fingerprint.
        response = mj_requests.get(img_url.replace("https://", "http://"), impersonate="chrome100")
    else:
        response = requests.get(img_url)

    if response.status_code != 200:
        raise Exception(f"无法从URL获取图片: {img_url}")

    # Encode the bytes exactly as received. The previous code appended 1-4
    # '=' bytes to the raw image first (base64 padding applied to the wrong
    # side of the encoding), which added junk to every image.
    image_data = base64.b64encode(response.content).decode('utf-8')

    # Make sure the directory exists even when use_cache is False; otherwise
    # the write below fails with FileNotFoundError on a fresh checkout.
    os.makedirs(cache_dir, exist_ok=True)
    with open(cache_file, 'w') as f:
        json.dump({'image_data': image_data}, f)

    return image_data
from PIL import Image
import io
import os
def get_image_size(img_url):
    """Return ``(width, height)`` of the image found at ``img_url``.

    The image is obtained through ``get_image_data_from_url`` and decoded
    with Pillow.
    """
    encoded = get_image_data_from_url(img_url)
    picture = Image.open(io.BytesIO(base64.b64decode(encoded)))
    w, h = picture.size
    return w, h

# Manual smoke check: when the file is run as a script, fetch one sample image
# and dump its base64 text to a local file.
if __name__ == "__main__":
    img_url = "https://sns-webpic.xhscdn.com/1040g2sg31c4vs26n12a05ph3cdp3cutm5prqo90"
    img_data = get_image_data_from_url(img_url)

    # NOTE(review): hard-coded, machine-specific output path. The file holds
    # the raw base64 string, not JSON, despite the .json suffix.
    save_path = "/Users/nieqi/Downloads/save.json"
    with open(save_path, 'w') as f:
        f.write(img_data)

def column_id(col):
    """Convert a 1-based column number into spreadsheet letters.

    1 -> "A", 26 -> "Z", 27 -> "AA". Values of 0 or below give "".
    """
    letters = []
    remaining = col
    while remaining > 0:
        shifted = remaining - 1
        # Letters are produced least-significant first and reversed at the end.
        letters.append(chr(65 + int(shifted % 26)))
        remaining = int(shifted / 26)
    return "".join(reversed(letters))

def do_compress_image(image_data, image_type):
    """Downscale an image and re-encode it as JPEG.

    Args:
        image_data: Base64 text of the source image.
        image_type: Kept for interface compatibility. The output is always
            JPEG, so this value does not influence the result.

    Returns:
        Base64 text of the JPEG-encoded image (quality 85). The longer side
        is reduced to at most 1600 px with the aspect ratio preserved;
        smaller images are not enlarged.
    """
    from PIL import Image
    import io
    import base64
    # Disables Pillow's decompression-bomb size limit. This is a
    # process-wide setting, kept as in the original implementation.
    Image.MAX_IMAGE_PIXELS = None

    image = Image.open(io.BytesIO(base64.b64decode(image_data)))

    # Shrink so that neither side exceeds max_size.
    max_size = 1600
    ratio = min(max_size / image.width, max_size / image.height)
    if ratio < 1:
        # max(1, ...) keeps extremely thin images from collapsing to 0 px.
        new_size = (max(1, int(image.width * ratio)), max(1, int(image.height * ratio)))
        image = image.resize(new_size, Image.Resampling.LANCZOS)

    # JPEG cannot store alpha or palette data. Flatten transparent images
    # onto white and convert any other unsupported mode to RGB; previously
    # only RGBA was handled, so palette ('P') or 'LA' inputs failed on save.
    has_alpha = image.mode in ('RGBA', 'LA', 'PA') or (
        image.mode == 'P' and 'transparency' in image.info
    )
    if has_alpha:
        rgba = image.convert('RGBA')
        background = Image.new('RGB', rgba.size, (255, 255, 255))
        background.paste(rgba, mask=rgba.getchannel('A'))
        image = background
    elif image.mode not in ('RGB', 'L', 'CMYK'):
        image = image.convert('RGB')

    buffer = io.BytesIO()
    image.save(buffer, format='JPEG', quality=85, optimize=True)
    return base64.b64encode(buffer.getvalue()).decode()

class Client(object):
    """Wrapper around the Feishu/Lark Open API endpoints used for spreadsheets.

    The instance only stores the API host; every method receives the access
    token explicitly. HTTP calls go through the module-level ``request``
    helper, which raises on API-level errors. Methods documented as
    "best effort" catch those errors, print them and return ``None``.
    """

    def __init__(self, lark_host):
        self._host = lark_host

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _auth_headers(access_token):
        """Return JSON request headers carrying ``access_token`` as a Bearer token."""
        return {
            'Content-Type': 'application/json; charset=utf-8',
            'Authorization': f'Bearer {access_token}'
        }

    @staticmethod
    def _resolve_column_updates(field_updates, headers, warn_missing=False):
        """Map ``field_updates`` keys to 1-based column indexes.

        String keys are looked up in ``headers``; non-string keys are taken
        to already be column indexes. Unknown column names are skipped
        (with a printed warning when ``warn_missing`` is True). When either
        argument is empty, ``field_updates`` is returned unchanged.
        """
        if not (headers and field_updates):
            return field_updates
        column_updates = {}
        for field_name, value in field_updates.items():
            if isinstance(field_name, str):
                try:
                    column_updates[headers.index(field_name) + 1] = value
                except ValueError:
                    if warn_missing:
                        print(f"警告：找不到列名 '{field_name}'，跳过更新")
            else:
                column_updates[field_name] = value
        return column_updates

    def _write_cell_images(self, access_token, doctoken, sheetid, row, cells, compress_image, grid_width, grid_height, border_width, border_color):
        """Upload images for each ``(col_index, cell)`` pair that holds image URLs.

        A cell recognised as a URL list is stitched into one image; a cell
        recognised as a single image URL is uploaded as is. Failures are
        printed and do not stop the remaining cells.
        """
        import ast

        for col_index, cell in cells:
            if is_image_list_cell_url(cell):
                try:
                    # Cell text comes from sheet data, so it is parsed as a
                    # literal instead of being executed with eval().
                    img_urls = ast.literal_eval(cell)
                    self.write_images_to_cell(access_token, doctoken, sheetid, img_urls, row, col_index, compress_image, grid_width, grid_height, border_width, border_color)
                except Exception as e:
                    print(f"写入图片列表失败 (第{row}行第{col_index}列): {e}")
            elif is_image_cell(cell):
                image_type = get_image_type(cell)
                if image_type:
                    try:
                        self.write_image_to_cell(access_token, doctoken, sheetid, cell, row, col_index, image_type, compress_image)
                    except Exception as e:
                        print(f"写入单张图片失败 (第{row}行第{col_index}列): {e}")

    # ------------------------------------------------------------------
    # Authentication and document management
    # ------------------------------------------------------------------

    def get_tenant_access_token(self, app_id, app_secret):
        """Exchange app credentials for a token.

        Calls the ``app_access_token/internal`` endpoint and returns the
        ``tenant_access_token`` field of its response.
        """
        url = self._host+"/open-apis/auth/v3/app_access_token/internal/"
        headers = {
            'Content-Type': 'application/json; charset=utf-8'
        }
        payload = {
            'app_id': app_id,
            'app_secret': app_secret
        }
        resp = request("POST", url, headers, payload)
        return resp['tenant_access_token']

    def get_user_access_token(self, tenant_access_token, code):
        """Exchange an authorization ``code`` for a user access token."""
        url = self._host+"/open-apis/authen/v1/access_token"
        headers = {
            'Content-Type': 'application/json; charset=utf-8'
        }
        payload = {
            "grant_type": "authorization_code",
            "code": code,
            "app_access_token": tenant_access_token
        }
        resp = request("POST", url, headers, payload)
        return resp['data']['access_token']

    def get_root_folder_token(self, access_token):
        """Return the token of the caller's root drive folder."""
        url = self._host+"/open-apis/drive/explorer/v2/root_folder/meta"
        resp = request("GET", url, self._auth_headers(access_token))
        return resp['data']['token']

    def create_spreadsheet(self, access_token, foldertoken, title):
        """Create a spreadsheet in ``foldertoken``.

        Returns:
            Tuple ``(spreadsheet_token, url)`` of the new spreadsheet.
        """
        url = self._host+"/open-apis/sheets/v3/spreadsheets"
        payload = {
            "title": title,
            "folder_token": foldertoken
        }
        resp = request("POST", url, self._auth_headers(access_token), payload)
        sheet = resp['data']['spreadsheet']
        return sheet['spreadsheet_token'], sheet['url']

    def get_sheetid(self, access_token, doctoken, sheet_index=0):
        """Return the ``sheetId`` of the sheet at ``sheet_index`` in the spreadsheet metadata."""
        url = self._host+"/open-apis/sheets/v2/spreadsheets/"+doctoken+"/metainfo"
        resp = request("GET", url, self._auth_headers(access_token))
        return resp['data']['sheets'][sheet_index]["sheetId"]

    def batch_update_values(self, access_token, doctoken, data):
        """Write several value ranges in one call.

        ``data`` is sent as the request body unchanged; the response's
        ``spreadsheetToken`` is returned.
        """
        url = self._host+"/open-apis/sheets/v2/spreadsheets/"+doctoken+"/values_batch_update"
        resp = request("POST", url, self._auth_headers(access_token), data)
        return resp['data']['spreadsheetToken']

    def batch_update_styles(self, access_token, doctoken, data):
        """Apply several style updates in one call.

        ``data`` is sent as the request body unchanged; the response's
        ``spreadsheetToken`` is returned.
        """
        url = self._host+"/open-apis/sheets/v2/spreadsheets/"+doctoken+"/styles_batch_update"
        resp = request("PUT", url, self._auth_headers(access_token), data)
        return resp['data']['spreadsheetToken']

    def add_permissions_member(self, access_token, doctoken, doctype, member_type, member_id, perm):
        """Grant ``perm`` on a document to a member, without sending a notification."""
        url = self._host+"/open-apis/drive/v1/permissions/"+doctoken+"/members?type="+doctype+"&need_notification=false"
        payload = {
            "member_type": member_type,
            "member_id": member_id,
            "perm": perm
        }
        request("POST", url, self._auth_headers(access_token), payload)

    # ------------------------------------------------------------------
    # Images and cell layout
    # ------------------------------------------------------------------

    def write_image_to_cell(self, access_token, doctoken, sheetid, img_url, row, col, image_type, compress_image=True):
        """Download one image and write it into a single cell (best effort).

        Args:
            img_url: URL of the image.
            row: Target row number.
            col: Target column number (converted with ``column_id``).
            image_type: Extension of the source image; used to build the
                uploaded file name.
            compress_image: Re-encode through ``do_compress_image`` first;
                the uploaded name then uses the ``JPEG`` extension.

        Returns:
            ``(revision, updateRange)`` from the API, or ``(None, None)``
            when the download or the upload fails.
        """
        url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/values_image"
        try:
            image_data = get_image_data_from_url(img_url)
        except Exception as e:
            print(img_url)
            print(e)
            return None, None
        if compress_image:
            image_data = do_compress_image(image_data, image_type)

        # File name is the last URL segment with the original extension removed.
        image_name = img_url.split('/')[-1].replace(f'.{image_type}', '')
        if compress_image:
            image_type = 'JPEG'
        cell_start = column_id(col)+str(row)
        payload = {
            "range": f'{sheetid}!{cell_start}:{cell_start}',
            "image": image_data,
            "name": f"{image_name}.{image_type}"
        }
        try:
            resp = request("POST", url, self._auth_headers(access_token), payload)
        except Exception as e:
            print(img_url)
            print(image_name)
            print(image_type)
            print(e)
            return None, None
        return resp['data']['revision'], resp['data']['updateRange']

    def merge_cells(self, access_token, doctoken, sheetid, start_row, end_row, start_col, end_col):
        """Merge the rectangular cell range (best effort).

        Always returns ``None``; API errors are printed.
        """
        print(f"merge  start_row = {start_row} end_row = {end_row} start_col = {start_col} end_col = {end_col}")
        url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/merge_cells"
        payload = {
            "range": f"{sheetid}!{column_id(start_col)}{start_row}:{column_id(end_col)}{end_row}",
            "mergeType": "MERGE_ALL",
        }
        try:
            request("POST", url, self._auth_headers(access_token), payload)
        except Exception as e:
            print(e)
        return None

    def write_images_to_cell(self, access_token, doctoken, sheetid, img_url_list, row, col, compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
        """Stitch several images into one grid image and write it into a cell.

        Args:
            img_url_list: List of image URLs. URLs that fail to download or
                decode are reported and skipped.
            row: Target cell row number.
            col: Target cell column number.
            compress_image: Encode the stitched image as JPEG (quality 85)
                when True, PNG otherwise.
            grid_width: Number of grid columns; computed when None.
            grid_height: Number of grid rows; computed when None.
            border_width: Border width in pixels.
            border_color: Border colour as an RGB tuple (also the canvas
                background).

        Returns:
            ``(revision, updateRange)`` from the API, or ``(None, None)``
            when no image could be loaded or the upload fails.
        """
        from PIL import Image
        import io
        import base64
        import math

        # Download every image that can be fetched and decoded.
        images = []
        for img_url in img_url_list:
            try:
                if not get_image_type(img_url):
                    continue
                image_data = get_image_data_from_url(img_url)
                images.append(Image.open(io.BytesIO(base64.b64decode(image_data))))
            except Exception as e:
                print(f"下载图片失败: {img_url}")
                print(e)

        if not images:
            return None, None

        # Work out the grid shape.
        img_count = len(images)
        if grid_width is None and grid_height is None:
            # Neither given: the grid closest to a square.
            grid_width = math.ceil(math.sqrt(img_count))
            grid_height = math.ceil(img_count / grid_width)
        elif grid_width is None:
            grid_width = math.ceil(img_count / grid_height)
        elif grid_height is None:
            grid_height = math.ceil(img_count / grid_width)

        # Grow the grid until it can hold every image.
        while grid_width * grid_height < img_count:
            if grid_width <= grid_height:
                grid_width += 1
            else:
                grid_height += 1

        # Each grid cell has the average size of the inputs.
        avg_width = sum(img.width for img in images) // img_count
        avg_height = sum(img.height for img in images) // img_count
        target_ratio = avg_width / avg_height

        # Fit every image inside a grid cell, keeping its aspect ratio.
        resized_images = []
        for img in images:
            img_ratio = img.width / img.height
            if img_ratio > target_ratio:
                # Wider than the cell: width is the limiting side.
                new_width = avg_width
                new_height = int(avg_width / img_ratio)
            else:
                # Taller than the cell: height is the limiting side.
                new_height = avg_height
                new_width = int(avg_height * img_ratio)
            # max(1, ...) keeps extreme aspect ratios from yielding a 0 px side.
            new_size = (max(1, new_width), max(1, new_height))
            resized_images.append(img.resize(new_size, Image.Resampling.LANCZOS))

        # The canvas leaves one border strip around and between all cells.
        canvas_width = grid_width * avg_width + (grid_width + 1) * border_width
        canvas_height = grid_height * avg_height + (grid_height + 1) * border_width
        canvas = Image.new('RGB', (canvas_width, canvas_height), border_color)

        for i, img in enumerate(resized_images):
            row_idx, col_idx = divmod(i, grid_width)
            cell_x = col_idx * avg_width + (col_idx + 1) * border_width
            cell_y = row_idx * avg_height + (row_idx + 1) * border_width
            # Centre the image inside its grid cell.
            center_x = cell_x + (avg_width - img.width) // 2
            center_y = cell_y + (avg_height - img.height) // 2
            canvas.paste(img, (center_x, center_y))

        output = io.BytesIO()
        if compress_image:
            canvas.save(output, format='JPEG', quality=85)
            image_type = 'JPEG'
        else:
            canvas.save(output, format='PNG')
            image_type = 'PNG'
        image_data = base64.b64encode(output.getvalue()).decode()

        url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/values_image"
        cell_start = column_id(col) + str(row)
        payload = {
            "range": f'{sheetid}!{cell_start}:{cell_start}',
            "image": image_data,
            "name": f"combined_image.{image_type}"
        }

        try:
            resp = request("POST", url, self._auth_headers(access_token), payload)
            return resp['data']['revision'], resp['data']['updateRange']
        except Exception as e:
            print(f"写入拼接图片失败: {e}")
            return None, None

    # ------------------------------------------------------------------
    # Reading and writing values
    # ------------------------------------------------------------------

    def read_range_values(self, access_token, doctoken, range_val):
        """Read the values of a range (best effort).

        Args:
            access_token: Access token.
            doctoken: Spreadsheet token.
            range_val: Range such as "Sheet1!A1:C10".

        Returns:
            The list of rows read, or ``[]`` when the call fails.
        """
        url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/values/{range_val}"
        try:
            resp = request("GET", url, self._auth_headers(access_token))
            return resp['data']['valueRange']['values']
        except Exception as e:
            print(f"读取数据失败: {e}")
            return []

    def prepend_data(self, access_token, doctoken, range_val, values):
        """Insert rows of data in front of the given range (best effort).

        Uses the v2 ``values_prepend`` endpoint with a ``valueRange``
        payload, mirroring ``append_data``. The previous implementation
        posted to a v3 ``.../sheets/<id>/prepend`` path that is not part of
        the documented Sheets API and never sent the range.

        Args:
            access_token: Access token.
            doctoken: Spreadsheet token.
            range_val: Range such as "Sheet1!A1:C1".
            values: Rows to insert.

        Returns:
            The decoded API response, or ``None`` when the call fails.
        """
        url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/values_prepend"
        payload = {
            "valueRange": {
                "range": range_val,
                "values": values
            }
        }
        try:
            return request("POST", url, self._auth_headers(access_token), payload)
        except Exception as e:
            print(f"插入数据失败: {e}")
            return None

    def insert_data_at_row(self, access_token, doctoken, sheetid, row, values):
        """Write ``values`` starting at column A of ``row`` via a batch update (best effort).

        Existing cell contents in the target range are overwritten.

        Args:
            access_token: Access token.
            doctoken: Spreadsheet token.
            sheetid: Sheet id.
            row: Target row number.
            values: Rows to write; the width of the first row decides the
                last column of the range.

        Returns:
            The spreadsheet token from the API, or ``None`` on failure.
        """
        cols = len(values[0]) if values else 1
        body = {
            "valueRanges": [
                {
                    "range": f"{sheetid}!A{row}:{column_id(cols)}{row}",
                    "values": values
                }
            ]
        }
        try:
            return self.batch_update_values(access_token, doctoken, body)
        except Exception as e:
            print(f"插入数据到第{row}行失败: {e}")
            return None

    def insert_rows_before(self, access_token, doctoken, sheetid, row_index, count=1):
        """Insert empty rows in front of ``row_index``.

        Falls back to ``append_empty_rows`` when the position lies beyond
        the current row count or when the insert call fails.

        Args:
            access_token: Access token.
            doctoken: Spreadsheet token.
            sheetid: Sheet id.
            row_index: 1-based row number; rows are inserted before it.
            count: Number of rows to insert (default 1).

        Returns:
            The API response of the insert, or the result of the append
            fallback.
        """
        sheet_props = self.get_sheet_properties(access_token, doctoken, sheetid)
        if not sheet_props:
            print("无法获取工作表信息，尝试直接插入")
            current_row_count = 1000  # assumed default when the sheet size is unknown
        else:
            current_row_count = sheet_props['row_count']
            print(f"当前工作表行数: {current_row_count}")

        if row_index > current_row_count:
            print(f"插入位置({row_index})超过当前行数({current_row_count})，使用追加模式")
            return self.append_empty_rows(access_token, doctoken, sheetid, count)

        url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/insert_dimension_range"

        # The API takes a 0-based start and an exclusive end:
        # row_index=3 (third row) maps to startIndex=2.
        start_index = row_index - 1
        end_index = start_index + count

        if end_index > current_row_count:
            print(f"警告：计算的endIndex({end_index})超过当前行数({current_row_count})，调整为追加模式")
            return self.append_empty_rows(access_token, doctoken, sheetid, count)

        # Inserting at row 2 inherits the style of the following (data) row;
        # every other position inherits from the row before.
        inherit_style = "AFTER" if row_index == 2 else "BEFORE"

        payload = {
            "dimension": {
                "sheetId": sheetid,
                "majorDimension": "ROWS",
                "startIndex": start_index,
                "endIndex": end_index
            },
            "inheritStyle": inherit_style
        }

        try:
            resp = request("POST", url, self._auth_headers(access_token), payload)
            print(f"在第{row_index}行前成功插入{count}行（startIndex={start_index}, endIndex={end_index}, inheritStyle={inherit_style}）")
            return resp
        except Exception as e:
            print(f"在第{row_index}行前插入{count}行失败: {e}")
            print("尝试使用追加模式...")
            return self.append_empty_rows(access_token, doctoken, sheetid, count)

    def insert_row_with_images(self, access_token, doctoken, sheetid, row, values, compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
        """Overwrite ``row`` with ``values`` and upload images for image cells.

        Args:
            access_token: Access token.
            doctoken: Spreadsheet token.
            sheetid: Sheet id.
            row: Target row number.
            values: Rows to write; only the first row is scanned for images.
            compress_image: Whether images are compressed before upload.
            grid_width: Grid columns for stitched image lists.
            grid_height: Grid rows for stitched image lists.
            border_width: Border width for stitched images.
            border_color: Border colour for stitched images.

        Returns:
            Result of the text write, or ``None`` when that write failed
            (in which case no image is uploaded).
        """
        result = self.insert_data_at_row(access_token, doctoken, sheetid, row, values)
        if not result:
            return None

        if values:
            self._write_cell_images(access_token, doctoken, sheetid, row, enumerate(values[0], start=1), compress_image, grid_width, grid_height, border_width, border_color)

        return result

    def update_specific_fields(self, access_token, doctoken, sheetid, row, field_updates, headers=None):
        """Update only the given fields of a row, leaving other cells untouched (best effort).

        Args:
            access_token: Access token.
            doctoken: Spreadsheet token.
            sheetid: Sheet id.
            row: 1-based target row number.
            field_updates: Mapping of column name -> new value, or of
                1-based column index -> new value.
            headers: Header list used to map column names to indexes. When
                None, the keys of ``field_updates`` must be column indexes.

        Returns:
            The spreadsheet token from the API, or ``None`` on failure.
        """
        try:
            column_updates = self._resolve_column_updates(field_updates, headers, warn_missing=True)

            # One single-cell range per updated column.
            value_ranges = []
            for col_index, value in column_updates.items():
                col_letter = column_id(col_index)
                value_ranges.append({
                    "range": f"{sheetid}!{col_letter}{row}:{col_letter}{row}",
                    "values": [[value]]
                })

            result = self.batch_update_values(access_token, doctoken, {"valueRanges": value_ranges})

            if result:
                updated_fields = list(column_updates.keys())
                print(f"成功更新第{row}行的字段: {updated_fields}")

            return result
        except Exception as e:
            print(f"更新第{row}行指定字段失败: {e}")
            return None

    def update_row_with_specific_fields_and_images(self, access_token, doctoken, sheetid, row, field_updates, headers=None, compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
        """Update the given fields of a row and upload images for image cells.

        Args:
            access_token: Access token.
            doctoken: Spreadsheet token.
            sheetid: Sheet id.
            row: Target row number.
            field_updates: Mapping of column name or index -> new value.
            headers: Header list used to map column names to indexes.
            compress_image: Whether images are compressed before upload.
            grid_width: Grid columns for stitched image lists.
            grid_height: Grid rows for stitched image lists.
            border_width: Border width for stitched images.
            border_color: Border colour for stitched images.

        Returns:
            Result of the text update, or ``None`` when that update failed
            (in which case no image is uploaded).
        """
        result = self.update_specific_fields(access_token, doctoken, sheetid, row, field_updates, headers)
        if not result:
            return None

        column_updates = self._resolve_column_updates(field_updates, headers)
        self._write_cell_images(access_token, doctoken, sheetid, row, (column_updates or {}).items(), compress_image, grid_width, grid_height, border_width, border_color)

        return result

    def insert_row_with_data_at_position(self, access_token, doctoken, sheetid, row_position, values, compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
        """Insert a new row at ``row_position`` and fill it with data and images.

        Args:
            access_token: Access token.
            doctoken: Spreadsheet token.
            sheetid: Sheet id.
            row_position: 1-based position; the row is inserted before it.
            values: Rows to write; only the first row is scanned for images.
            compress_image: Whether images are compressed before upload.
            grid_width: Grid columns for stitched image lists.
            grid_height: Grid rows for stitched image lists.
            border_width: Border width for stitched images.
            border_color: Border colour for stitched images.

        Returns:
            Result of the text write, or ``None`` when inserting the row or
            writing the text failed.
        """
        # The row count is read before inserting so the append case below
        # can be detected.
        sheet_props = self.get_sheet_properties(access_token, doctoken, sheetid)
        current_row_count = sheet_props['row_count'] if sheet_props else 1

        insert_result = self.insert_rows_before(access_token, doctoken, sheetid, row_position, 1)
        if not insert_result:
            print(f"插入空行失败，无法在第{row_position}行插入数据")
            return None

        # When the position lay beyond the sheet, the row was appended, so
        # the data goes to the row right after the previous last row.
        actual_row_position = row_position
        if row_position > current_row_count:
            actual_row_position = current_row_count + 1
            print(f"追加模式：实际数据插入位置调整为第{actual_row_position}行")

        result = self.insert_data_at_row(access_token, doctoken, sheetid, actual_row_position, values)
        if not result:
            print(f"插入数据失败")
            return None

        if values:
            self._write_cell_images(access_token, doctoken, sheetid, actual_row_position, enumerate(values[0], start=1), compress_image, grid_width, grid_height, border_width, border_color)

        return result

    # ------------------------------------------------------------------
    # Sheet metadata, append and delete
    # ------------------------------------------------------------------

    def get_sheet_info(self, access_token, doctoken, sheetid):
        """Fetch the basic information of a sheet (best effort).

        Args:
            access_token: Access token.
            doctoken: Spreadsheet token.
            sheetid: Sheet id.

        Returns:
            The ``sheet`` object of the API response, or ``None`` on failure.
        """
        url = f"{self._host}/open-apis/sheets/v3/spreadsheets/{doctoken}/sheets/{sheetid}"
        try:
            resp = request("GET", url, self._auth_headers(access_token))
            return resp['data']['sheet']
        except Exception as e:
            print(f"获取工作表信息失败: {e}")
            return None

    def get_sheet_properties(self, access_token, doctoken, sheetid):
        """Return selected properties of a sheet, including its dimensions.

        Args:
            access_token: Access token.
            doctoken: Spreadsheet token.
            sheetid: Sheet id.

        Returns:
            Dict with ``row_count``, ``column_count``, ``title``,
            ``sheet_id`` and ``sheet_type`` (missing values default to 0 or
            ''), or ``None`` when the sheet info could not be fetched.
        """
        sheet_info = self.get_sheet_info(access_token, doctoken, sheetid)
        if not sheet_info:
            return None
        grid_properties = sheet_info.get('grid_properties', {})
        return {
            'row_count': grid_properties.get('row_count', 0),
            'column_count': grid_properties.get('column_count', 0),
            'title': sheet_info.get('title', ''),
            'sheet_id': sheet_info.get('sheet_id', ''),
            'sheet_type': sheet_info.get('sheet_type', '')
        }

    def append_data(self, access_token, doctoken, range_val, values):
        """Append rows of data after the given range (best effort).

        Args:
            access_token: Access token.
            doctoken: Spreadsheet token.
            range_val: Range such as "Sheet1!A1:C1".
            values: Rows to append.

        Returns:
            The decoded API response, or ``None`` when the call fails.
        """
        url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/values_append"
        payload = {
            "valueRange": {
                "range": range_val,
                "values": values
            }
        }
        try:
            return request("POST", url, self._auth_headers(access_token), payload)
        except Exception as e:
            print(f"追加数据失败: {e}")
            return None

    def delete_rows(self, access_token, doctoken, sheetid, start_row, end_row):
        """Delete a range of rows (best effort).

        Args:
            access_token: Access token.
            doctoken: Spreadsheet token.
            sheetid: Sheet id.
            start_row: First row to delete (1-based).
            end_row: Last row to delete (1-based, inclusive).

        Returns:
            The decoded API response, or ``None`` when the call fails.
        """
        url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/dimension_range"
        payload = {
            "dimension": {
                "sheetId": sheetid,
                "majorDimension": "ROWS",
                "startIndex": start_row,  # 1-based, inclusive
                "endIndex": end_row       # 1-based, inclusive
            }
        }
        try:
            return request("DELETE", url, self._auth_headers(access_token), payload)
        except Exception as e:
            print(f"删除第{start_row}-{end_row}行失败: {e}")
            return None

    def delete_single_row(self, access_token, doctoken, sheetid, row):
        """Delete one row (1-based); see ``delete_rows`` for the return value."""
        return self.delete_rows(access_token, doctoken, sheetid, row, row)

    def append_empty_rows(self, access_token, doctoken, sheetid, count=1):
        """Append empty rows at the end of the sheet (best effort).

        Args:
            access_token: Access token.
            doctoken: Spreadsheet token.
            sheetid: Sheet id.
            count: Number of rows to append (default 1).

        Returns:
            The result of ``append_data``, or ``None`` when the sheet
            properties cannot be read or the append fails.
        """
        sheet_props = self.get_sheet_properties(access_token, doctoken, sheetid)
        if not sheet_props:
            print("无法获取工作表信息，追加失败")
            return None

        current_row_count = sheet_props['row_count']
        # At least one column is always written, even for an empty sheet.
        col_count = max(sheet_props['column_count'], 1)

        print(f"在工作表末尾追加{count}行，当前行数: {current_row_count}")

        empty_values = [[''] * col_count for _ in range(count)]
        range_val = f"{sheetid}!A{current_row_count + 1}:{column_id(col_count)}{current_row_count + count}"

        try:
            result = self.append_data(access_token, doctoken, range_val, empty_values)
            if result:
                print(f"成功在末尾追加{count}行空行")
            return result
        except Exception as e:
            print(f"追加空行失败: {e}")
            return None

# -*- coding: UTF-8 -*-
import json
import logging
from datetime import datetime
import re
import os
import requests
from urllib.parse import urlparse

# Module-wide logging setup: INFO level, "<time> - <level> - <message>" lines.
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

import os
# Log the current working directory once, at import time.
logging.info(os.getcwd())

def column_id(col):
    '''Convert a 1-based column number to its spreadsheet letter ID.

    1 -> "A", 26 -> "Z", 27 -> "AA", 702 -> "ZZ", 703 -> "AAA".

    Args:
        col: 1-based column number; it is truncated to ``int`` first.

    Returns:
        The column letters, or an empty string when ``col`` is zero or
        negative.
    '''
    letters = []
    remaining = int(col)
    while remaining > 0:
        # Column letters form a bijective base-26 numeral (there is no zero
        # digit), hence the "- 1" before each division. Pure integer divmod
        # avoids the float rounding that true division introduces for very
        # large indices.
        remaining, offset = divmod(remaining - 1, 26)
        letters.append(chr(offset + 65))
    # Digits were produced least-significant first.
    return "".join(reversed(letters))

def get_image_type(url):
    '''Guess an image format name from an image URL.

    No network request is made; only the URL text is inspected.

    1. If the last dot-separated piece of the URL path (lower-cased) is one
       of jpg / jpeg / png / gif, that piece is returned.
    2. Otherwise the whole URL is scanned for the substrings jpg, jpeg, png,
       gif and webp, in that order; the last one found wins.
    3. If none is found, 'jpeg' is returned.

    Returns:
        The guessed format name, or None if inspecting ``url`` raises
        (for example when it is not a string).
    '''
    try:
        suffix = urlparse(url).path.split('.')[-1].lower()
        if suffix in ('jpg', 'jpeg', 'png', 'gif'):
            return suffix

        guess = 'jpeg'
        # Later markers override earlier ones, so 'webp' has top priority.
        # NOTE(review): this is a plain substring test over the whole URL, so
        # a host such as "sns-webpic..." also counts as 'webp' -- confirm
        # that this is the intended result for those URLs.
        for marker in ('jpg', 'jpeg', 'png', 'gif', 'webp'):
            if marker in url:
                guess = marker
        return guess
    except Exception as e:
        print(f"获取图片类型时出错: {str(e)}")
        return None

def is_image_cell(cell):
    """Return True when ``cell`` looks like a single image URL.

    A cell qualifies when it is a string that contains no character in the
    range U+4E00..U+9FFF, starts with 'http', and either matches one of the
    image-extension patterns (jpg / jpeg / png / gif / webp, case-insensitive)
    or matches one of the hard-coded CDN host rules below.
    """
    if not isinstance(cell, str):
        return False
    # Text containing Chinese characters is never treated as an image URL.
    if any('\u4e00' <= ch <= '\u9fff' for ch in cell):
        return False
    if not cell.startswith('http'):
        return False

    # NOTE(review): given the 'http' prefix check above, the second pattern
    # appears to match nothing the first does not; kept as-is.
    extension_patterns = (
        r'https?://.+\.(jpg|jpeg|png|gif|webp).*',
        r'http?://.+\.(jpg|jpeg|png|gif|webp).*',
    )
    if any(re.match(pattern, cell, re.I) for pattern in extension_patterns):
        return True

    # CDN URLs that carry no file extension.
    if 'xhscdn.com' in cell and 'format/jpg' in cell:
        return True
    if 'rescdn.yishihui.com' in cell and 'jpg' in cell:
        return True
    return 'sns-webpic-qc.xhscdn.com' in cell or 'ci.xiaohongshu.com' in cell

def is_image_list_cell_url(cell):
    """Return True when ``cell`` is the text of a list whose items are all image URLs.

    The cell must be a non-blank string that starts with '[' and ends with
    ']', must parse as a Python literal list, and every item must satisfy
    ``is_image_cell``. An empty list ("[]") is accepted.

    Args:
        cell: Cell value of any type.

    Returns:
        True or False; anything that cannot be parsed yields False.
    """
    import ast  # local import so this helper does not depend on file-level imports

    if not (isinstance(cell, str) and cell.strip() and cell[0] == '[' and cell[-1] == ']'):
        return False

    try:
        # SECURITY: cell text can come from external data, so it is parsed
        # with ast.literal_eval (literals only) instead of eval(), which
        # would execute arbitrary expressions embedded in a cell.
        parsed = ast.literal_eval(cell)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return False

    if not isinstance(parsed, list):
        return False
    return all(is_image_cell(item) for item in parsed)

def write_images(client, access_token, token, sheetid, data, start_row=1, start_col=1, skip_col=None, compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
    '''Write every image-URL cell of ``data`` into the sheet as an image.

    ``data`` is walked with 1-based row and column indices. Cells holding the
    text of a list of image URLs go to ``client.write_images_to_cell``; cells
    holding a single image URL go to ``client.write_image_to_cell``. All other
    cells are ignored.

    Args:
        client: API client exposing ``write_images_to_cell`` and
            ``write_image_to_cell``.
        access_token: Access token passed through to the client.
        token: Spreadsheet token.
        sheetid: Worksheet ID.
        data: Rows of cell values.
        start_row: Rows of ``data`` before this 1-based index are skipped.
        start_col: Columns before this 1-based index are skipped.
        skip_col: Optional collection of 1-based column indices to skip.
        compress_image: Passed through to the client.
        grid_width, grid_height, border_width, border_color: Passed through
            to ``write_images_to_cell`` for multi-image cells.
    '''
    import ast  # local import so this helper does not depend on file-level imports

    # None instead of a mutable [] default; an empty tuple means "skip nothing".
    skipped_columns = skip_col if skip_col is not None else ()

    for row_index, row in enumerate(data, start=1):
        if row_index < start_row:
            print(f"跳过行: {row_index}")
            continue
        for col_index, cell in enumerate(row, start=1):
            if col_index < start_col or col_index in skipped_columns:
                continue
            if is_image_list_cell_url(cell):
                try:
                    # SECURITY: cell text may come from external data; parse
                    # it as a literal instead of executing it with eval().
                    image_urls = ast.literal_eval(cell)
                except (ValueError, SyntaxError):
                    print(f"跳过无法安全解析的图片列表单元格: 第{row_index}行第{col_index}列")
                    continue
                client.write_images_to_cell(access_token, token, sheetid, image_urls, row_index, col_index, compress_image, grid_width, grid_height, border_width, border_color)
            elif is_image_cell(cell):
                image_type = get_image_type(cell)
                # get_image_type returns None when it fails; skip those cells.
                if image_type:
                    client.write_image_to_cell(access_token, token, sheetid, cell, row_index, col_index, image_type, compress_image)

def merge_cells(client, access_token, token, sheetid, data):
    """Merge vertically adjacent cells that hold the same value, column by column.

    For every column of ``data``, each run of two or more consecutive equal
    values is merged with ``client.merge_cells`` using 1-based, inclusive row
    and column numbers. The final run of a column is merged as well (the loop
    previously only merged a run when a different value followed it, so a run
    reaching the last row was never merged).

    Args:
        client: API client exposing ``merge_cells(access_token, token,
            sheetid, start_row, end_row, start_col, end_col)``.
        access_token: Access token passed through to the client.
        token: Spreadsheet token.
        sheetid: Worksheet ID.
        data: Rows of cell values. Every row is indexed up to the width of
            the first row, so rows are assumed to be at least that long.
    """
    if not data or not data[0]:
        # Nothing to merge.
        return

    row_cnt = len(data)
    col_cnt = len(data[0])

    for col in range(col_cnt):
        run_start = 0
        run_value = data[0][col]
        for row in range(1, row_cnt):
            cell_value = data[row][col]
            if cell_value != run_value:
                # The run [run_start, row) just ended; merge it if longer than one cell.
                if row - run_start > 1:
                    client.merge_cells(access_token, token, sheetid, run_start + 1, row, col + 1, col + 1)
                run_start = row
                run_value = cell_value
        # Flush the run that extends to the last row of the column.
        if row_cnt - run_start > 1:
            client.merge_cells(access_token, token, sheetid, run_start + 1, row_cnt, col + 1, col + 1)


def pack_data(data, sheetid, start_row=1, start_col=1):
    """Build the request body for a batch value update from ``data``.

    The target range runs from (``start_col``, ``start_row``) to the last
    column of the first row and the last row of ``data``; the values are the
    matching sub-rectangle of ``data`` (rows from ``start_row`` on, columns
    from ``start_col`` on, both 1-based). The range string is printed.

    Returns:
        A dict with a single entry under "valueRanges" holding "range" and
        "values".
    """
    total_rows = len(data)
    total_cols = len(data[0])

    top_left = f"{column_id(start_col)}{start_row}"
    bottom_right = f"{column_id(total_cols)}{total_rows}"
    range1 = f"{sheetid}!{top_left}:{bottom_right}"
    print(range1)

    # Drop the rows/columns that precede the requested starting cell.
    values = [list(record[start_col-1:]) for record in data[start_row-1:]]

    return {
        "valueRanges": [
            {
                "range": range1,
                "values": values
            },
        ]
    }

def write_data_to_sheet(data, sheet_token='IoTOsjZ4khIqlOtTxnec8oTbn7c', sheetid=None, skip_text=False, skip_images=False, start_row=1, start_col=1, skip_col=[], compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
    '''Write a table of values, and any image URLs in it, to a sheet.

    Creates a client, obtains a tenant access token, resolves the worksheet
    ID via ``client.get_sheetid`` when none is given, writes the cell values
    with ``batch_update_values`` (unless ``skip_text``) and then renders
    image cells with ``write_images`` (unless ``skip_images``).
    '''
    client = Client(LARK_HOST)
    access_token = client.get_tenant_access_token(APP_ID, APP_SECRET)

    # Resolve the worksheet ID through the client when the caller gave none.
    target_sheet = sheetid if sheetid is not None else client.get_sheetid(access_token, sheet_token)
    print(f"Sheet ID: {target_sheet}")

    # The body is built (and its range printed) even when text is skipped.
    body = pack_data(data, target_sheet, start_row=start_row, start_col=start_col)
    if not skip_text:
        client.batch_update_values(access_token, sheet_token, body)

    # merge_cells(client, access_token, sheet_token, sheetid, data)  # currently disabled

    if skip_images:
        return

    image_options = dict(
        start_row=start_row,
        start_col=start_col,
        skip_col=skip_col,
        compress_image=compress_image,
        grid_width=grid_width,
        grid_height=grid_height,
        border_width=border_width,
        border_color=border_color,
    )
    write_images(client, access_token, sheet_token, target_sheet, data, **image_options)


def get_test_data():
    """Return a small sample table: one header row plus two data rows.

    The data rows contain one image URL each, which makes the table usable
    for trying out both text and image writing.
    """
    header = ["标题1", "标题2", "标题3", "图片"]
    xhs_image_row = [1, 2, 2, "http://sns-webpic.xhscdn.com/1040g2sg316vc6tdrk4705o8h0c2095f1else4i8?imageView2/2/w/0/format/jpg/v3"]
    midjourney_image_row = [4, "https://cdn.midjourney.com/f78df4d5-9b8b-4ec7-ae34-5cc04d176f87/0_0.png", 6, "dd"]
    # Extra sample row, currently disabled:
    # [7, 8, 9,  "https://sns-webpic.xhscdn.com/1040g2sg317l7814ck4705n3aa5ik4jgjahhcam0?imageView2/2/w/0/format/jpg/v3"]
    return [header, xhs_image_row, midjourney_image_row]

from typing import List, Dict
import pandas as pd
import json
def to_feishu(
    res_list: List[Dict],
    sheet_id: str = 'Qn9MAs',
    sheet_token: str = 'Rbsysi6FChzCp7tfv19crkWNnEb',
    start_row: int = 1,
    start_col: int = 1,
    grid_width: int = None,
    grid_height: int = None,
    border_width: int = 3,
    border_color: tuple = (200, 200, 200),
) -> None:
    """
    Export a list of records to a Feishu sheet.

    Every value is converted to cell text (lists and dicts are serialised,
    over-long text is truncated), the records are laid out as a table whose
    first row is the header, and the table is handed to
    ``write_data_to_sheet``. The input records are not modified.

    Args:
        res_list: Records to export, one dict per row; falsy rows are skipped.
        sheet_id: Worksheet ID.
        sheet_token: Spreadsheet token.
        start_row: Start row, forwarded to ``write_data_to_sheet``.
        start_col: Start column, forwarded to ``write_data_to_sheet``.
        grid_width: Number of columns in a stitched image; None = automatic.
        grid_height: Number of rows in a stitched image; None = automatic.
        border_width: Border width in pixels.
        border_color: Border colour as an RGB tuple.
    """
    if not res_list:
        # An empty export would otherwise be sent with a malformed range.
        print("没有可写入的数据，跳过导出")
        return

    from tqdm import tqdm

    def truncate_by_bytes(text, max_bytes=450000):
        """Return ``text`` as a string whose UTF-8 size is capped at ``max_bytes``.

        None becomes "". Falsy values such as 0 or False are kept ("0",
        "False") rather than being blanked. Truncated text gets a marker
        suffix appended.
        """
        if text is None:
            return ""
        text_str = str(text)
        encoded = text_str.encode('utf-8')
        if len(encoded) <= max_bytes:
            return text_str
        # errors='ignore' drops a multi-byte character cut in half at the boundary.
        return encoded[:max_bytes].decode('utf-8', errors='ignore') + "...[已截断]"

    def to_cell_text(value):
        """Serialise one record value into the text stored in a cell."""
        if isinstance(value, list):
            # Lists of URLs keep Python list syntax so the image helpers can
            # recognise them; str() on the first item tolerates non-string items.
            if value and value[0] and str(value[0]).startswith('http'):
                return truncate_by_bytes(str(value))
            return truncate_by_bytes(json.dumps(value, ensure_ascii=False, separators=(',', ':')))
        if isinstance(value, dict):
            return truncate_by_bytes(json.dumps(value, ensure_ascii=False, indent=2))
        return truncate_by_bytes(value)

    # Build fresh dicts so the caller's records are left untouched.
    prepared_rows = [
        {key: to_cell_text(value) for key, value in row.items()}
        for row in tqdm(res_list)
        if row
    ]
    if not prepared_rows:
        print("没有可写入的数据，跳过导出")
        return

    df = pd.DataFrame(prepared_rows)
    # Records may have different keys; fill the gaps with empty strings.
    df.fillna('', inplace=True)
    data_with_header = [df.columns.tolist()] + df.values.tolist()

    write_data_to_sheet(
        data_with_header,
        sheet_token=sheet_token,
        sheetid=sheet_id,
        start_col=start_col,
        start_row=start_row,
        grid_width=grid_width,
        grid_height=grid_height,
        border_width=border_width,
        border_color=border_color,
    )

def to_feishu_incremental(
    res_list: List[Dict],
    sort_field: str = '内容ID',
    sheet_id: str = 'Qn9MAs', 
    sheet_token: str = 'Rbsysi6FChzCp7tfv19crkWNnEb',
    unique_field: str = None,  # unique field used for de-duplication; defaults to sort_field
    duplicate_strategy: str = 'skip',  # duplicate handling: 'skip', 'delete' (delete then insert) or 'update'
    update_fields: List[str] = None,  # fields to update when duplicate_strategy='update'; None = all fields
    cleanup_duplicates: bool = True,  # whether to first remove duplicates already present in the sheet
    keep_first: bool = True,  # during cleanup keep the first (True) or the last (False) duplicate
    sort_ascending: bool = False,  # sort order: True = ascending, False = descending
    grid_width: int = None,
    grid_height: int = None,
    border_width: int = 3,
    border_color: tuple = (200, 200, 200),
) -> None:
    """
    Insert records into a Feishu sheet one row at a time, placing each row
    at the position given by a sort field.

    Outline of what the function does, in order:
      1. Read the sheet properties and the header row; if the header row is
         empty, create it from the keys of the first record.
      2. If the sort field is not in the header, fall back to ``to_feishu``
         and return.
      3. Read the sort and unique columns, delete rows where both are empty,
         and optionally delete rows with duplicated unique values.
      4. Convert the new records to cell text and split them into rows to
         update, rows to insert and rows to skip, per ``duplicate_strategy``.
      5. Apply the updates, then sort the rows to insert and insert them one
         by one at the computed positions.

    Sort values are compared as strings throughout.

    Args:
        res_list: Records to write.
        sort_field: Name of the field used for ordering, e.g. '内容ID'.
        sheet_id: Worksheet ID.
        sheet_token: Spreadsheet token.
        unique_field: Field used for de-duplication; defaults to sort_field.
        duplicate_strategy: How records already present in the sheet are handled:
            - 'skip': skip the duplicate record (default)
            - 'delete': delete the existing row, then insert the new record
            - 'update': update fields of the existing row
        update_fields: Fields to update when duplicate_strategy='update':
            - None: update every field except unique_field
            - ['field1', 'field2']: update only the listed fields
        cleanup_duplicates: Whether to first remove duplicates already present in the sheet.
        keep_first: During cleanup keep the first (True) or the last (False) duplicate.
        sort_ascending: Sort order, True = ascending, False = descending (default False).
        grid_width: Number of columns in a stitched image; None = automatic.
        grid_height: Number of rows in a stitched image; None = automatic.
        border_width: Border width in pixels.
        border_color: Border colour as an RGB tuple.
    """
    from tqdm import tqdm
    import pandas as pd
    import json
    from typing import List
    
    def truncate_by_bytes(text, max_bytes=450000):
        """Truncate text by its UTF-8 byte length."""
        # NOTE(review): any falsy value (including 0 and False) becomes "".
        if not text:
            return ""
        text_str = str(text)
        encoded = text_str.encode('utf-8')
        if len(encoded) <= max_bytes:
            return text_str
        # Safe truncation: back off byte by byte so no multi-byte character is cut in half
        truncated = encoded[:max_bytes]
        while len(truncated) > 0:
            try:
                return truncated.decode('utf-8') + "...[已截断]"
            except UnicodeDecodeError:
                truncated = truncated[:-1]
        return ""
    
    # Initialise the API client
    client = Client(LARK_HOST)
    access_token = client.get_tenant_access_token(APP_ID, APP_SECRET)
    
    # De-duplication field defaults to the sort field
    if unique_field is None:
        unique_field = sort_field
    
    # 1. Fetch basic worksheet information
    print("正在获取工作表信息...")
    sheet_props = client.get_sheet_properties(access_token, sheet_token, sheet_id)
    
    if not sheet_props:
        # Fall back to a fixed read window when the properties are unavailable
        print("获取工作表信息失败，使用默认范围")
        max_col = 'ZZ'
        max_row = 1000
    else:
        print(f"工作表信息: 行数={sheet_props['row_count']}, 列数={sheet_props['column_count']}")
        max_col = column_id(sheet_props['column_count']) if sheet_props['column_count'] > 0 else 'ZZ'
        max_row = sheet_props['row_count'] if sheet_props['row_count'] > 0 else 1000
    
    # 2. Read the header row (using the exact range)
    print("正在读取表头...")
    header_range = f"{sheet_id}!A1:{max_col}1"  # the header is always read starting from column A
    header_data = client.read_range_values(access_token, sheet_token, header_range)
    
    # NOTE(review): .strip() here and below assumes header cells are strings -- confirm.
    if not header_data or not header_data[0] or all(not cell.strip() for cell in header_data[0] if cell):
        print("表格为空，需要根据数据创建表头")
        # Use the field names of the first record as the header
        if not res_list or not res_list[0]:
            print("错误：无法从空数据中创建表头")
            return
        
        # Extract the field names
        headers = list(res_list[0].keys())
        print(f"创建表头: {headers}")
        
        # Write the header (it contains no images, so a plain insert is enough)
        # NOTE(review): header_range is reassigned here but not read again afterwards.
        header_range = f"{sheet_id}!A1:{column_id(len(headers))}1"
        client.insert_data_at_row(access_token, sheet_token, sheet_id, 1, [headers])
        
        # With the header in place, data goes in from the second row on
        print("表头创建完成，开始插入数据...")
    else:
        # Parse the existing header
        headers = [cell.strip() for cell in header_data[0] if cell is not None]
        # NOTE(review): dropping empty header cells shifts the list indices
        # relative to the sheet columns when a blank header sits between
        # named ones; the indices are later turned into column letters.
        headers = [h for h in headers if h]  # remove empty fields
        print(f"读取到现有表头: {headers}")
    
    # Check that the sort field and the unique field exist
    if sort_field not in headers:
        print(f"警告: 排序字段 '{sort_field}' 未在表头中找到。可用字段: {headers}")
        # Without a sort field the data is simply handed to to_feishu.
        # NOTE(review): `headers` is always bound at this point, so the
        # condition below is always true and start_row becomes
        # len(headers) + 1 (a column count, not a row count); the else
        # branch is never taken. Confirm the intended start row.
        start_row = len(headers) + 1 if 'headers' in locals() else (max_row + 1 if sheet_props else 2)
        to_feishu(res_list, sheet_id, sheet_token, start_row, 1, grid_width, grid_height, border_width, border_color)
        return
    
    if unique_field not in headers:
        print(f"警告: 去重字段 '{unique_field}' 未在表头中找到，将使用排序字段 '{sort_field}' 进行去重")
        unique_field = sort_field
    
    sort_field_index = headers.index(sort_field)
    sort_field_col = column_id(sort_field_index + 1)  # convert to a column ID such as A, B, C...
    
    unique_field_index = headers.index(unique_field)
    unique_field_col = column_id(unique_field_index + 1)  # convert to a column ID such as A, B, C...
    
    # 3. Read the sort-field and unique-field columns
    print(f"正在读取排序字段 '{sort_field}' 和去重字段 '{unique_field}' 列数据...")
    
    # Read the sort-field column (row 2 down to max_row)
    sort_data_range = f"{sheet_id}!{sort_field_col}2:{sort_field_col}{max_row}"
    all_sort_data = client.read_range_values(access_token, sheet_token, sort_data_range)
    
    # Read the unique-field column (only if it differs from the sort field)
    if unique_field != sort_field:
        unique_data_range = f"{sheet_id}!{unique_field_col}2:{unique_field_col}{max_row}"
        all_unique_data = client.read_range_values(access_token, sheet_token, unique_data_range)
    else:
        all_unique_data = all_sort_data
    
    # First remove blank rows (rows where both the sort and the unique field are empty)
    print("检查并清理空白行...")
    empty_rows_to_delete = []
    
    if all_unique_data and all_sort_data:
        for i in range(min(len(all_unique_data), len(all_sort_data))):
            unique_row = all_unique_data[i] if i < len(all_unique_data) else None
            sort_row = all_sort_data[i] if i < len(all_sort_data) else None
            
            # Value of the unique field
            unique_value = ""
            if unique_row and len(unique_row) > 0 and unique_row[0]:
                unique_value = str(unique_row[0]).strip()
            
            # Value of the sort field
            sort_value = ""
            if sort_row and len(sort_row) > 0 and sort_row[0]:
                sort_value = str(sort_row[0]).strip()
            
            # Both empty: mark the row as blank
            if not unique_value and not sort_value:
                row_number = i + 2  # +2: data starts at row 2 and row numbers are 1-based
                empty_rows_to_delete.append(row_number)
                print(f"标记删除空白行: 第{row_number}行")
    
    # Delete the blank rows
    if empty_rows_to_delete:
        print(f"开始删除 {len(empty_rows_to_delete)} 个空白行...")
        # Delete bottom-up so earlier deletions do not shift the remaining row numbers
        empty_rows_to_delete.sort(reverse=True)
        
        for row_to_delete in empty_rows_to_delete:
            delete_result = client.delete_single_row(access_token, sheet_token, sheet_id, row_to_delete)
            if delete_result:
                print(f"成功删除空白行: 第{row_to_delete}行")
            else:
                print(f"删除空白行失败: 第{row_to_delete}行")
        
        # Re-read the columns (the sheet changed after the deletions)
        print("重新读取数据（清理空白行后）...")
        # Re-read the sort-field column
        sort_data_range = f"{sheet_id}!{sort_field_col}2:{sort_field_col}{max_row}"
        all_sort_data = client.read_range_values(access_token, sheet_token, sort_data_range)
        
        # Re-read the unique-field column
        if unique_field != sort_field:
            unique_data_range = f"{sheet_id}!{unique_field_col}2:{unique_field_col}{max_row}"
            all_unique_data = client.read_range_values(access_token, sheet_token, unique_data_range)
        else:
            all_unique_data = all_sort_data
    
    # Collect the rows of existing duplicates to delete
    duplicate_rows_to_delete = []
    
    if cleanup_duplicates and all_unique_data:
        # Analyse the duplicates first
        seen_unique_values = {}  # unique value -> row number of the occurrence being kept
        actual_data_rows = []  # (row number, unique value, sort value) of rows that hold data
        
        print(f"开始分析重复数据，总共读取了 {len(all_unique_data)} 行数据")
        
        # Find every valid data row and its real row number (both fields must have a value)
        for i in range(min(len(all_unique_data), len(all_sort_data) if all_sort_data else 0)):
            unique_row = all_unique_data[i] if i < len(all_unique_data) else None
            sort_row = all_sort_data[i] if i < len(all_sort_data) else None
            
            # Value of the unique field
            unique_value = ""
            if unique_row and len(unique_row) > 0 and unique_row[0]:
                unique_value = str(unique_row[0]).strip()
            
            # Value of the sort field
            sort_value = ""
            if sort_row and len(sort_row) > 0 and sort_row[0]:
                sort_value = str(sort_row[0]).strip()
            
            # A row counts as valid data only when both fields have a value
            if unique_value and sort_value:
                actual_row_number = i + 2  # +2: data starts at row 2 and row numbers are 1-based
                actual_data_rows.append((actual_row_number, unique_value, sort_value))
        
        print(f"找到 {len(actual_data_rows)} 行有效数据")
        
        # Detect duplicates
        for actual_row_number, unique_value, sort_value in actual_data_rows:
            if unique_value in seen_unique_values:
                # Duplicate found
                if keep_first:
                    # Keep the first occurrence, delete the current one
                    duplicate_rows_to_delete.append(actual_row_number)
                    print(f"标记删除重复行: 第{actual_row_number}行 ({unique_field}={unique_value}, {sort_field}={sort_value})")
                else:
                    # Keep the last occurrence, delete the earlier one
                    previous_row = seen_unique_values[unique_value]
                    duplicate_rows_to_delete.append(previous_row)
                    print(f"标记删除重复行: 第{previous_row}行 ({unique_field}={unique_value}, {sort_field}={sort_value})")
                    seen_unique_values[unique_value] = actual_row_number
            else:
                # First time this unique value is seen
                seen_unique_values[unique_value] = actual_row_number
        
        # Perform the cleanup: delete the duplicate rows
        if duplicate_rows_to_delete:
            print(f"开始清理 {len(duplicate_rows_to_delete)} 行重复数据...")
            # Delete bottom-up so earlier deletions do not shift the remaining row numbers
            duplicate_rows_to_delete.sort(reverse=True)
            
            for row_to_delete in duplicate_rows_to_delete:
                delete_result = client.delete_single_row(access_token, sheet_token, sheet_id, row_to_delete)
                if delete_result:
                    print(f"成功删除重复行: 第{row_to_delete}行")
                else:
                    print(f"删除重复行失败: 第{row_to_delete}行")
            
            # Re-read the columns (the sheet changed after the deletions)
            print("重新读取排序和去重字段数据...")
            # Re-read the sort-field column
            sort_data_range = f"{sheet_id}!{sort_field_col}2:{sort_field_col}{max_row}"
            all_sort_data = client.read_range_values(access_token, sheet_token, sort_data_range)
            
            # Re-read the unique-field column
            if unique_field != sort_field:
                unique_data_range = f"{sheet_id}!{unique_field_col}2:{unique_field_col}{max_row}"
                all_unique_data = client.read_range_values(access_token, sheet_token, unique_data_range)
            else:
                all_unique_data = all_sort_data
    
    # Build the final de-duplication set from the cleaned data (both fields must have a value)
    existing_unique_values = set()
    existing_unique_rows = {}  # used by the update/delete strategies: {unique_value: row_number}
    if all_unique_data and all_sort_data:
        for i in range(min(len(all_unique_data), len(all_sort_data))):
            unique_row = all_unique_data[i] if i < len(all_unique_data) else None
            sort_row = all_sort_data[i] if i < len(all_sort_data) else None
            
            # Value of the unique field
            unique_value = ""
            if unique_row and len(unique_row) > 0 and unique_row[0]:
                unique_value = str(unique_row[0]).strip()
            
            # Value of the sort field
            sort_value = ""
            if sort_row and len(sort_row) > 0 and sort_row[0]:
                sort_value = str(sort_row[0]).strip()
            
            # Only rows with both values enter the de-duplication set
            if unique_value and sort_value:
                actual_row_number = i + 2  # +2: data starts at row 2 and row numbers are 1-based
                existing_unique_values.add(unique_value)
                existing_unique_rows[unique_value] = actual_row_number
    
    print(f"现有去重值数量: {len(existing_unique_values)}")
    print(existing_unique_values)
    
    # Collect the sort values used to compute insert positions (from the latest cleaned data)
    sort_data = []
    if all_sort_data:
        # Check both the sort field and the unique field so only complete rows are counted
        for i in range(min(len(all_sort_data), len(all_unique_data) if all_unique_data else 0)):
            sort_row = all_sort_data[i] if i < len(all_sort_data) else None
            unique_row = all_unique_data[i] if i < len(all_unique_data) else None
            
            # Value of the sort field
            sort_value = ""
            if sort_row and len(sort_row) > 0 and sort_row[0]:
                sort_value = str(sort_row[0]).strip()
            
            # Value of the unique field
            unique_value = ""
            if unique_row and len(unique_row) > 0 and unique_row[0]:
                unique_value = str(unique_row[0]).strip()
            
            # Only rows with both values are added to the sort data
            if sort_value and unique_value:
                sort_data.append([sort_value])
    
    if not sort_data:
        print("未读取到排序字段数据，所有新数据将从第二行开始插入")
    
    # Convert the new records to cell text (the input dicts are not modified)
    processed_data = []
    for row in tqdm(res_list, desc="处理数据"):
        if not row:
            continue
        processed_row = {}
        for k, v in row.items():
            if isinstance(v, list):
                # Lists whose first item looks like a URL keep Python list syntax; others become JSON
                if len(v) > 0 and v[0] and str(v[0]).startswith('http'):
                    processed_row[k] = truncate_by_bytes(str(v))
                else:
                    json_str = json.dumps(v, ensure_ascii=False, indent=1)
                    processed_row[k] = truncate_by_bytes(json_str)
            elif isinstance(v, dict):
                json_str = json.dumps(v, ensure_ascii=False, indent=1)
                processed_row[k] = truncate_by_bytes(json_str)
            else:
                processed_row[k] = truncate_by_bytes(v)
        processed_data.append(processed_row)
    
    # Convert to a DataFrame for easier handling
    df_new = pd.DataFrame(processed_data)
    df_new.fillna('', inplace=True)
    
    # Make sure the new data has every header column
    for header in headers:
        if header not in df_new.columns:
            df_new[header] = ''
    
    # Reorder the columns to follow the header (columns not in the header are dropped)
    df_new = df_new.reindex(columns=headers, fill_value='')
    
    # Pre-processing: filter out duplicates and decide the insert order
    print(f"预处理新数据：过滤重复并排序...")
    print(f"传入数据总量: {len(df_new)} 行")
    print(f"现有去重集合大小: {len(existing_unique_values)}")
    
    valid_rows = []
    update_rows = []  # rows to update: [{row_number, values, unique_value}, ...]
    skipped_count = 0
    new_data_duplicates = 0  # count of duplicates inside the new data
    updated_count = 0  # count of rows marked for update
    
    for idx, new_row in df_new.iterrows():
        new_row_values = new_row.tolist()
        new_sort_value = str(new_row_values[sort_field_index])
        new_unique_value = str(new_row_values[unique_field_index])
        
        # Is this record already present in the sheet?
        if new_unique_value in existing_unique_values:
            if duplicate_strategy == 'update':
                # Update strategy: remember the row that needs updating
                target_row = existing_unique_rows[new_unique_value]
                update_rows.append({
                    'row_number': target_row,
                    'values': new_row_values,
                    'unique_value': new_unique_value
                })
                print(f"标记更新现有数据: 第{target_row}行 {unique_field}={new_unique_value}")
                updated_count += 1
                continue
            elif duplicate_strategy == 'delete':
                # Delete strategy: remove the existing row, then insert the new record
                # NOTE(review): sort_data is not adjusted after this deletion, so
                # it still holds the deleted row's sort value when insert
                # positions are computed further down -- confirm.
                target_row = existing_unique_rows[new_unique_value]
                delete_result = client.delete_single_row(access_token, sheet_token, sheet_id, target_row)
                if delete_result:
                    print(f"成功删除重复行: 第{target_row}行 {unique_field}={new_unique_value}")
                    # Remove from the set so the record can be inserted below
                    existing_unique_values.remove(new_unique_value)
                    # Shift the recorded row numbers (rows below the deleted one move up by one)
                    for key, row_num in existing_unique_rows.items():
                        if row_num > target_row:
                            existing_unique_rows[key] = row_num - 1
                    del existing_unique_rows[new_unique_value]
                else:
                    print(f"删除重复行失败: 第{target_row}行 {unique_field}={new_unique_value}")
                    skipped_count += 1
                    continue
            else:  # 'skip' strategy (also any unrecognised strategy value)
                print(f"跳过与现有数据重复: {unique_field}={new_unique_value}")
                skipped_count += 1
                continue
        
        # Is this record duplicated inside the new data?
        already_processed = any(row['unique_value'] == new_unique_value for row in valid_rows)
        if already_processed:
            print(f"跳过新数据内部重复: {unique_field}={new_unique_value}")
            new_data_duplicates += 1
            continue
        
        # Queue for insertion
        valid_rows.append({
            'values': new_row_values,
            'sort_value': new_sort_value,
            'unique_value': new_unique_value
        })
    
    print(f"预处理完成：有效数据 {len(valid_rows)} 行，需要更新 {len(update_rows)} 行，跳过与现有重复 {skipped_count} 行，跳过新数据内部重复 {new_data_duplicates} 行")
    
    # Apply the updates
    if update_rows:
        print(f"开始执行更新操作，共 {len(update_rows)} 行...")
        for update_data in tqdm(update_rows, desc="更新数据"):
            row_number = update_data['row_number']
            new_values = update_data['values']
            unique_value = update_data['unique_value']
            
            # Build the {field name: new value} mapping
            if update_fields is None:
                # Update every field except unique_field (the key field is left alone)
                field_updates = {}
                for i, header in enumerate(headers):
                    if header != unique_field:  # do not update the unique field
                        field_updates[header] = new_values[i]
                print(f"更新第{row_number}行所有字段（除了{unique_field}）: {unique_value}")
            else:
                # Update only the requested fields
                field_updates = {}
                for field_name in update_fields:
                    if field_name in headers:
                        field_index = headers.index(field_name)
                        field_updates[field_name] = new_values[field_index]
                    else:
                        print(f"警告：字段 '{field_name}' 不存在于表头中，跳过")
                print(f"更新第{row_number}行指定字段 {list(field_updates.keys())}: {unique_value}")
            
            # Perform the update
            if field_updates:
                result = client.update_row_with_specific_fields_and_images(
                    access_token, sheet_token, sheet_id, row_number, 
                    field_updates, headers, True, grid_width, grid_height, border_width, border_color
                )
                if result:
                    print(f"✅ 成功更新第{row_number}行")
                else:
                    print(f"❌ 更新第{row_number}行失败")
    
    if not valid_rows:
        if update_rows:
            print("所有数据均为更新操作，无新数据需要插入")
        else:
            print("没有新数据需要插入")
        return
    
    # Sort the new rows by the sort field (direction chosen by sort_ascending).
    # sort_value is a string, so the ordering is lexicographic.
    if sort_ascending:
        # Ascending: smaller values are inserted first (reverse=False)
        valid_rows.sort(key=lambda x: x['sort_value'], reverse=False)
        print(f"新数据排序完成，将按升序插入")
    else:
        # Descending: larger values are inserted first (reverse=True)
        valid_rows.sort(key=lambda x: x['sort_value'], reverse=True)
        print(f"新数据排序完成，将按降序插入")
    
    # Insert the sorted rows one by one
    for i, row_data in tqdm(enumerate(valid_rows), total=len(valid_rows), desc="插入数据"):
        new_row_values = row_data['values']
        new_sort_value = row_data['sort_value']
        new_unique_value = row_data['unique_value']
        
        # Find the insert position (direction chosen by sort_ascending)
        insert_row = len(sort_data) + 2  # default: append at the end
        
        print(f"查找插入位置，新值: {new_sort_value}")
        
        # Find the correct slot between two neighbouring values
        if sort_ascending:
            # Ascending (small -> large): look for prev_value < new_value < current_value
            for j in range(len(sort_data)):
                current_value = str(sort_data[j][0]) if sort_data[j] and len(sort_data[j]) > 0 else ""
                prev_value = str(sort_data[j-1][0]) if j > 0 and sort_data[j-1] and len(sort_data[j-1]) > 0 else None
                
                # Should the row go at the current position?
                if prev_value is None:
                    # First position: check whether the row belongs at the very top
                    if new_sort_value < current_value:
                        insert_row = j + 2  # +2 for the header offset
                        print(f"  插入到最前面第{insert_row}行: 新值{new_sort_value} < 第一个值{current_value}")
                        break
                else:
                    # Check whether the value lies between two neighbours
                    if new_sort_value >= prev_value and new_sort_value < current_value:
                        insert_row = j + 2  # +2 for the header offset
                        print(f"  插入到第{insert_row}行: {prev_value} <= {new_sort_value} < {current_value}")
                        break
                    elif new_sort_value == current_value:
                        # Equal values: insert right after the equal one
                        insert_row = j + 3  # +2 (header offset) +1 (after this row)
                        print(f"  插入到第{insert_row}行: 新值{new_sort_value} = 现有值{current_value}，插入其后")
                        break
            
            # insert_row still at its default: the row is appended at the end
            if insert_row == len(sort_data) + 2:
                last_value = str(sort_data[-1][0]) if sort_data and sort_data[-1] and len(sort_data[-1]) > 0 else "无"
                print(f"  插入到末尾第{insert_row}行: 新值{new_sort_value} > 最后一个值{last_value}")
        else:
            # Descending (large -> small): look for prev_value > new_value > current_value
            for j in range(len(sort_data)):
                current_value = str(sort_data[j][0]) if sort_data[j] and len(sort_data[j]) > 0 else ""
                prev_value = str(sort_data[j-1][0]) if j > 0 and sort_data[j-1] and len(sort_data[j-1]) > 0 else None
                
                # Should the row go at the current position?
                if prev_value is None:
                    # First position: check whether the row belongs at the very top
                    if new_sort_value > current_value:
                        insert_row = j + 2  # +2 for the header offset
                        print(f"  插入到最前面第{insert_row}行: 新值{new_sort_value} > 第一个值{current_value}")
                        break
                else:
                    # Check whether the value lies between two neighbours
                    if new_sort_value <= prev_value and new_sort_value > current_value:
                        insert_row = j + 2  # +2 for the header offset
                        print(f"  插入到第{insert_row}行: {prev_value} >= {new_sort_value} > {current_value}")
                        break
                    elif new_sort_value == current_value:
                        # Equal values: insert right after the equal one
                        insert_row = j + 3  # +2 (header offset) +1 (after this row)
                        print(f"  插入到第{insert_row}行: 新值{new_sort_value} = 现有值{current_value}，插入其后")
                        break
            
            # insert_row still at its default: the row is appended at the end
            if insert_row == len(sort_data) + 2:
                last_value = str(sort_data[-1][0]) if sort_data and sort_data[-1] and len(sort_data[-1]) > 0 else "无"
                print(f"  插入到末尾第{insert_row}行: 新值{new_sort_value} < 最后一个值{last_value}")
        
        print(f"[{i+1}/{len(valid_rows)}] 最终插入位置: 第 {insert_row} 行: {sort_field}={new_sort_value}")
        
        # Insert the data at the chosen row (a real new row is inserted)
        result = client.insert_row_with_data_at_position(access_token, sheet_token, sheet_id, insert_row, [new_row_values], True, grid_width, grid_height, border_width, border_color)
        
        if result:
            print(f"成功插入数据和图片到第 {insert_row} 行")
            # Keep sort_data in step with the sheet: add the new sort value at the same position
            sort_data_index = insert_row - 2  # convert to a sort_data index (-2 for the header offset)
            sort_data.insert(sort_data_index, [new_sort_value])
            # Update the de-duplication set
            existing_unique_values.add(new_unique_value)
        else:
            print(f"插入数据到第 {insert_row} 行失败")


if __name__ == "__main__":
    # Script entry point. The statements below are commented-out manual
    # experiments followed by a bare string literal (a no-op expression)
    # that holds usage notes for to_feishu_incremental.

    # Disabled experiment: fetch test data and write it to a sheet.
    # data = get_test_data()
    # sheet_token = 'IoTOsjZ4khIqlOtTxnec8oTbn7c'
    # sheetid = 'K9c4LG'
    # write_data_to_sheet(data, sheetid=sheetid)

    # Disabled experiment: call is_image_cell on a string that encodes a list of image URLs.
    # is_image_cell_result = is_image_cell('["http://sns-webpic-qc.xhscdn.com/202501021415/1a6e88908930afce92b09206d5a482f8/1040g2sg31b74rf6k7g5g5oo7i8vkgev59lkjet0!nd_whlt34_webp_wm_1","http://sns-webpic-qc.xhscdn.com/202501021415/1a6e88908930afce92b09206d5a482f8/1040g2sg31b74rf6k7g5g5oo7i8vkgev59lkjet0!nd_whlt34_webp_wm_1"]')
    # print(is_image_cell_result)
    
    # Usage examples for the newly added function.
    # NOTE: the string below is never executed or assigned; it is kept purely
    # as in-file documentation (example calls, feature list, use cases).
    """
    示例：使用 to_feishu_incremental 增量插入数据
    
    # 测试数据
    test_data = [
        {
            '内容ID': '1001', 
            '标题': '测试标题1', 
            '内容': '测试内容1',
            '图片': '["http://example.com/image1.jpg", "http://example.com/image2.jpg"]'
        },
        {
            '内容ID': '1003', 
            '标题': '测试标题2', 
            '内容': '测试内容2',
            '图片': 'http://example.com/image3.jpg'
        }
    ]
    
         # 调用增量插入函数
     to_feishu_incremental(
         res_list=test_data,
         sort_field='内容ID',  # 按此字段排序
         sheet_id='your_sheet_id', 
         sheet_token='your_sheet_token',
         unique_field='内容ID',  # 去重字段，默认使用sort_field
         duplicate_strategy='update',  # 重复处理策略：'skip'跳过, 'delete'删除后插入, 'update'更新指定字段
         update_fields=['标题', '内容', '图片'],  # 当strategy='update'时，只更新这些字段
         cleanup_duplicates=True,  # 先清理现有表格中的重复数据
         keep_first=True,  # 清理时保留第一个重复项
         sort_ascending=False,  # 排序顺序：False为降序(大→小)，True为升序(小→大)
         grid_width=2,  # 图片拼接列数
         grid_height=2,  # 图片拼接行数
     )
    
    # 排序方向示例：
    
    # 示例1：按时间戳降序排序（最新的在前面）- 适合新闻、动态等时间敏感内容
    to_feishu_incremental(
        res_list=news_data,
        sort_field='发布时间',
        sort_ascending=False,  # 降序，最新时间在前面
        # ... 其他参数
    )
    
    # 示例2：按ID升序排序（从小到大）- 适合有明确编号顺序的内容
    to_feishu_incremental(
        res_list=product_data,
        sort_field='产品ID',
        sort_ascending=True,  # 升序，小ID在前面
        # ... 其他参数
    )
    
    # 示例3：按优先级降序排序（高优先级在前面）- 适合任务、问题等需要优先级管理的内容
    to_feishu_incremental(
        res_list=task_data,
        sort_field='优先级',
        sort_ascending=False,  # 降序，高优先级在前面
        # ... 其他参数
    )
    
    功能说明：
    1. **智能表头处理**：
       - 如果表格为空，自动从数据中提取字段名创建表头
       - 如果表格已有数据，读取现有表头结构
    2. **空白行清理**：
       - 自动检测并删除排序字段和去重字段都为空的空白行
       - 确保数据的连续性和逻辑一致性
    3. **重复数据清理**：
       - cleanup_duplicates=True: 先清理现有表格中的重复数据
       - keep_first: 保留第一个或最后一个重复项
    4. **智能去重检查**：
       - 基于 unique_field 字段检查数据是否已存在
       - 预处理阶段过滤重复数据，避免插入过程中的状态变化问题
    5. **排序插入**：根据指定的 sort_field 字段和 sort_ascending 参数查找插入位置
       - sort_ascending=False（默认）：降序排序，较大的值插入到较前面的位置
       - sort_ascending=True：升序排序，较小的值插入到较前面的位置
    6. **逐行数据插入**：按排序顺序逐行插入数据，保持表格整体有序
    7. **完整图片支持**：自动处理图片写入，支持单张图片和图片列表
    8. **图片拼接功能**：支持多图拼接，可设置拼接的行列数和边框样式
    
    适用场景：
    - ✅ 空表格：自动创建表头并插入数据
    - ✅ 已有重复数据的表格：先清理重复，再智能插入
    - ✅ 增量数据更新：逐条插入，保持排序，自动去重
    - ✅ 重复运行安全：不会插入重复数据
    - ✅ 数据清理：一键清理现有重复数据
    - ✅ 灵活排序：支持升序和降序两种排序方式
    """