# feishu.py — Feishu/Lark open-API spreadsheet client
  1. # -*- coding: UTF-8 -*-
  2. import json
  3. import base64
  4. import hashlib
  5. import os
  6. from curl_cffi import requests as mj_requests
  7. import requests
  8. import os
  9. from dotenv import load_dotenv, find_dotenv
  10. load_dotenv(find_dotenv())
  11. # load from env
  12. APP_ID = 'cli_a22acf2916b8500e'
  13. APP_SECRET = 'tE0xAB2gZTMlBGdPczCGLcmpRlZQm5CQ'
  14. LARK_HOST = 'https://open.feishu.cn'
  15. APP_HOST = 'https://open.feishu.cn'
  16. EMAIL = 'semsevens@email.com'
  17. class LarkException(Exception):
  18. def __init__(self, code=0, msg=None):
  19. self.code = code
  20. self.msg = msg
  21. def __str__(self) -> str:
  22. return "{}:{}".format(self.code, self.msg)
  23. __repr__ = __str__
  24. def request(method, url, headers, payload={}):
  25. response = requests.request(method, url, headers=headers, json=payload)
  26. # logging.info("URL: " + url)
  27. # logging.info("X-Tt-Logid: " + response.headers['X-Tt-Logid'])
  28. # logging.info("headers:\n"+json.dumps(headers,indent=2, ensure_ascii=False))
  29. # logging.info("payload:\n"+json.dumps(payload,indent=2, ensure_ascii=False))
  30. resp = {}
  31. if response.text[0] == '{':
  32. resp = response.json()
  33. # logging.info("response:\n"+json.dumps(resp,indent=2, ensure_ascii=False))
  34. else:
  35. pass
  36. # logging.info("response:\n"+response.text)
  37. code = resp.get("code", -1)
  38. if code == -1:
  39. code = resp.get("StatusCode", -1)
  40. if code == -1 and response.status_code != 200:
  41. response.raise_for_status()
  42. if code != 0:
  43. raise LarkException(code=code, msg=resp.get("msg", ""))
  44. return resp
  45. def get_image_data_from_url(img_url, use_cache=True):
  46. # 计算URL的MD5哈希值
  47. url_hash = hashlib.md5(img_url.encode()).hexdigest()
  48. cache_dir = 'image_cache'
  49. cache_file = os.path.join(cache_dir, f'{url_hash}.json')
  50. if use_cache:
  51. # 检查缓存目录是否存在,如果不存在则创建
  52. if not os.path.exists(cache_dir):
  53. os.makedirs(cache_dir)
  54. # 检查缓存文件是否存在
  55. if os.path.exists(cache_file):
  56. with open(cache_file, 'r') as f:
  57. cached_data = json.load(f)
  58. return cached_data['image_data']
  59. # 如果缓存不存在,从URL获取图片
  60. if 'midjourney' in img_url:
  61. proxies = {
  62. 'http': 'http://127.0.0.1:7890',
  63. 'https': 'http://127.0.0.1:7890',
  64. }
  65. # response = mj_requests.get(img_url, impersonate="chrome100", proxies=proxies)
  66. response = mj_requests.get(img_url.replace("https://", "http://"), impersonate="chrome100")
  67. else:
  68. # proxies = {
  69. # 'http': 'http://t10952018781111:1ap37oc3@d844.kdltps.com:15818',
  70. # 'https': 'http://t10952018781111:1ap37oc3@d844.kdltps.com:15818',
  71. # }
  72. # proxies = {
  73. # 'http': None,
  74. # 'https': None,
  75. # }
  76. # response = requests.get(img_url.replace("https://", "http://"), proxies=proxies)
  77. response = requests.get(img_url)
  78. # response = requests.get(img_url, proxies=proxies)
  79. if response.status_code == 200:
  80. image_content = response.content
  81. missing_padding = 4 - len(image_content) % 4
  82. if missing_padding:
  83. image_content += b'=' * missing_padding
  84. image_data = base64.b64encode(image_content).decode('utf-8')
  85. # 将图片数据保存到缓存
  86. with open(cache_file, 'w') as f:
  87. json.dump({'image_data': image_data}, f)
  88. return image_data
  89. else:
  90. # import traceback
  91. # traceback.print_exc()
  92. raise Exception(f"无法从URL获取图片: {img_url}")
  93. from PIL import Image
  94. import io
  95. import os
  96. def get_image_size(img_url):
  97. img_data = get_image_data_from_url(img_url)
  98. img = Image.open(io.BytesIO(base64.b64decode(img_data)))
  99. width, height = img.size
  100. return width, height
if __name__ == "__main__":
    # Ad-hoc manual test: download one image and dump its base64 payload.
    # NOTE(review): the save path is hard-coded to a developer machine --
    # adjust before running elsewhere.
    img_url = "https://sns-webpic.xhscdn.com/1040g2sg31c4vs26n12a05ph3cdp3cutm5prqo90"
    img_data = get_image_data_from_url(img_url)
    save_path = "/Users/nieqi/Downloads/save.json"
    with open(save_path, 'w') as f:
        f.write(img_data)
  107. def column_id(col):
  108. '''column int to string id'''
  109. ans = ""
  110. i = col
  111. while i > 0:
  112. m = int((i-1) % 26)
  113. i = int((i-1) / 26)
  114. ans = chr(m+65) + ans
  115. return ans
  116. def do_compress_image(image_data, image_type):
  117. # 压缩图片
  118. from PIL import Image
  119. import io
  120. import base64
  121. Image.MAX_IMAGE_PIXELS = None # 禁用图片大小限制
  122. # 将base64转为图片对象
  123. image = Image.open(io.BytesIO(base64.b64decode(image_data)))
  124. # 计算压缩后的尺寸,保持宽高比
  125. max_size = 1600
  126. ratio = min(max_size/image.width, max_size/image.height)
  127. if ratio < 1:
  128. new_size = (int(image.width * ratio), int(image.height * ratio))
  129. image = image.resize(new_size, Image.Resampling.LANCZOS)
  130. # 在保存之前转换RGBA为RGB
  131. if image.mode == 'RGBA':
  132. # 创建白色背景
  133. background = Image.new('RGB', image.size, (255, 255, 255))
  134. # 将RGBA图片合成到白色背景上
  135. background.paste(image, mask=image.split()[3]) # 使用alpha通道作为mask
  136. image = background
  137. buffer = io.BytesIO()
  138. # 将 'JPG' 转换为 'JPEG'
  139. if image_type and image_type.upper() == 'JPG':
  140. image_type = 'JPEG'
  141. image_type = 'JPEG'
  142. # image.save(buffer, format=image_type.upper(), quality=95, optimize=True)
  143. image.save(buffer, format=image_type.upper(), quality=85, optimize=True)
  144. image_data = base64.b64encode(buffer.getvalue()).decode()
  145. return image_data
class Client(object):
    """Thin wrapper around the Feishu/Lark open API (auth, drive, sheets)."""

    def __init__(self, lark_host):
        # Base API host, e.g. "https://open.feishu.cn" (no trailing slash).
        self._host = lark_host
  149. def get_tenant_access_token(self, app_id, app_secret):
  150. url = self._host+"/open-apis/auth/v3/app_access_token/internal/"
  151. headers = {
  152. 'Content-Type': 'application/json; charset=utf-8'
  153. }
  154. payload = {
  155. 'app_id': app_id,
  156. 'app_secret': app_secret
  157. }
  158. resp = request("POST", url, headers, payload)
  159. return resp['tenant_access_token']
  160. def get_user_access_token(self, tenant_access_token, code):
  161. url = self._host+"/open-apis/authen/v1/access_token"
  162. headers = {
  163. 'Content-Type': 'application/json; charset=utf-8'
  164. }
  165. payload = {
  166. "grant_type": "authorization_code",
  167. "code": code,
  168. "app_access_token": tenant_access_token
  169. }
  170. resp = request("POST", url, headers, payload)
  171. return resp['data']['access_token']
  172. def get_root_folder_token(self, access_token):
  173. url = self._host+"/open-apis/drive/explorer/v2/root_folder/meta"
  174. headers = {
  175. 'Content-Type': 'application/json; charset=utf-8',
  176. 'Authorization': 'Bearer '+access_token
  177. }
  178. resp = request("GET", url, headers)
  179. return resp['data']['token']
  180. def create_spreadsheet(self, access_token, foldertoken, title):
  181. url =self._host+"/open-apis/sheets/v3/spreadsheets"
  182. headers = {
  183. 'Content-Type': 'application/json; charset=utf-8',
  184. 'Authorization': 'Bearer '+access_token
  185. }
  186. payload={
  187. "title": title,
  188. "folder_token": foldertoken
  189. }
  190. resp = request("POST", url, headers, payload)
  191. return resp['data']['spreadsheet']['spreadsheet_token'], resp['data']['spreadsheet']['url']
  192. def get_sheetid(self, access_token, doctoken, sheet_index=0):
  193. url = self._host+"/open-apis/sheets/v2/spreadsheets/"+doctoken+"/metainfo"
  194. headers = {
  195. 'Content-Type': 'application/json; charset=utf-8',
  196. 'Authorization': 'Bearer '+access_token
  197. }
  198. resp = request("GET", url, headers)
  199. return resp['data']['sheets'][sheet_index]["sheetId"]
  200. def batch_update_values(self, access_token, doctoken, data):
  201. url =self._host+"/open-apis/sheets/v2/spreadsheets/"+doctoken+"/values_batch_update"
  202. headers = {
  203. 'Content-Type': 'application/json; charset=utf-8',
  204. 'Authorization': 'Bearer '+access_token
  205. }
  206. payload=data
  207. resp = request("POST", url, headers, payload)
  208. return resp['data']['spreadsheetToken']
  209. def batch_update_styles(self, access_token, doctoken, data):
  210. url =self._host+"/open-apis/sheets/v2/spreadsheets/"+doctoken+"/styles_batch_update"
  211. headers = {
  212. 'Content-Type': 'application/json; charset=utf-8',
  213. 'Authorization': 'Bearer '+access_token
  214. }
  215. payload=data
  216. resp = request("PUT", url, headers, payload)
  217. return resp['data']['spreadsheetToken']
  218. def add_permissions_member(self, access_token, doctoken, doctype, member_type, member_id, perm):
  219. url = self._host+"/open-apis/drive/v1/permissions/"+doctoken+"/members?type="+doctype+"&need_notification=false"
  220. headers = {
  221. 'Content-Type': 'application/json; charset=utf-8',
  222. 'Authorization': 'Bearer '+access_token
  223. }
  224. payload = {
  225. "member_type": member_type,
  226. "member_id": member_id,
  227. "perm": perm
  228. }
  229. request("POST", url, headers, payload)
  230. def write_image_to_cell(self, access_token, doctoken, sheetid, img_url, row, col, image_type, compress_image=True):
  231. url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/values_image"
  232. headers = {
  233. 'Content-Type': 'application/json; charset=utf-8',
  234. 'Authorization': f'Bearer {access_token}'
  235. }
  236. try:
  237. image_data = get_image_data_from_url(img_url)
  238. except Exception as e:
  239. print(img_url)
  240. print(e)
  241. return None, None
  242. if compress_image:
  243. image_data = do_compress_image(image_data, image_type)
  244. image_name = img_url.split('/')[-1].replace(f'.{image_type}', '') # 从URL中提取文件名
  245. if compress_image:
  246. image_type = 'JPEG'
  247. cell_start = column_id(col)+str(row)
  248. range = f'{sheetid}!{cell_start}:{cell_start}'
  249. payload = {
  250. "range": range,
  251. "image": image_data,
  252. "name": f"{image_name}.{image_type}"
  253. }
  254. try:
  255. resp = request("POST", url, headers, payload)
  256. except Exception as e:
  257. print(img_url)
  258. print(image_name)
  259. print(image_type)
  260. print(e)
  261. return None, None
  262. return resp['data']['revision'], resp['data']['updateRange']
  263. def merge_cells(self, access_token, doctoken, sheetid, start_row, end_row, start_col, end_col):
  264. print(f"merge start_row = {start_row} end_row = {end_row} start_col = {start_col} end_col = {end_col}")
  265. url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/merge_cells"
  266. headers = {
  267. 'Content-Type': 'application/json; charset=utf-8',
  268. 'Authorization': f'Bearer {access_token}'
  269. }
  270. start_col_id = column_id(start_col)
  271. end_col_id = column_id(end_col)
  272. payload = {
  273. "range": f"{sheetid}!{start_col_id}{start_row}:{end_col_id}{end_row}",
  274. "mergeType": "MERGE_ALL",
  275. }
  276. try:
  277. resp = request("POST", url, headers, payload)
  278. except Exception as e:
  279. print(e)
  280. return None
  281. return None
    def write_images_to_cell(self, access_token, doctoken, sheetid, img_url_list, row, col, compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
        """Stitch several images into one grid picture and write it to a cell.

        Args:
            access_token: bearer token.
            doctoken: spreadsheet token.
            sheetid: target sheet id.
            img_url_list: list of image URLs to download and combine.
            row: 1-based target cell row.
            col: 1-based target cell column.
            compress_image: save the combined picture as JPEG (quality 85)
                when True, otherwise as PNG.
            grid_width: number of grid columns; auto-computed when None.
            grid_height: number of grid rows; auto-computed when None.
            border_width: border thickness between tiles, in pixels.
            border_color: border RGB color tuple.

        Returns:
            (revision, updateRange) on success, (None, None) on failure.
        """
        from PIL import Image, ImageDraw
        import io
        import base64
        import math
        # Download every image; ones that fail are skipped, not fatal.
        images = []
        for img_url in img_url_list:
            try:
                image_type = get_image_type(img_url)
                if not image_type:
                    continue
                image_data = get_image_data_from_url(img_url)
                image = Image.open(io.BytesIO(base64.b64decode(image_data)))
                images.append(image)
            except Exception as e:
                print(f"下载图片失败: {img_url}")
                print(e)
                continue
        if not images:
            return None, None
        # Work out the grid dimensions.
        img_count = len(images)
        if grid_width is None and grid_height is None:
            # Neither given: pick the grid closest to a square.
            grid_width = math.ceil(math.sqrt(img_count))
            grid_height = math.ceil(img_count / grid_width)
        elif grid_width is None:
            # Only the row count given: derive the column count.
            grid_width = math.ceil(img_count / grid_height)
        elif grid_height is None:
            # Only the column count given: derive the row count.
            grid_height = math.ceil(img_count / grid_width)
        # Grow the grid until it can hold every image.
        while grid_width * grid_height < img_count:
            if grid_width <= grid_height:
                grid_width += 1
            else:
                grid_height += 1
        # Normalize all images to a common tile size, keeping aspect ratios.
        if images:
            # Use the average width/height as the reference tile size.
            avg_width = sum(img.width for img in images) // len(images)
            avg_height = sum(img.height for img in images) // len(images)
            target_size = (avg_width, avg_height)
            # Resize each image to fit the tile while keeping its ratio.
            resized_images = []
            for img in images:
                # Compare aspect ratios to decide which side constrains.
                img_ratio = img.width / img.height
                target_ratio = target_size[0] / target_size[1]
                if img_ratio > target_ratio:
                    # Wider than the tile: fit to the tile width.
                    new_width = target_size[0]
                    new_height = int(target_size[0] / img_ratio)
                else:
                    # Taller than the tile: fit to the tile height.
                    new_height = target_size[1]
                    new_width = int(target_size[1] * img_ratio)
                # Proportional resize.
                resized_img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
                resized_images.append(resized_img)
            # Canvas size = tiles plus a border on every side of each tile.
            canvas_width = grid_width * avg_width + (grid_width + 1) * border_width
            canvas_height = grid_height * avg_height + (grid_height + 1) * border_width
            canvas = Image.new('RGB', (canvas_width, canvas_height), border_color)
            # Paste the tiles row-major.
            for i, img in enumerate(resized_images):
                row_idx = i // grid_width
                col_idx = i % grid_width
                # Top-left corner of this grid cell.
                cell_x = col_idx * avg_width + (col_idx + 1) * border_width
                cell_y = row_idx * avg_height + (row_idx + 1) * border_width
                # Center the image inside its grid cell.
                center_x = cell_x + (avg_width - img.width) // 2
                center_y = cell_y + (avg_height - img.height) // 2
                canvas.paste(img, (center_x, center_y))
            # Encode the combined picture as base64.
            output = io.BytesIO()
            if compress_image:
                canvas.save(output, format='JPEG', quality=85)
                image_type = 'JPEG'
            else:
                canvas.save(output, format='PNG')
                image_type = 'PNG'
            output.seek(0)
            image_data = base64.b64encode(output.getvalue()).decode()
            # Upload via the values_image endpoint.
            url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/values_image"
            headers = {
                'Content-Type': 'application/json; charset=utf-8',
                'Authorization': f'Bearer {access_token}'
            }
            cell_start = column_id(col) + str(row)
            range_val = f'{sheetid}!{cell_start}:{cell_start}'
            payload = {
                "range": range_val,
                "image": image_data,
                "name": f"combined_image.{image_type}"
            }
            try:
                resp = request("POST", url, headers, payload)
                return resp['data']['revision'], resp['data']['updateRange']
            except Exception as e:
                print(f"写入拼接图片失败: {e}")
                return None, None
        return None, None
  401. def read_range_values(self, access_token, doctoken, range_val):
  402. """
  403. 读取指定范围的数据
  404. Args:
  405. access_token: 访问令牌
  406. doctoken: 表格token
  407. range_val: 范围,格式如 "Sheet1!A1:C10"
  408. Returns:
  409. 读取到的数据列表
  410. """
  411. url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/values/{range_val}"
  412. headers = {
  413. 'Content-Type': 'application/json; charset=utf-8',
  414. 'Authorization': f'Bearer {access_token}'
  415. }
  416. try:
  417. resp = request("GET", url, headers)
  418. return resp['data']['valueRange']['values']
  419. except Exception as e:
  420. print(f"读取数据失败: {e}")
  421. return []
  422. def prepend_data(self, access_token, doctoken, range_val, values):
  423. """
  424. 在指定位置前面插入数据
  425. Args:
  426. access_token: 访问令牌
  427. doctoken: 表格token
  428. range_val: 插入范围,格式如 "Sheet1!A1:C1"
  429. values: 要插入的数据
  430. Returns:
  431. 操作结果
  432. """
  433. url = f"{self._host}/open-apis/sheets/v3/spreadsheets/{doctoken}/sheets/{range_val.split('!')[0]}/prepend"
  434. headers = {
  435. 'Content-Type': 'application/json; charset=utf-8',
  436. 'Authorization': f'Bearer {access_token}'
  437. }
  438. # 从range_val中提取行数
  439. range_part = range_val.split('!')[1] # 如 "A1:Z1"
  440. start_cell = range_part.split(':')[0] # 如 "A1"
  441. payload = {
  442. "values": values
  443. }
  444. try:
  445. resp = request("POST", url, headers, payload)
  446. return resp
  447. except Exception as e:
  448. print(f"插入数据失败: {e}")
  449. return None
  450. def insert_data_at_row(self, access_token, doctoken, sheetid, row, values):
  451. """
  452. 在指定行插入数据(使用批量更新方式)
  453. Args:
  454. access_token: 访问令牌
  455. doctoken: 表格token
  456. sheetid: 工作表ID
  457. row: 目标行号
  458. values: 要插入的数据
  459. Returns:
  460. 操作结果
  461. """
  462. # 使用批量更新的方式插入数据
  463. cols = len(values[0]) if values else 1
  464. end_col = column_id(cols)
  465. range_val = f"{sheetid}!A{row}:{end_col}{row}"
  466. body = {
  467. "valueRanges": [
  468. {
  469. "range": range_val,
  470. "values": values
  471. }
  472. ]
  473. }
  474. try:
  475. result = self.batch_update_values(access_token, doctoken, body)
  476. return result
  477. except Exception as e:
  478. print(f"插入数据到第{row}行失败: {e}")
  479. return None
    def insert_rows_before(self, access_token, doctoken, sheetid, row_index, count=1, inherit_style="BEFORE"):
        """Insert new rows before *row_index* (Feishu insert_dimension_range API).

        Note: insert_dimension_range requires endIndex <= current sheet row
        count. If the sheet is too short (e.g. only 2 rows remain after old
        data was deleted but 5000 rows must be inserted), first call
        append_empty_rows to add placeholder rows, then call this method to
        insert the styled rows. Typical usage:

            client.append_empty_rows(token, doc, sheet, count)        # grow first
            client.insert_rows_before(token, doc, sheet, row, count)  # then insert (inherits style)
            # delete the surplus placeholder rows after writing the data

        Args:
            access_token: bearer token.
            doctoken: spreadsheet token.
            sheetid: target sheet id.
            row_index: 1-based row number; new rows are inserted before it.
            count: number of rows to insert (default 1).
            inherit_style: style inheritance, "BEFORE"/"AFTER"/None.

        Returns:
            The API response.

        Raises:
            Exception: when the insert fails; handling is left to the caller.
        """
        # Fetch the sheet metadata first to learn the current row count.
        sheet_props = self.get_sheet_properties(access_token, doctoken, sheetid)
        if not sheet_props:
            print("无法获取工作表信息,尝试直接插入")
            current_row_count = 1000  # fallback default
        else:
            current_row_count = sheet_props['row_count']
            print(f"当前工作表行数: {current_row_count}")
        # Cannot insert before a row that does not exist (inserting at the
        # end+1 position is still allowed).
        if row_index > current_row_count + 1:
            print(f"插入位置({row_index})超过当前行数+1({current_row_count + 1}),使用追加模式")
            return self.append_empty_rows(access_token, doctoken, sheetid, count)
        url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/insert_dimension_range"
        headers = {
            'Content-Type': 'application/json; charset=utf-8',
            'Authorization': f'Bearer {access_token}'
        }
        # Convert to 0-based indices: row_index=3 (third row) -> startIndex=2.
        start_index = row_index - 1  # 0-based
        end_index = start_index + count  # exclusive end
        payload = {
            "dimension": {
                "sheetId": sheetid,
                "majorDimension": "ROWS",
                "startIndex": start_index,  # 0-based
                "endIndex": end_index  # exclusive
            }
        }
        if inherit_style is not None:
            payload["inheritStyle"] = inherit_style
        try:
            resp = request("POST", url, headers, payload)
            print(f"在第{row_index}行前成功插入{count}行(startIndex={start_index}, endIndex={end_index}, inheritStyle={inherit_style})")
            return resp
        except Exception as e:
            print(f"在第{row_index}行前插入{count}行失败: {e}")
            raise  # let the caller decide how to handle the failure
  539. def insert_row_with_images(self, access_token, doctoken, sheetid, row, values, compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
  540. """
  541. 在指定行插入数据并同时处理图片写入(覆盖方式)
  542. Args:
  543. access_token: 访问令牌
  544. doctoken: 表格token
  545. sheetid: 工作表ID
  546. row: 目标行号
  547. values: 要插入的数据
  548. compress_image: 是否压缩图片
  549. grid_width: 拼接图片的列数
  550. grid_height: 拼接图片的行数
  551. border_width: 边框宽度
  552. border_color: 边框颜色
  553. Returns:
  554. 操作结果
  555. """
  556. # 1. 先插入文本数据(覆盖指定行)
  557. result = self.insert_data_at_row(access_token, doctoken, sheetid, row, values)
  558. if not result:
  559. return None
  560. # 2. 同时处理图片写入
  561. if values and len(values) > 0:
  562. row_data = values[0]
  563. for col_index, cell in enumerate(row_data, start=1):
  564. if is_image_list_cell_url(cell):
  565. # 处理图片列表
  566. try:
  567. img_urls = eval(cell)
  568. self.write_images_to_cell(access_token, doctoken, sheetid, img_urls, row, col_index, compress_image, grid_width, grid_height, border_width, border_color)
  569. except Exception as e:
  570. print(f"写入图片列表失败 (第{row}行第{col_index}列): {e}")
  571. elif is_image_cell(cell):
  572. # 处理单张图片
  573. image_type = get_image_type(cell)
  574. if image_type:
  575. try:
  576. self.write_image_to_cell(access_token, doctoken, sheetid, cell, row, col_index, image_type, compress_image)
  577. except Exception as e:
  578. print(f"写入单张图片失败 (第{row}行第{col_index}列): {e}")
  579. return result
  580. def update_specific_fields(self, access_token, doctoken, sheetid, row, field_updates, headers=None):
  581. """
  582. 只更新指定字段,其他字段保持不变
  583. Args:
  584. access_token: 访问令牌
  585. doctoken: 表格token
  586. sheetid: 工作表ID
  587. row: 目标行号(从1开始)
  588. field_updates: 字段更新字典,格式如 {"列名": "新值", "列B": "新值B"}
  589. 或者 {列索引: "新值", 2: "新值B"}(从1开始计数)
  590. headers: 表头列表,用于列名到列索引的映射。如果为None,则field_updates的key必须是列索引
  591. Returns:
  592. 操作结果
  593. """
  594. try:
  595. # 如果提供了headers且field_updates的key是列名,则转换为列索引
  596. if headers and field_updates:
  597. column_updates = {}
  598. for field_name, value in field_updates.items():
  599. if isinstance(field_name, str): # 如果是列名
  600. try:
  601. col_index = headers.index(field_name) + 1 # 转为1基索引
  602. column_updates[col_index] = value
  603. except ValueError:
  604. print(f"警告:找不到列名 '{field_name}',跳过更新")
  605. continue
  606. else: # 如果已经是列索引
  607. column_updates[field_name] = value
  608. else:
  609. column_updates = field_updates
  610. # 构建批量更新请求
  611. value_ranges = []
  612. for col_index, value in column_updates.items():
  613. col_letter = column_id(col_index)
  614. range_val = f"{sheetid}!{col_letter}{row}:{col_letter}{row}"
  615. value_ranges.append({
  616. "range": range_val,
  617. "values": [[value]]
  618. })
  619. body = {
  620. "valueRanges": value_ranges
  621. }
  622. result = self.batch_update_values(access_token, doctoken, body)
  623. if result:
  624. updated_fields = list(column_updates.keys())
  625. print(f"成功更新第{row}行的字段: {updated_fields}")
  626. return result
  627. except Exception as e:
  628. print(f"更新第{row}行指定字段失败: {e}")
  629. return None
  630. def update_row_with_specific_fields_and_images(self, access_token, doctoken, sheetid, row, field_updates, headers=None, compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
  631. """
  632. 更新指定字段并处理图片
  633. Args:
  634. access_token: 访问令牌
  635. doctoken: 表格token
  636. sheetid: 工作表ID
  637. row: 目标行号
  638. field_updates: 字段更新字典
  639. headers: 表头列表
  640. compress_image: 是否压缩图片
  641. grid_width: 拼接图片的列数
  642. grid_height: 拼接图片的行数
  643. border_width: 边框宽度
  644. border_color: 边框颜色
  645. Returns:
  646. 操作结果
  647. """
  648. # 1. 先更新文本数据
  649. result = self.update_specific_fields(access_token, doctoken, sheetid, row, field_updates, headers)
  650. if not result:
  651. return None
  652. # 2. 处理图片写入
  653. column_updates = {}
  654. if headers and field_updates:
  655. for field_name, value in field_updates.items():
  656. if isinstance(field_name, str): # 如果是列名
  657. try:
  658. col_index = headers.index(field_name) + 1
  659. column_updates[col_index] = value
  660. except ValueError:
  661. continue
  662. else: # 如果已经是列索引
  663. column_updates[field_name] = value
  664. else:
  665. column_updates = field_updates
  666. for col_index, cell in column_updates.items():
  667. if is_image_list_cell_url(cell):
  668. # 处理图片列表
  669. try:
  670. img_urls = eval(cell)
  671. self.write_images_to_cell(access_token, doctoken, sheetid, img_urls, row, col_index, compress_image, grid_width, grid_height, border_width, border_color)
  672. except Exception as e:
  673. print(f"写入图片列表失败 (第{row}行第{col_index}列): {e}")
  674. elif is_image_cell(cell):
  675. # 处理单张图片
  676. image_type = get_image_type(cell)
  677. if image_type:
  678. try:
  679. self.write_image_to_cell(access_token, doctoken, sheetid, cell, row, col_index, image_type, compress_image)
  680. except Exception as e:
  681. print(f"写入单张图片失败 (第{row}行第{col_index}列): {e}")
  682. return result
  683. def insert_row_with_data_at_position(self, access_token, doctoken, sheetid, row_position, values, compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
  684. """
  685. 在指定位置真正插入新行并填入数据
  686. Args:
  687. access_token: 访问令牌
  688. doctoken: 表格token
  689. sheetid: 工作表ID
  690. row_position: 插入位置(从1开始,在此行前插入)
  691. values: 要插入的数据
  692. compress_image: 是否压缩图片
  693. grid_width: 拼接图片的列数
  694. grid_height: 拼接图片的行数
  695. border_width: 边框宽度
  696. border_color: 边框颜色
  697. Returns:
  698. 操作结果
  699. """
  700. # 获取当前工作表行数
  701. sheet_props = self.get_sheet_properties(access_token, doctoken, sheetid)
  702. current_row_count = sheet_props['row_count'] if sheet_props else 1
  703. # 1. 先插入空行
  704. insert_result = self.insert_rows_before(access_token, doctoken, sheetid, row_position, 1)
  705. if not insert_result:
  706. print(f"插入空行失败,无法在第{row_position}行插入数据")
  707. return None
  708. # 如果是追加模式(插入位置超过了原有行数),实际数据位置是当前行数+1
  709. actual_row_position = row_position
  710. if row_position > current_row_count:
  711. actual_row_position = current_row_count + 1
  712. print(f"追加模式:实际数据插入位置调整为第{actual_row_position}行")
  713. # 2. 再在新插入的行中填入数据
  714. result = self.insert_data_at_row(access_token, doctoken, sheetid, actual_row_position, values)
  715. if not result:
  716. print(f"插入数据失败")
  717. return None
  718. # 3. 同时处理图片写入
  719. if values and len(values) > 0:
  720. row_data = values[0]
  721. for col_index, cell in enumerate(row_data, start=1):
  722. if is_image_list_cell_url(cell):
  723. # 处理图片列表
  724. try:
  725. img_urls = eval(cell)
  726. self.write_images_to_cell(access_token, doctoken, sheetid, img_urls, actual_row_position, col_index, compress_image, grid_width, grid_height, border_width, border_color)
  727. except Exception as e:
  728. print(f"写入图片列表失败 (第{actual_row_position}行第{col_index}列): {e}")
  729. elif is_image_cell(cell):
  730. # 处理单张图片
  731. image_type = get_image_type(cell)
  732. if image_type:
  733. try:
  734. self.write_image_to_cell(access_token, doctoken, sheetid, cell, actual_row_position, col_index, image_type, compress_image)
  735. except Exception as e:
  736. print(f"写入单张图片失败 (第{actual_row_position}行第{col_index}列): {e}")
  737. return result
  738. def get_sheet_info(self, access_token, doctoken, sheetid):
  739. """
  740. 获取工作表的基础信息
  741. Args:
  742. access_token: 访问令牌
  743. doctoken: 表格token
  744. sheetid: 工作表ID
  745. Returns:
  746. 工作表信息,包含行数、列数等
  747. """
  748. url = f"{self._host}/open-apis/sheets/v3/spreadsheets/{doctoken}/sheets/{sheetid}"
  749. headers = {
  750. 'Content-Type': 'application/json; charset=utf-8',
  751. 'Authorization': f'Bearer {access_token}'
  752. }
  753. try:
  754. resp = request("GET", url, headers)
  755. return resp['data']['sheet']
  756. except Exception as e:
  757. print(f"获取工作表信息失败: {e}")
  758. return None
  759. def get_sheet_properties(self, access_token, doctoken, sheetid):
  760. """
  761. 获取工作表属性,包括行数和列数
  762. Args:
  763. access_token: 访问令牌
  764. doctoken: 表格token
  765. sheetid: 工作表ID
  766. Returns:
  767. dict: 包含 row_count, column_count 等信息
  768. """
  769. sheet_info = self.get_sheet_info(access_token, doctoken, sheetid)
  770. if sheet_info:
  771. grid_properties = sheet_info.get('grid_properties', {})
  772. return {
  773. 'row_count': grid_properties.get('row_count', 0),
  774. 'column_count': grid_properties.get('column_count', 0),
  775. 'title': sheet_info.get('title', ''),
  776. 'sheet_id': sheet_info.get('sheet_id', ''),
  777. 'sheet_type': sheet_info.get('sheet_type', '')
  778. }
  779. return None
  780. def append_data(self, access_token, doctoken, range_val, values):
  781. """
  782. 在指定位置后面追加数据
  783. Args:
  784. access_token: 访问令牌
  785. doctoken: 表格token
  786. range_val: 追加范围,格式如 "Sheet1!A1:C1"
  787. values: 要追加的数据
  788. Returns:
  789. 操作结果
  790. """
  791. url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/values_append"
  792. headers = {
  793. 'Content-Type': 'application/json; charset=utf-8',
  794. 'Authorization': f'Bearer {access_token}'
  795. }
  796. payload = {
  797. "valueRange": {
  798. "range": range_val,
  799. "values": values
  800. }
  801. }
  802. try:
  803. resp = request("POST", url, headers, payload)
  804. return resp
  805. except Exception as e:
  806. print(f"追加数据失败: {e}")
  807. return None
  808. def delete_rows(self, access_token, doctoken, sheetid, start_row, end_row):
  809. """
  810. 删除指定范围的行
  811. Args:
  812. access_token: 访问令牌
  813. doctoken: 表格token
  814. sheetid: 工作表ID
  815. start_row: 开始行号(从1开始)
  816. end_row: 结束行号(从1开始,包含)
  817. Returns:
  818. 操作结果
  819. """
  820. url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/dimension_range"
  821. headers = {
  822. 'Content-Type': 'application/json; charset=utf-8',
  823. 'Authorization': f'Bearer {access_token}'
  824. }
  825. payload = {
  826. "dimension": {
  827. "sheetId": sheetid,
  828. "majorDimension": "ROWS",
  829. "startIndex": start_row, # 1-based inclusive
  830. "endIndex": end_row # 1-based inclusive
  831. }
  832. }
  833. try:
  834. resp = request("DELETE", url, headers, payload)
  835. return resp
  836. except Exception as e:
  837. print(f"删除第{start_row}-{end_row}行失败: {e}")
  838. return None
  839. def delete_single_row(self, access_token, doctoken, sheetid, row):
  840. """
  841. 删除单行
  842. Args:
  843. access_token: 访问令牌
  844. doctoken: 表格token
  845. sheetid: 工作表ID
  846. row: 行号(从1开始)
  847. Returns:
  848. 操作结果
  849. """
  850. return self.delete_rows(access_token, doctoken, sheetid, row, row)
  851. def append_empty_rows(self, access_token, doctoken, sheetid, count=1):
  852. """
  853. 在工作表末尾追加空行(不继承样式)
  854. 常与 insert_rows_before 配合使用:先用本方法扩展 sheet 行数(绕过
  855. insert_dimension_range 的 endIndex <= sheetMaxRowCount 限制),
  856. 再用 insert_rows_before 插入带样式的行。
  857. Args:
  858. access_token: 访问令牌
  859. doctoken: 表格token
  860. sheetid: 工作表ID
  861. count: 追加行数(默认1行)
  862. Returns:
  863. 操作结果
  864. """
  865. # 获取当前工作表信息
  866. sheet_props = self.get_sheet_properties(access_token, doctoken, sheetid)
  867. if not sheet_props:
  868. print("无法获取工作表信息,追加失败")
  869. return None
  870. current_row_count = sheet_props['row_count']
  871. current_col_count = sheet_props['column_count']
  872. print(f"在工作表末尾追加{count}行,当前行数: {current_row_count}")
  873. # 构造空数据行
  874. empty_values = [[''] * max(current_col_count, 1) for _ in range(count)]
  875. # 使用append_data在末尾追加
  876. range_val = f"{sheetid}!A{current_row_count + 1}:{column_id(max(current_col_count, 1))}{current_row_count + count}"
  877. try:
  878. result = self.append_data(access_token, doctoken, range_val, empty_values)
  879. if result:
  880. print(f"成功在末尾追加{count}行空行")
  881. return result
  882. except Exception as e:
  883. print(f"追加空行失败: {e}")
  884. return None
  885. # -*- coding: UTF-8 -*-
  886. import json
  887. import logging
  888. from datetime import datetime
  889. import re
  890. import os
  891. import requests
  892. from urllib.parse import urlparse
# Basic logging setup: timestamp - level - message.
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
import os  # re-import is harmless; os is already imported above
# Log the working directory at import time to aid debugging relative paths.
logging.info(os.getcwd())
  897. def column_id(col):
  898. '''column int to string id'''
  899. ans = ""
  900. i = col
  901. while i > 0:
  902. m = int((i-1) % 26)
  903. i = int((i-1) / 26)
  904. ans = chr(m+65) + ans
  905. return ans
  906. def get_image_type(url):
  907. '''根据图片URL获取图片类型'''
  908. try:
  909. # 发送 HEAD 请求以获取头信息
  910. path = urlparse(url).path
  911. ext = path.split('.')[-1].lower()
  912. if ext in ['jpg', 'jpeg', 'png', 'gif']:
  913. return ext
  914. ext = 'jpeg'
  915. if 'jpg' in url:
  916. ext = 'jpg'
  917. if 'jpeg' in url:
  918. ext = 'jpeg'
  919. if 'png' in url:
  920. ext = 'png'
  921. if 'gif' in url:
  922. ext = 'gif'
  923. if "webp" in url:
  924. ext = "webp"
  925. # 如果无法确定类型,返回 None
  926. return ext
  927. except Exception as e:
  928. print(f"获取图片类型时出错: {str(e)}")
  929. return None
  930. def is_image_cell(cell):
  931. # 判断是否包含中文字符
  932. if isinstance(cell, str):
  933. for char in cell:
  934. if '\u4e00' <= char <= '\u9fff':
  935. return False
  936. is_image = False
  937. if (
  938. isinstance(cell, str) and
  939. cell.startswith('http') and
  940. (
  941. re.match(r'https?://.+\.(jpg|jpeg|png|gif|webp).*', cell, re.I) or re.match(r'http?://.+\.(jpg|jpeg|png|gif|webp).*', cell, re.I) or
  942. ('xhscdn.com' in cell and 'format/jpg' in cell) or
  943. ('rescdn.yishihui.com' in cell and 'jpg' in cell) or
  944. 'sns-webpic-qc.xhscdn.com' in cell or 'ci.xiaohongshu.com' in cell
  945. )
  946. ):
  947. is_image = True
  948. return is_image
  949. def is_image_list_cell_url(cell):
  950. if isinstance(cell, str) and cell.strip() and cell[0] == '[' and cell[-1] == ']':
  951. try:
  952. cell_obj = eval(cell)
  953. except:
  954. return False
  955. if type(cell_obj) == list:
  956. for c in cell_obj:
  957. if not is_image_cell(c):
  958. return False
  959. return True
  960. return False
  961. def write_images(client, access_token, token, sheetid, data, start_row=1, start_col=1, skip_col=[], compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
  962. '''将图片URL写入单元格'''
  963. for row_index, row in enumerate(data, start=1):
  964. if row_index < start_row:
  965. print(f"跳过行: {row_index}")
  966. continue
  967. for col_index, cell in enumerate(row, start=1):
  968. # if cell is not None and "http" in cell and is_image_cell(cell) is False:
  969. # print(f"is_image_cell = {is_image_cell(cell)}, {cell}")
  970. if col_index < start_col:
  971. continue
  972. if col_index in skip_col:
  973. continue
  974. if is_image_list_cell_url(cell):
  975. # print(f"is_image_list_cell_url = True , {cell}")
  976. client.write_images_to_cell(access_token, token, sheetid, eval(cell), row_index, col_index, compress_image, grid_width, grid_height, border_width, border_color)
  977. elif is_image_cell(cell):
  978. image_type = get_image_type(cell)
  979. if image_type:
  980. client.write_image_to_cell(access_token, token, sheetid, cell, row_index, col_index,image_type, compress_image)
def merge_cells(client, access_token, token, sheetid, data ):
    # Merge vertical runs of identical values, column by column.
    # A merge fires only when the value CHANGES and the previous run spanned
    # more than one row.
    # NOTE(review): a run of equal values extending to the last row is never
    # merged, because no value change occurs after it — confirm whether that
    # is intended.
    row_cnt = len(data)
    col_cnt = len(data[0])
    for col in range(0,col_cnt):
        previous_row = 0
        previous_value = None
        for row in range(0,row_cnt):
            cell_value = data[row][col]
            if cell_value != previous_value :
                if row - previous_row > 1:
                    # 0-based indices converted to the API's 1-based
                    # coordinates; presumably client.merge_cells takes
                    # (start_row, end_row, start_col, end_col) — verify
                    # against the client definition.
                    client.merge_cells(access_token, token, sheetid, previous_row+1, row, col+1, col+1)
                previous_row = row
                previous_value= cell_value
  994. def pack_data(data, sheetid, start_row=1, start_col=1):
  995. rows = len(data)
  996. cols = len(data[0])
  997. range1 = f"{sheetid}!{column_id(start_col)}{start_row}:{column_id(cols)}{rows}"
  998. body = {
  999. "valueRanges": [
  1000. {
  1001. "range": range1,
  1002. "values": []
  1003. },
  1004. ]
  1005. }
  1006. print(range1)
  1007. for d in data[start_row-1:]:
  1008. row = []
  1009. for c in d[start_col-1:]:
  1010. row.append(c)
  1011. body["valueRanges"][0]["values"].append(row)
  1012. return body
  1013. def write_data_to_sheet(data, sheet_token='IoTOsjZ4khIqlOtTxnec8oTbn7c', sheetid=None, skip_text=False, skip_images=False, start_row=1, start_col=1, skip_col=[], compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
  1014. '''测试函数'''
  1015. # 初始化 API 客户端
  1016. client = Client(LARK_HOST)
  1017. # 获取租户访问令牌
  1018. access_token = client.get_tenant_access_token(APP_ID, APP_SECRET)
  1019. # 获取第一个 sheet_id
  1020. if sheetid is None:
  1021. sheetid = client.get_sheetid(access_token, sheet_token)
  1022. print(f"Sheet ID: {sheetid}")
  1023. # 构建并写入测试数据
  1024. body = pack_data(data,
  1025. sheetid, start_row=start_row, start_col=start_col)
  1026. if not skip_text:
  1027. client.batch_update_values(access_token, sheet_token, body)
  1028. # merge_cells(client, access_token, sheet_token, sheetid, data)
  1029. # 写入图片
  1030. if not skip_images:
  1031. write_images(client, access_token, sheet_token, sheetid, data, start_row=start_row, start_col=start_col, skip_col=skip_col, compress_image=compress_image, grid_width=grid_width, grid_height=grid_height, border_width=border_width, border_color=border_color)
  1032. def get_test_data():
  1033. data = [
  1034. ["标题1", "标题2", "标题3", "图片"],
  1035. [1, 2,2, "http://sns-webpic.xhscdn.com/1040g2sg316vc6tdrk4705o8h0c2095f1else4i8?imageView2/2/w/0/format/jpg/v3"],
  1036. [4, "https://cdn.midjourney.com/f78df4d5-9b8b-4ec7-ae34-5cc04d176f87/0_0.png", 6, "dd"],
  1037. # [7, 8, 9, "https://sns-webpic.xhscdn.com/1040g2sg317l7814ck4705n3aa5ik4jgjahhcam0?imageView2/2/w/0/format/jpg/v3"],
  1038. ]
  1039. return data
  1040. from typing import List, Dict
  1041. import pandas as pd
  1042. import json
  1043. def to_feishu(
  1044. res_list: List[Dict],
  1045. sheet_id: str = 'Qn9MAs',
  1046. sheet_token: str = 'Rbsysi6FChzCp7tfv19crkWNnEb',
  1047. start_row: int = 1,
  1048. start_col: int = 1,
  1049. grid_width: int = None,
  1050. grid_height: int = None,
  1051. border_width: int = 3,
  1052. border_color: tuple = (200, 200, 200),
  1053. ) -> None:
  1054. """
  1055. 将数据导出到飞书表格
  1056. Args:
  1057. res_list: 数据列表
  1058. sheet_id: 表格ID
  1059. sheet_token: 表格token
  1060. start_row: 起始行
  1061. start_col: 起始列
  1062. grid_width: 拼接图片的列数,如果为None则自动计算
  1063. grid_height: 拼接图片的行数,如果为None则自动计算
  1064. border_width: 边框宽度,像素
  1065. border_color: 边框颜色,RGB元组
  1066. """
  1067. from tqdm import tqdm
  1068. def truncate_by_bytes(text, max_bytes=450000):
  1069. """按字节长度截断文本"""
  1070. if not text:
  1071. return ""
  1072. text_str = str(text)
  1073. encoded = text_str.encode('utf-8')
  1074. if len(encoded) <= max_bytes:
  1075. return text_str
  1076. # 安全截断,避免截断多字节字符
  1077. truncated = encoded[:max_bytes]
  1078. while len(truncated) > 0:
  1079. try:
  1080. return truncated.decode('utf-8') + "...[已截断]"
  1081. except UnicodeDecodeError:
  1082. truncated = truncated[:-1]
  1083. return ""
  1084. res_new_v4 = []
  1085. for row in tqdm(res_list):
  1086. if not row:
  1087. continue
  1088. for k, v in row.items():
  1089. if isinstance(v, list):
  1090. if len(v) > 0 and v[0] and v[0].startswith('http'):
  1091. row[k] = truncate_by_bytes(str(v))
  1092. else:
  1093. json_str = json.dumps(v, ensure_ascii=False, separators=(',', ':'))
  1094. row[k] = truncate_by_bytes(json_str)
  1095. elif isinstance(v, dict):
  1096. json_str = json.dumps(v, ensure_ascii=False, indent=2)
  1097. row[k] = truncate_by_bytes(json_str)
  1098. else:
  1099. row[k] = truncate_by_bytes(v)
  1100. res_new_v4.append(row)
  1101. df = pd.DataFrame(res_new_v4)
  1102. df.fillna('', inplace=True)
  1103. header = df.columns.tolist()
  1104. data_rows = df.values.tolist()
  1105. data_with_header = [header] + data_rows
  1106. write_data_to_sheet(
  1107. data_with_header,
  1108. sheet_token=sheet_token,
  1109. sheetid=sheet_id,
  1110. start_col=start_col,
  1111. start_row=start_row,
  1112. grid_width=grid_width,
  1113. grid_height=grid_height,
  1114. border_width=border_width,
  1115. border_color=border_color,
  1116. )
  1117. def to_feishu_incremental(
  1118. res_list: List[Dict],
  1119. sort_field: str = '内容ID',
  1120. sheet_id: str = 'Qn9MAs',
  1121. sheet_token: str = 'Rbsysi6FChzCp7tfv19crkWNnEb',
  1122. unique_field: str = None, # 用于去重的唯一字段,默认使用sort_field
  1123. duplicate_strategy: str = 'skip', # 重复数据处理策略:'skip'跳过, 'delete'删除后插入, 'update'更新
  1124. update_fields: List[str] = None, # 当duplicate_strategy='update'时,指定要更新的字段列表。None表示更新所有字段
  1125. cleanup_duplicates: bool = True, # 是否先清理现有表格中的重复数据
  1126. keep_first: bool = True, # 清理重复数据时保留第一个(True)还是最后一个(False)
  1127. sort_ascending: bool = False, # 排序顺序:True为升序(从小到大),False为降序(从大到小)
  1128. grid_width: int = None,
  1129. grid_height: int = None,
  1130. border_width: int = 3,
  1131. border_color: tuple = (200, 200, 200),
  1132. ) -> None:
  1133. """
  1134. 逐行增量插入数据到飞书表格,按指定字段查找插入位置
  1135. Args:
  1136. res_list: 数据列表
  1137. sort_field: 用于排序的字段名,如 '内容ID'
  1138. sheet_id: 表格ID
  1139. sheet_token: 表格token
  1140. unique_field: 用于去重的唯一字段,默认使用sort_field
  1141. duplicate_strategy: 重复数据处理策略
  1142. - 'skip': 跳过重复数据(默认)
  1143. - 'delete': 删除重复数据后插入新数据
  1144. - 'update': 更新重复数据的指定字段
  1145. update_fields: 当duplicate_strategy='update'时,指定要更新的字段列表
  1146. - None: 更新所有字段(除了unique_field)
  1147. - ['字段1', '字段2']: 只更新指定的字段
  1148. cleanup_duplicates: 是否先清理现有表格中的重复数据
  1149. keep_first: 清理重复数据时保留第一个(True)还是最后一个(False)
  1150. sort_ascending: 排序顺序,True为升序(从小到大),False为降序(从大到小),默认False
  1151. grid_width: 拼接图片的列数,如果为None则自动计算
  1152. grid_height: 拼接图片的行数,如果为None则自动计算
  1153. border_width: 边框宽度,像素
  1154. border_color: 边框颜色,RGB元组
  1155. """
  1156. from tqdm import tqdm
  1157. import pandas as pd
  1158. import json
  1159. from typing import List
  1160. def truncate_by_bytes(text, max_bytes=450000):
  1161. """按字节长度截断文本"""
  1162. if not text:
  1163. return ""
  1164. text_str = str(text)
  1165. encoded = text_str.encode('utf-8')
  1166. if len(encoded) <= max_bytes:
  1167. return text_str
  1168. # 安全截断,避免截断多字节字符
  1169. truncated = encoded[:max_bytes]
  1170. while len(truncated) > 0:
  1171. try:
  1172. return truncated.decode('utf-8') + "...[已截断]"
  1173. except UnicodeDecodeError:
  1174. truncated = truncated[:-1]
  1175. return ""
  1176. # 初始化 API 客户端
  1177. client = Client(LARK_HOST)
  1178. access_token = client.get_tenant_access_token(APP_ID, APP_SECRET)
  1179. # 设置去重字段,默认使用排序字段
  1180. if unique_field is None:
  1181. unique_field = sort_field
  1182. # 1. 获取工作表基础信息
  1183. print("正在获取工作表信息...")
  1184. sheet_props = client.get_sheet_properties(access_token, sheet_token, sheet_id)
  1185. if not sheet_props:
  1186. print("获取工作表信息失败,使用默认范围")
  1187. max_col = 'ZZ'
  1188. max_row = 1000
  1189. else:
  1190. print(f"工作表信息: 行数={sheet_props['row_count']}, 列数={sheet_props['column_count']}")
  1191. max_col = column_id(sheet_props['column_count']) if sheet_props['column_count'] > 0 else 'ZZ'
  1192. max_row = sheet_props['row_count'] if sheet_props['row_count'] > 0 else 1000
  1193. # 2. 读取表头(使用精确范围)
  1194. print("正在读取表头...")
  1195. header_range = f"{sheet_id}!A1:{max_col}1" # 表头总是从A列开始读取
  1196. header_data = client.read_range_values(access_token, sheet_token, header_range)
  1197. if not header_data or not header_data[0] or all(not cell.strip() for cell in header_data[0] if cell):
  1198. print("表格为空,需要根据数据创建表头")
  1199. # 从第一条数据中提取字段名作为表头
  1200. if not res_list or not res_list[0]:
  1201. print("错误:无法从空数据中创建表头")
  1202. return
  1203. # 提取字段名
  1204. headers = list(res_list[0].keys())
  1205. print(f"创建表头: {headers}")
  1206. # 写入表头(表头不包含图片,使用普通插入即可)
  1207. header_range = f"{sheet_id}!A1:{column_id(len(headers))}1"
  1208. client.insert_data_at_row(access_token, sheet_token, sheet_id, 1, [headers])
  1209. # 表头创建后,从第二行开始插入数据
  1210. print("表头创建完成,开始插入数据...")
  1211. else:
  1212. # 解析现有表头
  1213. headers = [cell.strip() for cell in header_data[0] if cell is not None]
  1214. headers = [h for h in headers if h] # 移除空字段
  1215. print(f"读取到现有表头: {headers}")
  1216. # 检查排序字段和去重字段是否存在
  1217. if sort_field not in headers:
  1218. print(f"警告: 排序字段 '{sort_field}' 未在表头中找到。可用字段: {headers}")
  1219. # 如果找不到排序字段,就直接追加到末尾
  1220. # 使用工作表信息中的行数,或从第二行开始(如果刚创建了表头)
  1221. start_row = len(headers) + 1 if 'headers' in locals() else (max_row + 1 if sheet_props else 2)
  1222. to_feishu(res_list, sheet_id, sheet_token, start_row, 1, grid_width, grid_height, border_width, border_color)
  1223. return
  1224. if unique_field not in headers:
  1225. print(f"警告: 去重字段 '{unique_field}' 未在表头中找到,将使用排序字段 '{sort_field}' 进行去重")
  1226. unique_field = sort_field
  1227. sort_field_index = headers.index(sort_field)
  1228. sort_field_col = column_id(sort_field_index + 1) # 转换为列标识符,如A, B, C...
  1229. unique_field_index = headers.index(unique_field)
  1230. unique_field_col = column_id(unique_field_index + 1) # 转换为列标识符,如A, B, C...
  1231. # 3. 读取排序字段和去重字段的数据
  1232. print(f"正在读取排序字段 '{sort_field}' 和去重字段 '{unique_field}' 列数据...")
  1233. # 读取排序字段数据
  1234. sort_data_range = f"{sheet_id}!{sort_field_col}2:{sort_field_col}{max_row}"
  1235. all_sort_data = client.read_range_values(access_token, sheet_token, sort_data_range)
  1236. # 读取去重字段数据(如果与排序字段不同)
  1237. if unique_field != sort_field:
  1238. unique_data_range = f"{sheet_id}!{unique_field_col}2:{unique_field_col}{max_row}"
  1239. all_unique_data = client.read_range_values(access_token, sheet_token, unique_data_range)
  1240. else:
  1241. all_unique_data = all_sort_data
  1242. # 先清理空白行(排序字段和去重字段都为空的行)
  1243. print("检查并清理空白行...")
  1244. empty_rows_to_delete = []
  1245. if all_unique_data and all_sort_data:
  1246. for i in range(min(len(all_unique_data), len(all_sort_data))):
  1247. unique_row = all_unique_data[i] if i < len(all_unique_data) else None
  1248. sort_row = all_sort_data[i] if i < len(all_sort_data) else None
  1249. # 检查去重字段值
  1250. unique_value = ""
  1251. if unique_row and len(unique_row) > 0 and unique_row[0]:
  1252. unique_value = str(unique_row[0]).strip()
  1253. # 检查排序字段值
  1254. sort_value = ""
  1255. if sort_row and len(sort_row) > 0 and sort_row[0]:
  1256. sort_value = str(sort_row[0]).strip()
  1257. # 如果排序字段和去重字段都为空,标记为空白行
  1258. if not unique_value and not sort_value:
  1259. row_number = i + 2 # +2 因为从第2行开始,且行号从1开始
  1260. empty_rows_to_delete.append(row_number)
  1261. print(f"标记删除空白行: 第{row_number}行")
  1262. # 删除空白行
  1263. if empty_rows_to_delete:
  1264. print(f"开始删除 {len(empty_rows_to_delete)} 个空白行...")
  1265. # 按行号倒序删除,避免删除后行号变化的问题
  1266. empty_rows_to_delete.sort(reverse=True)
  1267. for row_to_delete in empty_rows_to_delete:
  1268. delete_result = client.delete_single_row(access_token, sheet_token, sheet_id, row_to_delete)
  1269. if delete_result:
  1270. print(f"成功删除空白行: 第{row_to_delete}行")
  1271. else:
  1272. print(f"删除空白行失败: 第{row_to_delete}行")
  1273. # 重新读取数据(删除后数据已经改变)
  1274. print("重新读取数据(清理空白行后)...")
  1275. # 重新读取排序字段数据
  1276. sort_data_range = f"{sheet_id}!{sort_field_col}2:{sort_field_col}{max_row}"
  1277. all_sort_data = client.read_range_values(access_token, sheet_token, sort_data_range)
  1278. # 重新读取去重字段数据
  1279. if unique_field != sort_field:
  1280. unique_data_range = f"{sheet_id}!{unique_field_col}2:{unique_field_col}{max_row}"
  1281. all_unique_data = client.read_range_values(access_token, sheet_token, unique_data_range)
  1282. else:
  1283. all_unique_data = all_sort_data
  1284. # 构建现有数据的去重集合
  1285. duplicate_rows_to_delete = []
  1286. if cleanup_duplicates and all_unique_data:
  1287. # 先分析重复数据
  1288. seen_unique_values = {} # 记录已见过的唯一值和对应行号
  1289. actual_data_rows = [] # 记录实际有数据的行号
  1290. print(f"开始分析重复数据,总共读取了 {len(all_unique_data)} 行数据")
  1291. # 先找出所有有效数据行及其对应的实际行号(必须同时有排序字段和去重字段的值)
  1292. for i in range(min(len(all_unique_data), len(all_sort_data) if all_sort_data else 0)):
  1293. unique_row = all_unique_data[i] if i < len(all_unique_data) else None
  1294. sort_row = all_sort_data[i] if i < len(all_sort_data) else None
  1295. # 检查去重字段值
  1296. unique_value = ""
  1297. if unique_row and len(unique_row) > 0 and unique_row[0]:
  1298. unique_value = str(unique_row[0]).strip()
  1299. # 检查排序字段值
  1300. sort_value = ""
  1301. if sort_row and len(sort_row) > 0 and sort_row[0]:
  1302. sort_value = str(sort_row[0]).strip()
  1303. # 只有当排序字段和去重字段都有值时,才认为是有效数据
  1304. if unique_value and sort_value:
  1305. actual_row_number = i + 2 # +2 因为从第2行开始,且行号从1开始
  1306. actual_data_rows.append((actual_row_number, unique_value, sort_value))
  1307. print(f"找到 {len(actual_data_rows)} 行有效数据")
  1308. # 分析重复数据
  1309. for actual_row_number, unique_value, sort_value in actual_data_rows:
  1310. if unique_value in seen_unique_values:
  1311. # 发现重复数据
  1312. if keep_first:
  1313. # 保留第一个,删除当前这个
  1314. duplicate_rows_to_delete.append(actual_row_number)
  1315. print(f"标记删除重复行: 第{actual_row_number}行 ({unique_field}={unique_value}, {sort_field}={sort_value})")
  1316. else:
  1317. # 保留最后一个,删除之前的
  1318. previous_row = seen_unique_values[unique_value]
  1319. duplicate_rows_to_delete.append(previous_row)
  1320. print(f"标记删除重复行: 第{previous_row}行 ({unique_field}={unique_value}, {sort_field}={sort_value})")
  1321. seen_unique_values[unique_value] = actual_row_number
  1322. else:
  1323. # 第一次见到这个唯一值
  1324. seen_unique_values[unique_value] = actual_row_number
  1325. # 执行清理:删除重复行
  1326. if duplicate_rows_to_delete:
  1327. print(f"开始清理 {len(duplicate_rows_to_delete)} 行重复数据...")
  1328. # 按行号倒序删除,避免删除后行号变化的问题
  1329. duplicate_rows_to_delete.sort(reverse=True)
  1330. for row_to_delete in duplicate_rows_to_delete:
  1331. delete_result = client.delete_single_row(access_token, sheet_token, sheet_id, row_to_delete)
  1332. if delete_result:
  1333. print(f"成功删除重复行: 第{row_to_delete}行")
  1334. else:
  1335. print(f"删除重复行失败: 第{row_to_delete}行")
  1336. # 重新读取数据(删除后数据已经改变)
  1337. print("重新读取排序和去重字段数据...")
  1338. # 重新读取排序字段数据
  1339. sort_data_range = f"{sheet_id}!{sort_field_col}2:{sort_field_col}{max_row}"
  1340. all_sort_data = client.read_range_values(access_token, sheet_token, sort_data_range)
  1341. # 重新读取去重字段数据
  1342. if unique_field != sort_field:
  1343. unique_data_range = f"{sheet_id}!{unique_field_col}2:{unique_field_col}{max_row}"
  1344. all_unique_data = client.read_range_values(access_token, sheet_token, unique_data_range)
  1345. else:
  1346. all_unique_data = all_sort_data
  1347. # 构建最终的去重集合(处理清理后的数据,必须同时有排序字段和去重字段的值)
  1348. existing_unique_values = set()
  1349. existing_unique_rows = {} # 用于update策略:{unique_value: row_number}
  1350. if all_unique_data and all_sort_data:
  1351. for i in range(min(len(all_unique_data), len(all_sort_data))):
  1352. unique_row = all_unique_data[i] if i < len(all_unique_data) else None
  1353. sort_row = all_sort_data[i] if i < len(all_sort_data) else None
  1354. # 检查去重字段值
  1355. unique_value = ""
  1356. if unique_row and len(unique_row) > 0 and unique_row[0]:
  1357. unique_value = str(unique_row[0]).strip()
  1358. # 检查排序字段值
  1359. sort_value = ""
  1360. if sort_row and len(sort_row) > 0 and sort_row[0]:
  1361. sort_value = str(sort_row[0]).strip()
  1362. # 只有当排序字段和去重字段都有值时,才添加到去重集合
  1363. if unique_value and sort_value:
  1364. actual_row_number = i + 2 # +2 因为从第2行开始,且行号从1开始
  1365. existing_unique_values.add(unique_value)
  1366. existing_unique_rows[unique_value] = actual_row_number
  1367. print(f"现有去重值数量: {len(existing_unique_values)}")
  1368. print(existing_unique_values)
  1369. # 获取排序数据用于插入位置计算(基于清理后的最新数据)
  1370. sort_data = []
  1371. if all_sort_data:
  1372. # 同时检查排序字段和去重字段,确保数据完整性
  1373. for i in range(min(len(all_sort_data), len(all_unique_data) if all_unique_data else 0)):
  1374. sort_row = all_sort_data[i] if i < len(all_sort_data) else None
  1375. unique_row = all_unique_data[i] if i < len(all_unique_data) else None
  1376. # 检查排序字段值
  1377. sort_value = ""
  1378. if sort_row and len(sort_row) > 0 and sort_row[0]:
  1379. sort_value = str(sort_row[0]).strip()
  1380. # 检查去重字段值
  1381. unique_value = ""
  1382. if unique_row and len(unique_row) > 0 and unique_row[0]:
  1383. unique_value = str(unique_row[0]).strip()
  1384. # 只有当排序字段和去重字段都有值时,才加入排序数据
  1385. if sort_value and unique_value:
  1386. sort_data.append([sort_value])
  1387. if not sort_data:
  1388. print("未读取到排序字段数据,所有新数据将从第二行开始插入")
  1389. # 处理新数据
  1390. processed_data = []
  1391. for row in tqdm(res_list, desc="处理数据"):
  1392. if not row:
  1393. continue
  1394. processed_row = {}
  1395. for k, v in row.items():
  1396. if isinstance(v, list):
  1397. if len(v) > 0 and v[0] and str(v[0]).startswith('http'):
  1398. processed_row[k] = truncate_by_bytes(str(v))
  1399. else:
  1400. json_str = json.dumps(v, ensure_ascii=False, indent=1)
  1401. processed_row[k] = truncate_by_bytes(json_str)
  1402. elif isinstance(v, dict):
  1403. json_str = json.dumps(v, ensure_ascii=False, indent=1)
  1404. processed_row[k] = truncate_by_bytes(json_str)
  1405. else:
  1406. processed_row[k] = truncate_by_bytes(v)
  1407. processed_data.append(processed_row)
  1408. # 转换为DataFrame以便操作
  1409. df_new = pd.DataFrame(processed_data)
  1410. df_new.fillna('', inplace=True)
  1411. # 确保新数据包含所有必要的列
  1412. for header in headers:
  1413. if header not in df_new.columns:
  1414. df_new[header] = ''
  1415. # 按表头顺序重新排列列
  1416. df_new = df_new.reindex(columns=headers, fill_value='')
  1417. # 预处理:过滤重复数据并确定插入顺序
  1418. print(f"预处理新数据:过滤重复并排序...")
  1419. print(f"传入数据总量: {len(df_new)} 行")
  1420. print(f"现有去重集合大小: {len(existing_unique_values)}")
  1421. valid_rows = []
  1422. update_rows = [] # 需要更新的行:[{row_number, values, unique_value}, ...]
  1423. skipped_count = 0
  1424. new_data_duplicates = 0 # 新数据内部重复计数
  1425. updated_count = 0 # 更新计数
  1426. for idx, new_row in df_new.iterrows():
  1427. new_row_values = new_row.tolist()
  1428. new_sort_value = str(new_row_values[sort_field_index])
  1429. new_unique_value = str(new_row_values[unique_field_index])
  1430. # 检查是否与现有数据重复
  1431. if new_unique_value in existing_unique_values:
  1432. if duplicate_strategy == 'update':
  1433. # 更新策略:记录需要更新的行
  1434. target_row = existing_unique_rows[new_unique_value]
  1435. update_rows.append({
  1436. 'row_number': target_row,
  1437. 'values': new_row_values,
  1438. 'unique_value': new_unique_value
  1439. })
  1440. print(f"标记更新现有数据: 第{target_row}行 {unique_field}={new_unique_value}")
  1441. updated_count += 1
  1442. continue
  1443. elif duplicate_strategy == 'delete':
  1444. # 删除策略:先删除现有行,再插入新数据
  1445. target_row = existing_unique_rows[new_unique_value]
  1446. delete_result = client.delete_single_row(access_token, sheet_token, sheet_id, target_row)
  1447. if delete_result:
  1448. print(f"成功删除重复行: 第{target_row}行 {unique_field}={new_unique_value}")
  1449. # 从去重集合中移除,允许后续插入
  1450. existing_unique_values.remove(new_unique_value)
  1451. # 更新所有行号(删除后后面的行号会前移)
  1452. for key, row_num in existing_unique_rows.items():
  1453. if row_num > target_row:
  1454. existing_unique_rows[key] = row_num - 1
  1455. del existing_unique_rows[new_unique_value]
  1456. else:
  1457. print(f"删除重复行失败: 第{target_row}行 {unique_field}={new_unique_value}")
  1458. skipped_count += 1
  1459. continue
  1460. else: # 'skip' 策略
  1461. print(f"跳过与现有数据重复: {unique_field}={new_unique_value}")
  1462. skipped_count += 1
  1463. continue
  1464. # 检查新数据内部是否重复
  1465. already_processed = any(row['unique_value'] == new_unique_value for row in valid_rows)
  1466. if already_processed:
  1467. print(f"跳过新数据内部重复: {unique_field}={new_unique_value}")
  1468. new_data_duplicates += 1
  1469. continue
  1470. # 添加到待插入列表
  1471. valid_rows.append({
  1472. 'values': new_row_values,
  1473. 'sort_value': new_sort_value,
  1474. 'unique_value': new_unique_value
  1475. })
  1476. print(f"预处理完成:有效数据 {len(valid_rows)} 行,需要更新 {len(update_rows)} 行,跳过与现有重复 {skipped_count} 行,跳过新数据内部重复 {new_data_duplicates} 行")
  1477. # 处理更新操作
  1478. if update_rows:
  1479. print(f"开始执行更新操作,共 {len(update_rows)} 行...")
  1480. for update_data in tqdm(update_rows, desc="更新数据"):
  1481. row_number = update_data['row_number']
  1482. new_values = update_data['values']
  1483. unique_value = update_data['unique_value']
  1484. # 构建字段更新字典
  1485. if update_fields is None:
  1486. # 更新所有字段,但排除unique_field(避免修改关键字段)
  1487. field_updates = {}
  1488. for i, header in enumerate(headers):
  1489. if header != unique_field: # 不更新去重字段
  1490. field_updates[header] = new_values[i]
  1491. print(f"更新第{row_number}行所有字段(除了{unique_field}): {unique_value}")
  1492. else:
  1493. # 只更新指定字段
  1494. field_updates = {}
  1495. for field_name in update_fields:
  1496. if field_name in headers:
  1497. field_index = headers.index(field_name)
  1498. field_updates[field_name] = new_values[field_index]
  1499. else:
  1500. print(f"警告:字段 '{field_name}' 不存在于表头中,跳过")
  1501. print(f"更新第{row_number}行指定字段 {list(field_updates.keys())}: {unique_value}")
  1502. # 执行更新
  1503. if field_updates:
  1504. result = client.update_row_with_specific_fields_and_images(
  1505. access_token, sheet_token, sheet_id, row_number,
  1506. field_updates, headers, True, grid_width, grid_height, border_width, border_color
  1507. )
  1508. if result:
  1509. print(f"✅ 成功更新第{row_number}行")
  1510. else:
  1511. print(f"❌ 更新第{row_number}行失败")
  1512. if not valid_rows:
  1513. if update_rows:
  1514. print("所有数据均为更新操作,无新数据需要插入")
  1515. else:
  1516. print("没有新数据需要插入")
  1517. return
  1518. # 按排序字段排序新数据(根据sort_ascending参数决定排序方向)
  1519. if sort_ascending:
  1520. # 升序排序:小的值先插入(reverse=False)
  1521. valid_rows.sort(key=lambda x: x['sort_value'], reverse=False)
  1522. print(f"新数据排序完成,将按升序插入")
  1523. else:
  1524. # 降序排序:大的值先插入(reverse=True)
  1525. valid_rows.sort(key=lambda x: x['sort_value'], reverse=True)
  1526. print(f"新数据排序完成,将按降序插入")
  1527. # 逐行插入已排序的数据
  1528. for i, row_data in tqdm(enumerate(valid_rows), total=len(valid_rows), desc="插入数据"):
  1529. new_row_values = row_data['values']
  1530. new_sort_value = row_data['sort_value']
  1531. new_unique_value = row_data['unique_value']
  1532. # 找到合适的插入位置(根据sort_ascending参数确定排序方向)
  1533. insert_row = len(sort_data) + 2 # 默认插入到末尾
  1534. print(f"查找插入位置,新值: {new_sort_value}")
  1535. # 找到两个相邻ID之间的正确插入位置
  1536. if sort_ascending:
  1537. # 升序排列:小 → 大,需要找到 prev_value < new_value < current_value 的位置
  1538. for j in range(len(sort_data)):
  1539. current_value = str(sort_data[j][0]) if sort_data[j] and len(sort_data[j]) > 0 else ""
  1540. prev_value = str(sort_data[j-1][0]) if j > 0 and sort_data[j-1] and len(sort_data[j-1]) > 0 else None
  1541. # 检查是否应该插入到当前位置
  1542. if prev_value is None:
  1543. # 这是第一个位置,检查是否应该插入到最前面
  1544. if new_sort_value < current_value:
  1545. insert_row = j + 2 # +2 因为表头偏移
  1546. print(f" 插入到最前面第{insert_row}行: 新值{new_sort_value} < 第一个值{current_value}")
  1547. break
  1548. else:
  1549. # 检查是否在两个相邻值之间
  1550. if new_sort_value >= prev_value and new_sort_value < current_value:
  1551. insert_row = j + 2 # +2 因为表头偏移
  1552. print(f" 插入到第{insert_row}行: {prev_value} <= {new_sort_value} < {current_value}")
  1553. break
  1554. elif new_sort_value == current_value:
  1555. # 值相等时插入到相等值之后
  1556. insert_row = j + 3 # +2(表头偏移) +1(插入到此行之后)
  1557. print(f" 插入到第{insert_row}行: 新值{new_sort_value} = 现有值{current_value},插入其后")
  1558. break
  1559. # 如果遍历完都没有找到位置,说明新值是最大的,插入到末尾
  1560. if insert_row == len(sort_data) + 2:
  1561. last_value = str(sort_data[-1][0]) if sort_data and sort_data[-1] and len(sort_data[-1]) > 0 else "无"
  1562. print(f" 插入到末尾第{insert_row}行: 新值{new_sort_value} > 最后一个值{last_value}")
  1563. else:
  1564. # 降序排列:大 → 小,需要找到 prev_value > new_value > current_value 的位置
  1565. for j in range(len(sort_data)):
  1566. current_value = str(sort_data[j][0]) if sort_data[j] and len(sort_data[j]) > 0 else ""
  1567. prev_value = str(sort_data[j-1][0]) if j > 0 and sort_data[j-1] and len(sort_data[j-1]) > 0 else None
  1568. # 检查是否应该插入到当前位置
  1569. if prev_value is None:
  1570. # 这是第一个位置,检查是否应该插入到最前面
  1571. if new_sort_value > current_value:
  1572. insert_row = j + 2 # +2 因为表头偏移
  1573. print(f" 插入到最前面第{insert_row}行: 新值{new_sort_value} > 第一个值{current_value}")
  1574. break
  1575. else:
  1576. # 检查是否在两个相邻值之间
  1577. if new_sort_value <= prev_value and new_sort_value > current_value:
  1578. insert_row = j + 2 # +2 因为表头偏移
  1579. print(f" 插入到第{insert_row}行: {prev_value} >= {new_sort_value} > {current_value}")
  1580. break
  1581. elif new_sort_value == current_value:
  1582. # 值相等时插入到相等值之后
  1583. insert_row = j + 3 # +2(表头偏移) +1(插入到此行之后)
  1584. print(f" 插入到第{insert_row}行: 新值{new_sort_value} = 现有值{current_value},插入其后")
  1585. break
  1586. # 如果遍历完都没有找到位置,说明新值是最小的,插入到末尾
  1587. if insert_row == len(sort_data) + 2:
  1588. last_value = str(sort_data[-1][0]) if sort_data and sort_data[-1] and len(sort_data[-1]) > 0 else "无"
  1589. print(f" 插入到末尾第{insert_row}行: 新值{new_sort_value} < 最后一个值{last_value}")
  1590. print(f"[{i+1}/{len(valid_rows)}] 最终插入位置: 第 {insert_row} 行: {sort_field}={new_sort_value}")
  1591. # 插入数据到指定行(真正插入新行)
  1592. result = client.insert_row_with_data_at_position(access_token, sheet_token, sheet_id, insert_row, [new_row_values], True, grid_width, grid_height, border_width, border_color)
  1593. if result:
  1594. print(f"成功插入数据和图片到第 {insert_row} 行")
  1595. # 更新sort_data:在正确的位置添加新的排序值
  1596. sort_data_index = insert_row - 2 # 转换为sort_data的索引(-2因为表头偏移)
  1597. sort_data.insert(sort_data_index, [new_sort_value])
  1598. # 更新去重集合
  1599. existing_unique_values.add(new_unique_value)
  1600. else:
  1601. print(f"插入数据到第 {insert_row} 行失败")
if __name__ == "__main__":
    # Scratch / manual-test calls kept for reference (disabled):
    # data = get_test_data()
    # sheet_token = 'IoTOsjZ4khIqlOtTxnec8oTbn7c'
    # sheetid = 'K9c4LG'
    # write_data_to_sheet(data, sheetid=sheetid)
    # is_image_cell_result = is_image_cell('["http://sns-webpic-qc.xhscdn.com/202501021415/1a6e88908930afce92b09206d5a482f8/1040g2sg31b74rf6k7g5g5oo7i8vkgev59lkjet0!nd_whlt34_webp_wm_1","http://sns-webpic-qc.xhscdn.com/202501021415/1a6e88908930afce92b09206d5a482f8/1040g2sg31b74rf6k7g5g5oo7i8vkgev59lkjet0!nd_whlt34_webp_wm_1"]')
    # print(is_image_cell_result)
    # Usage examples for the newly added incremental-insert function.
    # NOTE(review): the block below is a bare string literal — it is evaluated
    # and discarded at runtime (a no-op), serving purely as inline documentation.
    # Its content is left untranslated because string literals are runtime text.
    """
    示例:使用 to_feishu_incremental 增量插入数据
    # 测试数据
    test_data = [
    {
    '内容ID': '1001',
    '标题': '测试标题1',
    '内容': '测试内容1',
    '图片': '["http://example.com/image1.jpg", "http://example.com/image2.jpg"]'
    },
    {
    '内容ID': '1003',
    '标题': '测试标题2',
    '内容': '测试内容2',
    '图片': 'http://example.com/image3.jpg'
    }
    ]
    # 调用增量插入函数
    to_feishu_incremental(
    res_list=test_data,
    sort_field='内容ID', # 按此字段排序
    sheet_id='your_sheet_id',
    sheet_token='your_sheet_token',
    unique_field='内容ID', # 去重字段,默认使用sort_field
    duplicate_strategy='update', # 重复处理策略:'skip'跳过, 'delete'删除后插入, 'update'更新指定字段
    update_fields=['标题', '内容', '图片'], # 当strategy='update'时,只更新这些字段
    cleanup_duplicates=True, # 先清理现有表格中的重复数据
    keep_first=True, # 清理时保留第一个重复项
    sort_ascending=False, # 排序顺序:False为降序(大→小),True为升序(小→大)
    grid_width=2, # 图片拼接列数
    grid_height=2, # 图片拼接行数
    )
    # 排序方向示例:
    # 示例1:按时间戳降序排序(最新的在前面)- 适合新闻、动态等时间敏感内容
    to_feishu_incremental(
    res_list=news_data,
    sort_field='发布时间',
    sort_ascending=False, # 降序,最新时间在前面
    # ... 其他参数
    )
    # 示例2:按ID升序排序(从小到大)- 适合有明确编号顺序的内容
    to_feishu_incremental(
    res_list=product_data,
    sort_field='产品ID',
    sort_ascending=True, # 升序,小ID在前面
    # ... 其他参数
    )
    # 示例3:按优先级降序排序(高优先级在前面)- 适合任务、问题等需要优先级管理的内容
    to_feishu_incremental(
    res_list=task_data,
    sort_field='优先级',
    sort_ascending=False, # 降序,高优先级在前面
    # ... 其他参数
    )
    功能说明:
    1. **智能表头处理**:
    - 如果表格为空,自动从数据中提取字段名创建表头
    - 如果表格已有数据,读取现有表头结构
    2. **空白行清理**:
    - 自动检测并删除排序字段和去重字段都为空的空白行
    - 确保数据的连续性和逻辑一致性
    3. **重复数据清理**:
    - cleanup_duplicates=True: 先清理现有表格中的重复数据
    - keep_first: 保留第一个或最后一个重复项
    4. **智能去重检查**:
    - 基于 unique_field 字段检查数据是否已存在
    - 预处理阶段过滤重复数据,避免插入过程中的状态变化问题
    5. **排序插入**:根据指定的 sort_field 字段和 sort_ascending 参数查找插入位置
    - sort_ascending=False(默认):降序排序,较大的值插入到较前面的位置
    - sort_ascending=True:升序排序,较小的值插入到较前面的位置
    6. **逐行数据插入**:按排序顺序逐行插入数据,保持表格整体有序
    7. **完整图片支持**:自动处理图片写入,支持单张图片和图片列表
    8. **图片拼接功能**:支持多图拼接,可设置拼接的行列数和边框样式
    适用场景:
    - ✅ 空表格:自动创建表头并插入数据
    - ✅ 已有重复数据的表格:先清理重复,再智能插入
    - ✅ 增量数据更新:逐条插入,保持排序,自动去重
    - ✅ 重复运行安全:不会插入重复数据
    - ✅ 数据清理:一键清理现有重复数据
    - ✅ 灵活排序:支持升序和降序两种排序方式
    """