# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/5/27
import json
import time

import requests
import urllib3

from common import Common

proxies = {"http": None, "https": None}


class Feishu:
    """
    Helpers for editing Feishu spreadsheet documents.
    """
    # Kanyikan crawler data sheet
    kanyikan_url = "https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?"
    # Kuaishou crawler data sheet
    kuaishou_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?"
    # Weishi crawler data sheet
    weishi_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?"
    # Xiaoniangao crawler data sheet
    xiaoniangao_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?"
    # Twitter crawler data sheet
    twitter_url = "https://whtlrai9ej.feishu.cn/sheets/shtcn6BYfYuqegIP13ORB6rI2dh?"
    # Spreadsheet token of each crawler's Feishu document
    @classmethod
    def spreadsheettoken(cls, crawler):
        """
        :param crawler: which crawler
        :return: the spreadsheet token of that crawler's document
        """
        if crawler == "kanyikan":
            return "shtcngRPoDYAi24x52j2nDuHMih"
        elif crawler == "kuaishou":
            return "shtcnp4SaJt37q6OOOrYzPMjQkg"
        elif crawler == "weishi":
            return "shtcn5YSWg91JfVGzj0SFZIRRPh"
        elif crawler == "xiaoniangao":
            return "shtcnYxiyQ1wLklo1W5Kdqc9cGh"
        elif crawler == "twitter":
            return "shtcn6BYfYuqegIP13ORB6rI2dh"

    # Get a Feishu API token
    @classmethod
    def get_token(cls):
        """
        Get a Feishu tenant access token.
        :return: tenant_access_token
        """
        time.sleep(1)
        url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
        post_data = {"app_id": "cli_a13ad2afa438d00b",  # app_id / app_secret of the app published in the Feishu admin console
                     "app_secret": "4tK9LY9VbiQlY5umhE42dclBFo6t4p5O"}
        try:
            urllib3.disable_warnings()
            response = requests.post(url=url, data=post_data, proxies=proxies, verify=False)
            tenant_access_token = response.json()["tenant_access_token"]
            return tenant_access_token
        except Exception as e:
            Common.logger().error("Failed to get Feishu API token: {}", e)

    # Get spreadsheet metadata
    @classmethod
    def get_metainfo(cls, crawler):
        """
        Get the spreadsheet's metadata.
        :param crawler: which crawler
        :return: the metadata response
        """
        get_metainfo_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                           + cls.spreadsheettoken(crawler) + "/metainfo"
        headers = {
            "Authorization": "Bearer " + cls.get_token(),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            # Extra fields to return; extFields=protectedRange also returns protected row/column info
            "extFields": "protectedRange",
            # Type of user id to return: open_id or union_id
            "user_id_type": "open_id"
        }
        try:
            urllib3.disable_warnings()
            r = requests.get(url=get_metainfo_url, headers=headers, params=params, proxies=proxies, verify=False)
            response = json.loads(r.content.decode("utf8"))
            return response
        except Exception as e:
            Common.logger().error("Failed to get spreadsheet metadata: {}", e)

    # Read all data from a worksheet
    @classmethod
    def get_values_batch(cls, crawler, sheetid):
        """
        Read all data from a worksheet.
        :param crawler: which crawler
        :param sheetid: which worksheet
        :return: all values in the worksheet
        """
        get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                               + cls.spreadsheettoken(crawler) + "/values_batch_get"
        headers = {
            "Authorization": "Bearer " + cls.get_token(),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            # One or more query ranges, e.g. url?ranges=range1,range2, where each range
            # consists of a sheetId plus an optional cell range.
            "ranges": sheetid,
            # valueRenderOption=ToString returns plain-text values (numeric types excepted);
            # valueRenderOption=FormattedValue computes and formats the cells;
            # valueRenderOption=Formula returns the formula itself for cells that contain one;
            # valueRenderOption=UnformattedValue computes but does not format the cells.
            "valueRenderOption": "ToString",
            # dateTimeRenderOption=FormattedString computes date/time cells and formats them
            # according to their format (numbers are not formatted), returning the formatted string.
            "dateTimeRenderOption": "",
            # Type of user id to return: open_id or union_id
            "user_id_type": "open_id"
        }
        try:
            urllib3.disable_warnings()
            r = requests.get(url=get_values_batch_url, headers=headers, params=params, proxies=proxies, verify=False)
            response = json.loads(r.content.decode("utf8"))
            values = response["data"]["valueRanges"][0]["values"]
            return values
        except Exception as e:
            Common.logger().error("Failed to read worksheet data: {}", e)
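
    # Usage sketch (illustrative only, not called in this module): get_values_batch
    # returns the worksheet as a list of rows, each row a list of cell values, so the
    # header row is values[0]. The "twitter" crawler and sheet id "db114c" are taken
    # from the demo in __main__.
    #
    #   rows = Feishu.get_values_batch("twitter", "db114c")
    #   for row in rows[1:]:  # skip the header row
    #       print(row)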

    # Insert rows or columns into a worksheet
    @classmethod
    def insert_columns(cls, crawler, sheetid, majordimension, startindex, endindex):
        """
        Insert rows or columns into a worksheet.
        :param crawler: which crawler
        :param sheetid: which worksheet
        :param majordimension: rows or columns
        :param startindex: start position
        :param endindex: end position
        """
        insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                             + cls.spreadsheettoken(crawler) + "/insert_dimension_range"
        headers = {
            "Authorization": "Bearer " + cls.get_token(),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "dimension": {
                "sheetId": sheetid,
                "majorDimension": majordimension,  # defaults to ROWS; ROWS or COLUMNS
                "startIndex": startindex,  # start position
                "endIndex": endindex  # end position
            },
            "inheritStyle": "AFTER"  # BEFORE or AFTER; omit to not inherit style
        }
        try:
            urllib3.disable_warnings()
            r = requests.post(url=insert_columns_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger().info("Insert rows or columns: {}", r.json()["msg"])
        except Exception as e:
            Common.logger().error("Failed to insert rows or columns: {}", e)
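
    # Usage sketch (illustrative only): the two write helpers are typically combined,
    # inserting a blank row below the header and then filling it with update_values
    # (defined below). The "twitter" crawler and sheet id "db114c" are taken from the
    # demo in __main__; the range and cell values are made-up placeholders.
    #
    #   Feishu.insert_columns("twitter", "db114c", "ROWS", 1, 2)
    #   Feishu.update_values("twitter", "db114c", "A2:B2", [["tweet_id", "tweet_text"]])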

    # Write data
    @classmethod
    def update_values(cls, crawler, sheetid, ranges, values):
        """
        Write data into a worksheet.
        :param crawler: which crawler
        :param sheetid: which worksheet
        :param ranges: cell range to write to
        :param values: the data to write, as a list
        """
        update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                            + cls.spreadsheettoken(crawler) + "/values_batch_update"
        headers = {
            "Authorization": "Bearer " + cls.get_token(),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "valueRanges": [
                {
                    "range": sheetid + "!" + ranges,
                    "values": values
                },
            ],
        }
        try:
            urllib3.disable_warnings()
            r = requests.post(url=update_values_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger().info("Write data: {}", r.json()["msg"])
        except Exception as e:
            Common.logger().error("Failed to write data: {}", e)

    # Merge cells
    @classmethod
    def merge_cells(cls, crawler, sheetid, ranges):
        """
        Merge cells.
        :param crawler: which crawler
        :param sheetid: which worksheet
        :param ranges: the cell range to merge
        """
        merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                          + cls.spreadsheettoken(crawler) + "/merge_cells"
        headers = {
            "Authorization": "Bearer " + cls.get_token(),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "range": sheetid + "!" + ranges,
            "mergeType": "MERGE_ROWS"
        }
        try:
            urllib3.disable_warnings()
            r = requests.post(url=merge_cells_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger().info("Merge cells: {}", r.json()["msg"])
        except Exception as e:
            Common.logger().error("Failed to merge cells: {}", e)

    # Read cell data
    @classmethod
    def get_range_value(cls, crawler, sheetid, cell):
        """
        Read the contents of a cell.
        :param crawler: which crawler
        :param sheetid: which worksheet
        :param cell: which cell
        :return: the cell contents
        """
        get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                              + cls.spreadsheettoken(crawler) + "/values/" + sheetid + "!" + cell
        headers = {
            "Authorization": "Bearer " + cls.get_token(),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            # valueRenderOption=ToString returns plain-text values (numeric types excepted);
            # valueRenderOption=FormattedValue computes and formats the cells;
            # valueRenderOption=Formula returns the formula itself for cells that contain one;
            # valueRenderOption=UnformattedValue computes but does not format the cells.
            "valueRenderOption": "FormattedValue",
            # dateTimeRenderOption=FormattedString computes date/time cells and formats them
            # according to their format (numbers are not formatted), returning the formatted string.
            "dateTimeRenderOption": "",
            # Type of user id to return: open_id or union_id
            "user_id_type": "open_id"
        }
        try:
            urllib3.disable_warnings()
            r = requests.get(url=get_range_value_url, headers=headers, params=params, proxies=proxies, verify=False)
            return r.json()["data"]["valueRange"]["values"][0]
        except Exception as e:
            Common.logger().error("Failed to read cell data: {}", e)
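
    # Usage sketch (illustrative only): read a single cell. The "twitter" crawler and
    # sheet id "db114c" are taken from the demo in __main__; the cell "B2" is a
    # made-up placeholder. The method returns the first row of the requested range
    # as a list of values.
    #
    #   cell_values = Feishu.get_range_value("twitter", "db114c", "B2:B2")
    #   print(cell_values[0])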

    # Delete rows or columns; ROWS or COLUMNS
    @classmethod
    def dimension_range(cls, crawler, sheetid, major_dimension, startindex, endindex):
        """
        Delete rows or columns.
        :param crawler: which crawler
        :param sheetid: which worksheet
        :param major_dimension: defaults to ROWS; ROWS or COLUMNS
        :param startindex: start position
        :param endindex: end position
        :return:
        """
        dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                              + cls.spreadsheettoken(crawler) + "/dimension_range"
        headers = {
            "Authorization": "Bearer " + cls.get_token(),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "dimension": {
                "sheetId": sheetid,
                "majorDimension": major_dimension,
                "startIndex": startindex,
                "endIndex": endindex
            }
        }
        try:
            urllib3.disable_warnings()
            r = requests.delete(url=dimension_range_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger().info("Delete video data: {}", r.json()["msg"])
        except Exception as e:
            Common.logger().error("Failed to delete video data: {}", e)

    # Find a cell
    @classmethod
    def find_cell(cls, crawler, sheetid, find_text):
        """
        Find a cell.
        :param crawler: which crawler
        :param sheetid: which worksheet
        :param find_text: the text to search for
        :return: the row index of the matched cell
        """
        find_cell_url = "https://open.feishu.cn/open-apis/sheets/v3/spreadsheets/" \
                        + cls.spreadsheettoken(crawler) + "/sheets/" \
                        + sheetid + "/find"
        headers = {
            "Authorization": "Bearer " + cls.get_token(),
            "Content-Type": "application/json; charset=utf-8"
        }
        rows_count = len(cls.get_values_batch(crawler, sheetid))
        body = {
            "find_condition": {
                "range": sheetid + "!A1:A" + str(rows_count),
                "match_case": True,  # whether the match is case-sensitive
                "match_entire_cell": False,  # whether to match the entire cell
                "search_by_regex": False,  # whether to match by regular expression
                "include_formulas": False  # whether to search formula contents
            },
            "find": find_text  # the search text
        }
        try:
            urllib3.disable_warnings()
            r = requests.post(url=find_cell_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger().info("Find cell: {}", r.json()["msg"])
            matched_cell = r.json()["data"]["find_result"]["matched_cells"][0].split("A")[-1]
            return matched_cell
        except Exception as e:
            Common.logger().error("Failed to find cell: {}", e)


if __name__ == "__main__":
    feishu = Feishu()
    print(feishu.find_cell("twitter", "db114c", "956929025645035522"))
    print(type(feishu.find_cell("twitter", "db114c", "956929025645035522")))
    pass
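
    # Further usage sketches (illustrative only, kept commented out so the demo does not
    # write to the live sheet). The matched row index returned by find_cell can be fed
    # back into the other helpers; the ranges below are made-up placeholders.
    #
    # row = feishu.find_cell("twitter", "db114c", "956929025645035522")
    # print(feishu.get_range_value("twitter", "db114c", "A" + row + ":D" + row))
    # feishu.dimension_range("twitter", "db114c", "ROWS", int(row), int(row))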