feishu_lib.py

# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/10/12
import json

import requests
import urllib3

# from crawler_shipinhao.main.common import Common
from main.common import Common

proxies = {"http": None, "https": None}


class Feishu:
    """
    Edit Feishu cloud documents (spreadsheets).
    """
    # Kanyikan crawler data sheet
    kanyikan_url = "https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?"
    # Kuaishou crawler data sheet
    kuaishou_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?"
    # Weishi crawler data sheet
    weishi_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?"
    # Xiaoniangao crawler data sheet
    xiaoniangao_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?"
    # Data monitoring sheet
    crawler_monitor = "https://w42nne6hzg.feishu.cn/sheets/shtcnlZWYazInhf7Z60jkbLRJyd?"
    # Benshanzhufu data sheet
    crawler_benshanzhufu = "https://w42nne6hzg.feishu.cn/sheets/shtcnGh2rrsPYM4iVNEBO7OqWrb?"
    # WeChat official-accounts (gongzhonghao) crawler sheet
    gzh_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnexNXnpDLHhARw0QdiwbYuA?"
    # Shipinhao (WeChat Channels) sheet
    shipinhao_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn9rOdZRAGFbRkWpn7hqEHGc?"
    # Zhiqing main group sheet
    zhiqingzongqun_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnjmhKdJOKdqnEzJcZb5xaHc?"

    # Feishu spreadsheet token for each crawler
    @classmethod
    def spreadsheettoken(cls, crawler):
        """
        :param crawler: which crawler's spreadsheet to address
        """
        if crawler == "kanyikan":
            return "shtcngRPoDYAi24x52j2nDuHMih"
        elif crawler == "kuaishou":
            return "shtcnp4SaJt37q6OOOrYzPMjQkg"
        elif crawler == "weishi":
            return "shtcn5YSWg91JfVGzj0SFZIRRPh"
        elif crawler == "xiaoniangao":
            return "shtcnYxiyQ1wLklo1W5Kdqc9cGh"
        elif crawler == "monitor":
            return "shtcnlZWYazInhf7Z60jkbLRJyd"
        elif crawler == "bszf":
            return "shtcnGh2rrsPYM4iVNEBO7OqWrb"
        elif crawler == "gzh":
            return "shtcnexNXnpDLHhARw0QdiwbYuA"
        elif crawler == "shipinhao":
            return "shtcn9rOdZRAGFbRkWpn7hqEHGc"
        elif crawler == "zhiqingzongqun":
            return "shtcnjmhKdJOKdqnEzJcZb5xaHc"

    # Fetch the Feishu API token
    @classmethod
    def get_token(cls, log_type):
        """
        Fetch a Feishu tenant access token.
        :return: tenant_access_token
        """
        url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
        post_data = {"app_id": "cli_a13ad2afa438d00b",  # app_id / app_secret of the published app's backend account
                     "app_secret": "4tK9LY9VbiQlY5umhE42dclBFo6t4p5O"}
        try:
            urllib3.disable_warnings()
            response = requests.post(url=url, data=post_data, proxies=proxies, verify=False)
            tenant_access_token = response.json()["tenant_access_token"]
            return tenant_access_token
        except Exception as e:
            Common.logger(log_type).error("Failed to fetch Feishu API token: {}", e)
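
    # Usage sketch (the log_type "demo" is a hypothetical placeholder for the
    # caller's logger name); every method below builds its auth header this way:
    #   headers = {"Authorization": "Bearer " + Feishu.get_token("demo")}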

    # Fetch spreadsheet metadata
    @classmethod
    def get_metainfo(cls, log_type, crawler):
        """
        Fetch the spreadsheet's metadata.
        :return: the parsed JSON response
        """
        get_metainfo_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                           + cls.spreadsheettoken(crawler) + "/metainfo"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            "extFields": "protectedRange",  # extra fields to return; extFields=protectedRange returns protected row/column info
            "user_id_type": "open_id"  # type of user id to return: open_id or union_id
        }
        try:
            urllib3.disable_warnings()
            r = requests.get(url=get_metainfo_url, headers=headers, params=params, proxies=proxies, verify=False)
            response = json.loads(r.content.decode("utf8"))
            return response
        except Exception as e:
            Common.logger(log_type).error("Failed to fetch spreadsheet metadata: {}", e)
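
    # Usage sketch (hypothetical log_type "demo"): in the v2 metainfo response,
    # the per-sheet ids needed by the methods below live under data.sheets:
    #   meta = Feishu.get_metainfo("demo", "shipinhao")
    #   sheet_ids = [s["sheetId"] for s in meta["data"]["sheets"]]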

    # Read all data from a worksheet
    @classmethod
    def get_values_batch(cls, log_type, crawler, sheetid):
        """
        Read all data from a worksheet.
        :param log_type: which log to use
        :param crawler: which crawler
        :param sheetid: which worksheet
        :return: all values in the sheet
        """
        get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                               + cls.spreadsheettoken(crawler) + "/values_batch_get"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            # Multiple query ranges, e.g. url?ranges=range1,range2; each range
            # consists of a sheetId plus an optional cell range
            "ranges": sheetid,
            # valueRenderOption=ToString returns plain-text values (except numeric types);
            # valueRenderOption=FormattedValue computes and formats the cells;
            # valueRenderOption=Formula returns the formula itself if a cell contains one;
            # valueRenderOption=UnformattedValue computes but does not format the cells
            "valueRenderOption": "ToString",
            # dateTimeRenderOption=FormattedString computes and formats dates/times
            # according to their cell format (numbers are left unformatted) and
            # returns the formatted string
            "dateTimeRenderOption": "",
            # type of user id to return: open_id or union_id
            "user_id_type": "open_id"
        }
        try:
            urllib3.disable_warnings()
            r = requests.get(url=get_values_batch_url, headers=headers, params=params, proxies=proxies, verify=False)
            # print(r.text)
            response = json.loads(r.content.decode("utf8"))
            values = response["data"]["valueRanges"][0]["values"]
            return values
        except Exception as e:
            Common.logger(log_type).error("Failed to read worksheet data: {}", e)
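
    # Usage sketch (hypothetical sheet id "xxxxxx"): as the return statement
    # above shows, the whole sheet comes back as a list of row lists:
    #   for row in Feishu.get_values_batch("demo", "shipinhao", "xxxxxx"):
    #       print(row)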

    # Insert rows or columns into a worksheet
    @classmethod
    def insert_columns(cls, log_type, crawler, sheetid, majordimension, startindex, endindex):
        """
        Insert rows or columns into a worksheet.
        :param log_type: log path
        :param crawler: which crawler's cloud document
        :param sheetid: which worksheet
        :param majordimension: rows or columns, ROWS / COLUMNS
        :param startindex: start position
        :param endindex: end position
        """
        insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                             + cls.spreadsheettoken(crawler) + "/insert_dimension_range"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "dimension": {
                "sheetId": sheetid,
                "majorDimension": majordimension,  # ROWS (default) or COLUMNS
                "startIndex": startindex,  # start position
                "endIndex": endindex  # end position
            },
            "inheritStyle": "AFTER"  # BEFORE or AFTER; omit to not inherit style
        }
        try:
            urllib3.disable_warnings()
            r = requests.post(url=insert_columns_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger(log_type).info("Inserted rows/columns: {}", r.json()["msg"])
        except Exception as e:
            Common.logger(log_type).error("Failed to insert rows/columns: {}", e)
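
    # Usage sketch (hypothetical sheet id "xxxxxx"): insert blank rows in the
    # index window from startindex 1 to endindex 2; style inheritance follows
    # the hard-coded inheritStyle="AFTER" in the request body:
    #   Feishu.insert_columns("demo", "shipinhao", "xxxxxx", "ROWS", 1, 2)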

    # Write data
    @classmethod
    def update_values(cls, log_type, crawler, sheetid, ranges, values):
        """
        Write data into a worksheet.
        :param log_type: log path
        :param crawler: which crawler's cloud document
        :param sheetid: which worksheet
        :param ranges: cell range
        :param values: the data to write, as a list
        """
        update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                            + cls.spreadsheettoken(crawler) + "/values_batch_update"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "valueRanges": [
                {
                    "range": sheetid + "!" + ranges,
                    "values": values
                },
            ],
        }
        try:
            urllib3.disable_warnings()
            r = requests.post(url=update_values_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger(log_type).info("Wrote data: {}", r.json()["msg"])
        except Exception as e:
            Common.logger(log_type).error("Failed to write data: {}", e)
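
    # Usage sketch (hypothetical sheet id "xxxxxx"): `ranges` is an A1-style
    # cell range and `values` is a list of row lists matching that range:
    #   Feishu.update_values("demo", "shipinhao", "xxxxxx", "A2:C2",
    #                        [["2022-10-12", "title", "url"]])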

    # Merge cells
    @classmethod
    def merge_cells(cls, log_type, crawler, sheetid, ranges):
        """
        Merge cells.
        :param log_type: log path
        :param crawler: which crawler
        :param sheetid: which worksheet
        :param ranges: the cell range to merge
        """
        merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                          + cls.spreadsheettoken(crawler) + "/merge_cells"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "range": sheetid + "!" + ranges,
            "mergeType": "MERGE_ROWS"
        }
        try:
            urllib3.disable_warnings()
            r = requests.post(url=merge_cells_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger(log_type).info("Merged cells: {}", r.json()["msg"])
        except Exception as e:
            Common.logger(log_type).error("Failed to merge cells: {}", e)
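
    # Usage sketch (hypothetical sheet id "xxxxxx"): merge A1:C1 into a single
    # cell; note the merge type is hard-coded to MERGE_ROWS above:
    #   Feishu.merge_cells("demo", "shipinhao", "xxxxxx", "A1:C1")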

    # Read cell data
    @classmethod
    def get_range_value(cls, log_type, crawler, sheetid, cell):
        """
        Read the content of a cell range.
        :param log_type: log path
        :param crawler: which crawler
        :param sheetid: which worksheet
        :param cell: which cell
        :return: the cell's content
        """
        get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                              + cls.spreadsheettoken(crawler) + "/values/" + sheetid + "!" + cell
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            # valueRenderOption=ToString returns plain-text values (except numeric types);
            # valueRenderOption=FormattedValue computes and formats the cells;
            # valueRenderOption=Formula returns the formula itself if a cell contains one;
            # valueRenderOption=UnformattedValue computes but does not format the cells.
            "valueRenderOption": "FormattedValue",
            # dateTimeRenderOption=FormattedString computes and formats dates/times
            # according to their cell format (numbers are left unformatted) and
            # returns the formatted string.
            "dateTimeRenderOption": "",
            # type of user id to return: open_id or union_id
            "user_id_type": "open_id"
        }
        try:
            urllib3.disable_warnings()
            r = requests.get(url=get_range_value_url, headers=headers, params=params, proxies=proxies, verify=False)
            # print(r.text)
            return r.json()["data"]["valueRange"]["values"][0]
        except Exception as e:
            Common.logger(log_type).error("Failed to read cell data: {}", e)
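
    # Usage sketch (hypothetical sheet id "xxxxxx"): read one cell; the method
    # returns the first row of the requested range, so a single-cell range
    # yields a one-element list:
    #   value = Feishu.get_range_value("demo", "shipinhao", "xxxxxx", "B2:B2")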

    # Delete rows or columns; ROWS or COLUMNS
    @classmethod
    def dimension_range(cls, log_type, crawler, sheetid, major_dimension, startindex, endindex):
        """
        Delete rows or columns.
        :param log_type: log path
        :param crawler: which crawler
        :param sheetid: worksheet
        :param major_dimension: ROWS (default) or COLUMNS
        :param startindex: start position
        :param endindex: end position
        :return:
        """
        dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                              + cls.spreadsheettoken(crawler) + "/dimension_range"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "dimension": {
                "sheetId": sheetid,
                "majorDimension": major_dimension,
                "startIndex": startindex,
                "endIndex": endindex
            }
        }
        try:
            urllib3.disable_warnings()
            r = requests.delete(url=dimension_range_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger(log_type).info("Deleted rows/columns: {}", r.json()["msg"])
        except Exception as e:
            Common.logger(log_type).error("Failed to delete rows/columns: {}", e)
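
    # Usage sketch (hypothetical sheet id "xxxxxx"): delete the rows in the
    # index window from startindex 2 to endindex 3:
    #   Feishu.dimension_range("demo", "shipinhao", "xxxxxx", "ROWS", 2, 3)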


if __name__ == "__main__":
    pass
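
    # A minimal manual smoke test, left commented out because it performs live
    # API calls with the credentials above. The log_type "demo" and sheet id
    # "xxxxxx" are hypothetical placeholders; substitute real values to run:
    # print(Feishu.get_token("demo"))
    # print(Feishu.get_metainfo("demo", "shipinhao"))
    # print(Feishu.get_values_batch("demo", "shipinhao", "xxxxxx"))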