feishu_lib.py

# @Author: wangkun
# @Time: March 08, 2022
import json

import requests
import urllib3

from crawler_shipinhao.main.common import Common

proxies = {"http": None, "https": None}


class Feishu:
    """
    Edit Feishu (Lark) spreadsheet documents.
    """
    # "Kanyikan" crawler data sheet
    kanyikan_url = "https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?"
    # Kuaishou crawler data sheet
    kuaishou_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?"
    # Weishi crawler data sheet
    weishi_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?"
    # Xiaoniangao crawler data sheet
    xiaoniangao_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?"
    # Data-monitoring sheet
    crawler_monitor = "https://w42nne6hzg.feishu.cn/sheets/shtcnlZWYazInhf7Z60jkbLRJyd?"
    # Benshanzhufu data sheet
    crawler_benshanzhufu = "https://w42nne6hzg.feishu.cn/sheets/shtcnGh2rrsPYM4iVNEBO7OqWrb?"
    # Official-account (gzh) crawler sheet
    gzh_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnexNXnpDLHhARw0QdiwbYuA?"
    # Shipinhao (WeChat Channels) sheet
    shipinhao_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn9rOdZRAGFbRkWpn7hqEHGc?"

    # Feishu spreadsheet tokens
    @classmethod
    def spreadsheettoken(cls, crawler):
        """
        :param crawler: which crawler
        :return: the spreadsheet token of that crawler's document
        """
        if crawler == "kanyikan":
            return "shtcngRPoDYAi24x52j2nDuHMih"
        elif crawler == "kuaishou":
            return "shtcnp4SaJt37q6OOOrYzPMjQkg"
        elif crawler == "weishi":
            return "shtcn5YSWg91JfVGzj0SFZIRRPh"
        elif crawler == "xiaoniangao":
            return "shtcnYxiyQ1wLklo1W5Kdqc9cGh"
        elif crawler == "monitor":
            return "shtcnlZWYazInhf7Z60jkbLRJyd"
        elif crawler == "bszf":
            return "shtcnGh2rrsPYM4iVNEBO7OqWrb"
        elif crawler == "gzh":
            return "shtcnexNXnpDLHhARw0QdiwbYuA"
        elif crawler == "shipinhao":
            return "shtcn9rOdZRAGFbRkWpn7hqEHGc"

    # Get a Feishu API token
    @classmethod
    def get_token(cls, log_type):
        """
        Get a Feishu API tenant_access_token.
        :param log_type: which log to use
        :return: tenant_access_token
        """
        url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
        post_data = {"app_id": "cli_a13ad2afa438d00b",  # credentials of the published app's backend account
                     "app_secret": "4tK9LY9VbiQlY5umhE42dclBFo6t4p5O"}
        try:
            urllib3.disable_warnings()
            response = requests.post(url=url, data=post_data, proxies=proxies, verify=False)
            tenant_access_token = response.json()["tenant_access_token"]
            return tenant_access_token
        except Exception as e:
            Common.logger(log_type).error("Failed to get Feishu API token: {}", e)

    # Get spreadsheet metadata
    @classmethod
    def get_metainfo(cls, log_type, crawler):
        """
        Get the spreadsheet's metadata.
        :param log_type: which log to use
        :param crawler: which crawler's cloud document
        :return: metadata response
        """
        get_metainfo_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                           + cls.spreadsheettoken(crawler) + "/metainfo"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            "extFields": "protectedRange",  # extra fields to return; extFields=protectedRange returns protected row/column info
            "user_id_type": "open_id"  # type of user id to return: open_id or union_id
        }
        try:
            urllib3.disable_warnings()
            r = requests.get(url=get_metainfo_url, headers=headers, params=params, proxies=proxies, verify=False)
            response = json.loads(r.content.decode("utf8"))
            return response
        except Exception as e:
            Common.logger(log_type).error("Failed to get spreadsheet metadata: {}", e)

    # Read all data from a worksheet
    @classmethod
    def get_values_batch(cls, log_type, crawler, sheetid):
        """
        Read all data from a worksheet.
        :param log_type: which log to use
        :param crawler: which crawler
        :param sheetid: which worksheet
        :return: all values
        """
        get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                               + cls.spreadsheettoken(crawler) + "/values_batch_get"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            # Multiple query ranges, e.g. url?ranges=range1,range2; a range consists of a sheetId plus a cell range
            "ranges": sheetid,
            # valueRenderOption=ToString returns values as plain text (except numeric types);
            # valueRenderOption=FormattedValue computes and formats the cell;
            # valueRenderOption=Formula returns the formula itself when the cell contains one;
            # valueRenderOption=UnformattedValue computes but does not format the cell
            "valueRenderOption": "ToString",
            # dateTimeRenderOption=FormattedString computes and formats dates/times according to their format
            # (numbers are not formatted) and returns the formatted string
            "dateTimeRenderOption": "",
            # type of user id to return: open_id or union_id
            "user_id_type": "open_id"
        }
        try:
            urllib3.disable_warnings()
            r = requests.get(url=get_values_batch_url, headers=headers, params=params, proxies=proxies, verify=False)
            # print(r.text)
            response = json.loads(r.content.decode("utf8"))
            values = response["data"]["valueRanges"][0]["values"]
            return values
        except Exception as e:
            Common.logger(log_type).error("Failed to read worksheet values: {}", e)

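    # Illustrative sketch (not part of the original module): a typical call to
    # get_values_batch. The sheet id "a1b2c3" is a placeholder; the method returns a list
    # of rows, each row itself a list of cell values, so iterating it walks the worksheet
    # top to bottom:
    #
    #   rows = Feishu.get_values_batch("shipinhao", "shipinhao", "a1b2c3")
    #   for i, row in enumerate(rows):
    #       print(i, row)
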
    # Insert rows or columns into a worksheet
    @classmethod
    def insert_columns(cls, log_type, crawler, sheetid, majordimension, startindex, endindex):
        """
        Insert rows or columns into a worksheet.
        :param log_type: log path
        :param crawler: which crawler's cloud document
        :param sheetid: which worksheet
        :param majordimension: rows or columns, ROWS / COLUMNS
        :param startindex: start position
        :param endindex: end position
        """
        insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                             + cls.spreadsheettoken(crawler) + "/insert_dimension_range"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "dimension": {
                "sheetId": sheetid,
                "majorDimension": majordimension,  # default ROWS; ROWS or COLUMNS
                "startIndex": startindex,  # start position
                "endIndex": endindex  # end position
            },
            "inheritStyle": "AFTER"  # BEFORE or AFTER; omit to not inherit style
        }
        try:
            urllib3.disable_warnings()
            r = requests.post(url=insert_columns_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger(log_type).info("Insert rows/columns: {}", r.json()["msg"])
        except Exception as e:
            Common.logger(log_type).error("Failed to insert rows/columns: {}", e)

    # Write data
    @classmethod
    def update_values(cls, log_type, crawler, sheetid, ranges, values):
        """
        Write data into a worksheet.
        :param log_type: log path
        :param crawler: which crawler's cloud document
        :param sheetid: which worksheet
        :param ranges: cell range
        :param values: the data to write, as a list
        """
        update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                            + cls.spreadsheettoken(crawler) + "/values_batch_update"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "valueRanges": [
                {
                    "range": sheetid + "!" + ranges,
                    "values": values
                },
            ],
        }
        try:
            urllib3.disable_warnings()
            r = requests.post(url=update_values_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger(log_type).info("Write values: {}", r.json()["msg"])
        except Exception as e:
            Common.logger(log_type).error("Failed to write values: {}", e)

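    # Illustrative sketch (not part of the original module): the usual write pattern is to
    # open an empty row with insert_columns and then fill it with update_values. The sheet
    # id "a1b2c3", the range "A2:C2", and the row contents are placeholders:
    #
    #   Feishu.insert_columns("shipinhao", "shipinhao", "a1b2c3", "ROWS", 1, 2)
    #   Feishu.update_values("shipinhao", "shipinhao", "a1b2c3", "A2:C2",
    #                        [["2022-03-08", "video_title", "video_url"]])
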
    # Merge cells
    @classmethod
    def merge_cells(cls, log_type, crawler, sheetid, ranges):
        """
        Merge cells.
        :param log_type: log path
        :param crawler: which crawler
        :param sheetid: which worksheet
        :param ranges: the cell range to merge
        """
        merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                          + cls.spreadsheettoken(crawler) + "/merge_cells"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "range": sheetid + "!" + ranges,
            "mergeType": "MERGE_ROWS"
        }
        try:
            urllib3.disable_warnings()
            r = requests.post(url=merge_cells_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger(log_type).info("Merge cells: {}", r.json()["msg"])
        except Exception as e:
            Common.logger(log_type).error("Failed to merge cells: {}", e)

    # Read cell data
    @classmethod
    def get_range_value(cls, log_type, crawler, sheetid, cell):
        """
        Read the content of a cell.
        :param log_type: log path
        :param crawler: which crawler
        :param sheetid: which worksheet
        :param cell: which cell
        :return: the cell's content
        """
        get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                              + cls.spreadsheettoken(crawler) + "/values/" + sheetid + "!" + cell
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            # valueRenderOption=ToString returns values as plain text (except numeric types);
            # valueRenderOption=FormattedValue computes and formats the cell;
            # valueRenderOption=Formula returns the formula itself when the cell contains one;
            # valueRenderOption=UnformattedValue computes but does not format the cell.
            "valueRenderOption": "FormattedValue",
            # dateTimeRenderOption=FormattedString computes and formats dates/times according to their format
            # (numbers are not formatted) and returns the formatted string.
            "dateTimeRenderOption": "",
            # type of user id to return: open_id or union_id
            "user_id_type": "open_id"
        }
        try:
            urllib3.disable_warnings()
            r = requests.get(url=get_range_value_url, headers=headers, params=params, proxies=proxies, verify=False)
            # print(r.text)
            return r.json()["data"]["valueRange"]["values"][0]
        except Exception as e:
            Common.logger(log_type).error("Failed to read cell values: {}", e)

    # Delete rows or columns; ROWS or COLUMNS
    @classmethod
    def dimension_range(cls, log_type, crawler, sheetid, major_dimension, startindex, endindex):
        """
        Delete rows or columns.
        :param log_type: log path
        :param crawler: which crawler
        :param sheetid: which worksheet
        :param major_dimension: default ROWS; ROWS or COLUMNS
        :param startindex: start position
        :param endindex: end position
        :return:
        """
        dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                              + cls.spreadsheettoken(crawler) + "/dimension_range"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "dimension": {
                "sheetId": sheetid,
                "majorDimension": major_dimension,
                "startIndex": startindex,
                "endIndex": endindex
            }
        }
        try:
            urllib3.disable_warnings()
            r = requests.delete(url=dimension_range_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger(log_type).info("Delete video data: {}", r.json()["msg"])
        except Exception as e:
            Common.logger(log_type).error("Failed to delete video data: {}", e)


if __name__ == "__main__":
    print(Feishu.get_token("gzh"))
    pass
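
# Illustrative end-to-end sketch (not part of the original module): read a sheet, find the
# row to drop, then delete it with dimension_range. The sheet id "a1b2c3" and the matching
# condition are placeholders, and the index arithmetic is only an assumption; confirm the
# offset against the real sheet layout before using it:
#
#   rows = Feishu.get_values_batch("shipinhao", "shipinhao", "a1b2c3")
#   for i, row in enumerate(rows):
#       if row and row[0] == "done":  # placeholder condition
#           Feishu.dimension_range("shipinhao", "shipinhao", "a1b2c3", "ROWS", i + 1, i + 1)
#           break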