
# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/1/31
"""
Feishu spreadsheet helpers: token auth / CRUD / bot alerts
"""
import json
import os
import sys
import requests
import urllib3
sys.path.append(os.getcwd())
from common.common import Common
proxies = {"http": None, "https": None}


class Feishu:
    """
    Edit Feishu cloud documents
    """
    # Kanyikan crawler data sheet
    kanyikan_url = "https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?"
    # Kuaishou crawler data sheet
    kuaishou_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnICEfaw9llDNQkKgdymM1xf?"
    # Weishi crawler data sheet
    weishi_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?"
    # Xiaoniangao crawler data sheet
    xiaoniangao_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?"
    # Music album
    music_album = "https://w42nne6hzg.feishu.cn/sheets/shtcnT6zvmfsYe1g0iv4pt7855g?"
    # Benshanzhufu data sheet
    crawler_benshanzhufu = "https://w42nne6hzg.feishu.cn/sheets/shtcnGh2rrsPYM4iVNEBO7OqWrb?"
    # Official-account crawler sheet
    gzh_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnexNXnpDLHhARw0QdiwbYuA?"
    # Data monitoring sheet
    crawler_monitor = "https://w42nne6hzg.feishu.cn/sheets/shtcnlZWYazInhf7Z60jkbLRJyd?"
    # WeChat-group video crawler sheet
    crawler_weiqun_video = "https://w42nne6hzg.feishu.cn/sheets/shtcnoKThNquYRweaylMFVyo9Hc?"
    # Shipinhao (WeChat Channels) crawler sheet
    crawler_shipinhao = "https://w42nne6hzg.feishu.cn/sheets/shtcn9rOdZRAGFbRkWpn7hqEHGc?"
    # Xigua Video
    crawler_xigua = "https://w42nne6hzg.feishu.cn/sheets/shtcnvOpx2P8vBXiV91Ot1MKIw8?"
    # Zhihu (PC)
    crawler_zhihu = "https://w42nne6hzg.feishu.cn/sheets/shtcnkGPBmGsjaqapgzouuj8MXe?"
    # Jixiangxingfu
    crawler_jixiangxingfu = "https://w42nne6hzg.feishu.cn/sheets/shtcnSx4nafMbLTq7xl7RHBwHBf?"
    # Zhongmiaoyinxin
    crawler_zmyx = "https://w42nne6hzg.feishu.cn/sheets/shtcnbZIxstPeM0xshW07b26sve?"
    # Suisuiniannian-yingfuqi
    crawler_ssnnyfq = "https://w42nne6hzg.feishu.cn/sheets/shtcnyJmJSJynHDLLbLTkySfvZe?"
    # Zhufumao video
    crawler_zhufumao = "https://w42nne6hzg.feishu.cn/sheets/shtcnXfIJthvkjhI5zlEJq84i6g?"
    # Religious official accounts
    crawler_zongjiao = "https://w42nne6hzg.feishu.cn/sheets/shtcn73NW0CyoOeF21HWO15KBsb?"
    # Haokan Video
    crawler_haokan = "https://w42nne6hzg.feishu.cn/sheets/shtcnaYz8Nhv8q6DbWtlL6rMEBd"
    # Kandaojiushifuqi
    crawler_kandaojiushifuqi = "https://w42nne6hzg.feishu.cn/sheets/shtcnEokBkIjOUPAk8vbbPKnXgb"
    # Shengshengyingyin
    crawler_shengshengyingyin = "https://w42nne6hzg.feishu.cn/sheets/shtcnz1ymxHL1u8WHblfqfys7qe"
    # Ganggangdouchuan
    crawler_ganggangdouchuan = "https://w42nne6hzg.feishu.cn/sheets/shtcnTuJgeZU2bc7VaesAqk3QJx"
    # Official account _ Xinxin
    crawler_gongzhonghao = "https://w42nne6hzg.feishu.cn/sheets/shtcna98M2mX7TbivTj9Sb7WKBN?"
    # YouTube
    crawler_youtube = "https://w42nne6hzg.feishu.cn/sheets/shtcnrLyr1zbYbhhZyqpN7Xrd5f?"
    # WeChat Index
    weixinzhishu = "https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?"
    # WeChat Index _ search words
    weixinzhishu_search_word = "https://w42nne6hzg.feishu.cn/sheets/shtcnHxCj6dZBYMuK1Q3tIJVlqg?"

    # Phone numbers
    wangkun = "13426262515"
    gaonannan = "18501180073"
    xinxin = "15546206651"
    huxinxue = "18832292015"

    # Feishu spreadsheet token for each crawler
    @classmethod
    def spreadsheettoken(cls, crawler):
        """
        :param crawler: which crawler
        :return: the spreadsheet token, or None for an unknown crawler
        """
        tokens = {
            "kanyikan": "shtcngRPoDYAi24x52j2nDuHMih",
            "kuaishou": "shtcnICEfaw9llDNQkKgdymM1xf",
            "weishi": "shtcn5YSWg91JfVGzj0SFZIRRPh",
            "xiaoniangao": "shtcnYxiyQ1wLklo1W5Kdqc9cGh",
            "monitor": "shtcnlZWYazInhf7Z60jkbLRJyd",
            "music_album": "shtcnT6zvmfsYe1g0iv4pt7855g",
            "bszf": "shtcnGh2rrsPYM4iVNEBO7OqWrb",
            "gzh": "shtcnexNXnpDLHhARw0QdiwbYuA",
            "weiqun": "shtcnoKThNquYRweaylMFVyo9Hc",
            "shipinhao": "shtcn9rOdZRAGFbRkWpn7hqEHGc",
            "xigua": "shtcnvOpx2P8vBXiV91Ot1MKIw8",
            "zhihu": "shtcnkGPBmGsjaqapgzouuj8MXe",
            "jxxf": "shtcnSx4nafMbLTq7xl7RHBwHBf",
            "zmyx": "shtcnbZIxstPeM0xshW07b26sve",
            "ssnnyfq": "shtcnyJmJSJynHDLLbLTkySfvZe",
            "zhufumao": "shtcnXfIJthvkjhI5zlEJq84i6g",
            "zongjiao": "shtcn73NW0CyoOeF21HWO15KBsb",
            "haokan": "shtcnaYz8Nhv8q6DbWtlL6rMEBd",
            "kdjsfq": "shtcnEokBkIjOUPAk8vbbPKnXgb",
            "ssyy": "shtcnz1ymxHL1u8WHblfqfys7qe",
            "ggdc": "shtcnTuJgeZU2bc7VaesAqk3QJx",
            "gongzhonghao_xinxin": "shtcna98M2mX7TbivTj9Sb7WKBN",
            "youtube": "shtcnrLyr1zbYbhhZyqpN7Xrd5f",
            "weixinzhishu": "shtcnqhMRUGunIfGnGXMOBYiy4K",
            "weixinzhishu_search_word": "shtcnHxCj6dZBYMuK1Q3tIJVlqg",
        }
        return tokens.get(crawler)

    # Fetch a Feishu API token
    @classmethod
    def get_token(cls, log_type, crawler):
        """
        Fetch a Feishu API tenant_access_token.
        :return: the token string, or None on failure
        """
        url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
        post_data = {"app_id": "cli_a13ad2afa438d00b",  # app_id / app_secret of the published app's admin console
                     "app_secret": "4tK9LY9VbiQlY5umhE42dclBFo6t4p5O"}
        try:
            urllib3.disable_warnings()
            response = requests.post(url=url, data=post_data, proxies=proxies, verify=False)
            tenant_access_token = response.json()["tenant_access_token"]
            return tenant_access_token
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed to fetch Feishu API token: {}", e)
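
    # Usage sketch (commented out so importing the module stays side-effect
    # free; "demo" is a hypothetical log_type, and the call needs network
    # access plus valid app credentials). The token goes into the
    # Authorization header of every subsequent Open API request:
    #   token = Feishu.get_token("demo", "kanyikan")
    #   headers = {"Authorization": "Bearer " + token}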

    # Fetch spreadsheet metadata
    @classmethod
    def get_metainfo(cls, log_type, crawler):
        """
        Fetch the spreadsheet's metadata.
        :return: the metainfo response as a dict
        """
        get_metainfo_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                           + cls.spreadsheettoken(crawler) + "/metainfo"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            "extFields": "protectedRange",  # extra fields; extFields=protectedRange returns protected row/column info
            "user_id_type": "open_id"  # type of user id to return; open_id or union_id
        }
        try:
            urllib3.disable_warnings()
            r = requests.get(url=get_metainfo_url, headers=headers, params=params, proxies=proxies, verify=False)
            response = json.loads(r.content.decode("utf8"))
            return response
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed to fetch spreadsheet metadata: {}", e)

    # Read all data from a worksheet
    @classmethod
    def get_values_batch(cls, log_type, crawler, sheetid):
        """
        Read all data from a worksheet.
        :param log_type: which log to use
        :param crawler: which crawler
        :param sheetid: which worksheet
        :return: all values, as a list of rows
        """
        get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                               + cls.spreadsheettoken(crawler) + "/values_batch_get"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            # Multiple ranges are allowed, e.g. url?ranges=range1,range2;
            # each range consists of a sheetId plus an optional cell range.
            "ranges": sheetid,
            # valueRenderOption=ToString returns plain-text values (except numeric types);
            # valueRenderOption=FormattedValue computes and formats cells;
            # valueRenderOption=Formula returns the formula itself for formula cells;
            # valueRenderOption=UnformattedValue computes but does not format.
            "valueRenderOption": "ToString",
            # dateTimeRenderOption=FormattedString computes and formats dates/times
            # (but not numbers) and returns the formatted string.
            "dateTimeRenderOption": "",
            # type of user id to return; open_id or union_id
            "user_id_type": "open_id"
        }
        try:
            urllib3.disable_warnings()
            r = requests.get(url=get_values_batch_url, headers=headers, params=params, proxies=proxies, verify=False)
            response = json.loads(r.content.decode("utf8"))
            values = response["data"]["valueRanges"][0]["values"]
            return values
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed to read worksheet data: {}", e)
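
    # Usage sketch ("demo" and "SHEETID" are hypothetical). ranges may also
    # carry a cell range such as "SHEETID!A1:D100"; with "ToString" every
    # value comes back as plain text:
    #   rows = Feishu.get_values_batch("demo", "kanyikan", "SHEETID")
    #   for row in rows[1:]:  # skip the header row
    #       print(row)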

    # Insert rows or columns into a worksheet
    @classmethod
    def insert_columns(cls, log_type, crawler, sheetid, majordimension, startindex, endindex):
        """
        Insert rows or columns into a worksheet.
        :param log_type: log path
        :param crawler: which crawler's cloud document
        :param sheetid: which worksheet
        :param majordimension: rows or columns; ROWS or COLUMNS
        :param startindex: start position
        :param endindex: end position
        """
        insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                             + cls.spreadsheettoken(crawler) + "/insert_dimension_range"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "dimension": {
                "sheetId": sheetid,
                "majorDimension": majordimension,  # defaults to ROWS; ROWS or COLUMNS
                "startIndex": startindex,  # start position
                "endIndex": endindex  # end position
            },
            "inheritStyle": "AFTER"  # BEFORE or AFTER; omit to not inherit style
        }
        try:
            urllib3.disable_warnings()
            r = requests.post(url=insert_columns_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger(log_type, crawler).info("Insert rows/columns: {}", r.json()["msg"])
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed to insert rows/columns: {}", e)
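
    # Usage sketch ("demo" and "SHEETID" are hypothetical; index semantics
    # follow the Feishu insert_dimension_range API). Inserting between index
    # 1 and 2 adds one empty row directly below the header row:
    #   Feishu.insert_columns("demo", "kanyikan", "SHEETID", "ROWS", 1, 2)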

    # Write data
    @classmethod
    def update_values(cls, log_type, crawler, sheetid, ranges, values):
        """
        Write data into a worksheet.
        :param log_type: log path
        :param crawler: which crawler's cloud document
        :param sheetid: which worksheet
        :param ranges: cell range
        :param values: the data to write, as a list
        """
        update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                            + cls.spreadsheettoken(crawler) + "/values_batch_update"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "valueRanges": [
                {
                    "range": sheetid + "!" + ranges,
                    "values": values
                },
            ],
        }
        try:
            urllib3.disable_warnings()
            r = requests.post(url=update_values_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger(log_type, crawler).info("Write data: {}", r.json()["msg"])
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed to write data: {}", e)
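
    # Usage sketch ("demo" and "SHEETID" are hypothetical): a typical
    # "prepend a record" flow is insert_columns followed by update_values on
    # the freshly inserted row:
    #   Feishu.insert_columns("demo", "kanyikan", "SHEETID", "ROWS", 1, 2)
    #   Feishu.update_values("demo", "kanyikan", "SHEETID", "A2:C2",
    #                        [["2023-01-31", "video_title", "video_url"]])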

    # Merge cells
    @classmethod
    def merge_cells(cls, log_type, crawler, sheetid, ranges):
        """
        Merge cells.
        :param log_type: log path
        :param crawler: which crawler
        :param sheetid: which worksheet
        :param ranges: the cell range to merge
        """
        merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                          + cls.spreadsheettoken(crawler) + "/merge_cells"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "range": sheetid + "!" + ranges,
            "mergeType": "MERGE_ROWS"
        }
        try:
            urllib3.disable_warnings()
            r = requests.post(url=merge_cells_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger(log_type, crawler).info("Merge cells: {}", r.json()["msg"])
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed to merge cells: {}", e)

    # Read a single cell range
    @classmethod
    def get_range_value(cls, log_type, crawler, sheetid, cell):
        """
        Read the content of a cell range.
        :param log_type: log path
        :param crawler: which crawler
        :param sheetid: which worksheet
        :param cell: which cell
        :return: the cell content
        """
        get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                              + cls.spreadsheettoken(crawler) + "/values/" + sheetid + "!" + cell
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            # valueRenderOption=ToString returns plain-text values (except numeric types);
            # valueRenderOption=FormattedValue computes and formats cells;
            # valueRenderOption=Formula returns the formula itself for formula cells;
            # valueRenderOption=UnformattedValue computes but does not format.
            "valueRenderOption": "FormattedValue",
            # dateTimeRenderOption=FormattedString computes and formats dates/times
            # (but not numbers) and returns the formatted string.
            "dateTimeRenderOption": "",
            # type of user id to return; open_id or union_id
            "user_id_type": "open_id"
        }
        try:
            urllib3.disable_warnings()
            r = requests.get(url=get_range_value_url, headers=headers, params=params, proxies=proxies, verify=False)
            return r.json()["data"]["valueRange"]["values"][0]
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed to read cell data: {}", e)
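
    # Usage sketch ("demo" and "SHEETID" are hypothetical): the method
    # returns the first row of the requested range, so a one-cell range
    # yields a one-element list:
    #   cell = Feishu.get_range_value("demo", "kanyikan", "SHEETID", "B2:B2")[0]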

    # Get all non-empty cell contents of a sheet
    @classmethod
    def get_sheet_content(cls, log_type, crawler, sheet_id):
        try:
            sheet = Feishu.get_values_batch(log_type, crawler, sheet_id)
            content_list = []
            for row in sheet:
                for cell in row:
                    if cell is not None:
                        content_list.append(cell)
            return content_list
        except Exception as e:
            Common.logger(log_type, crawler).error(f"get_sheet_content:{e}\n")

    # Delete rows or columns; ROWS or COLUMNS
    @classmethod
    def dimension_range(cls, log_type, crawler, sheetid, major_dimension, startindex, endindex):
        """
        Delete rows or columns.
        :param log_type: log path
        :param crawler: which crawler
        :param sheetid: worksheet
        :param major_dimension: defaults to ROWS; ROWS or COLUMNS
        :param startindex: start position
        :param endindex: end position
        :return:
        """
        dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                              + cls.spreadsheettoken(crawler) + "/dimension_range"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "dimension": {
                "sheetId": sheetid,
                "majorDimension": major_dimension,
                "startIndex": startindex,
                "endIndex": endindex
            }
        }
        try:
            urllib3.disable_warnings()
            r = requests.delete(url=dimension_range_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger(log_type, crawler).info("Delete rows/columns: {}", r.json()["msg"])
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed to delete rows/columns: {}", e)
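
    # Usage sketch ("demo" and "SHEETID" are hypothetical; index semantics
    # follow the Feishu dimension_range DELETE API), e.g. removing row 2:
    #   Feishu.dimension_range("demo", "kanyikan", "SHEETID", "ROWS", 2, 2)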

    # Look up a user's open_id
    @classmethod
    def get_userid(cls, log_type, crawler, username):
        try:
            url = "https://open.feishu.cn/open-apis/user/v1/batch_get_id?"
            headers = {
                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
                "Content-Type": "application/json; charset=utf-8"
            }
            # Map a known username to its mobile number; fall back to the raw value
            name_phone = {
                "wangkun": cls.wangkun,
                "gaonannan": cls.gaonannan,
                "xinxin": cls.xinxin,
                "huxinxue": cls.huxinxue,
            }
            username = name_phone.get(username, username)
            data = {"mobiles": [username]}
            urllib3.disable_warnings()
            r = requests.get(url=url, headers=headers, params=data, verify=False, proxies=proxies)
            open_id = r.json()["data"]["mobile_users"][username][0]["open_id"]
            Common.logger(log_type, crawler).info("{}:{}", username, open_id)
            return open_id
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed in get_userid: {}", e)

    # Feishu bot alert
    @classmethod
    def bot(cls, log_type, crawler, content):
        try:
            url = "https://open.feishu.cn/open-apis/bot/v2/hook/96989577-50e7-4653-9ec2-308fe3f2c5fe"
            headers = {
                "Content-Type": "application/json"
            }
            data = json.dumps({
                "msg_type": "interactive",
                "card": {
                    "config": {
                        "wide_screen_mode": True,
                        "enable_forward": True
                    },
                    "elements": [{
                        "tag": "div",
                        "text": {
                            "content": "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at>\n" + content,
                            "tag": "lark_md"
                        }
                    }, {
                        "actions": [{
                            "tag": "button",
                            "text": {
                                "content": "Kuaishou crawler sheet",
                                "tag": "lark_md"
                            },
                            "url": "https://w42nne6hzg.feishu.cn/sheets/shtcnICEfaw9llDNQkKgdymM1xf",
                            "type": "default",
                            "value": {}
                        }, {
                            "tag": "button",
                            "text": {
                                "content": "Kuaishou Jenkins",
                                "tag": "lark_md"
                            },
                            "url": "https://jenkins-on.yishihui.com/view/%E7%88%AC%E8%99%AB-Spider/job/%E5%BF%"
                                   "AB%E6%89%8B%E5%B0%8F%E7%A8%8B%E5%BA%8F-%E8%A7%86%E9%A2%91%E7%88%AC%E5%8F%96/",
                            "type": "default",
                            "value": {}
                        }],
                        "tag": "action"
                    }],
                    "header": {
                        "title": {
                            "content": "📣 New alert, please check and handle",
                            "tag": "plain_text"
                        }
                    }
                }
            })
            urllib3.disable_warnings()
            r = requests.post(url, headers=headers, data=data, verify=False, proxies=proxies)
            Common.logger(log_type, crawler).info("Bot message sent: {}, {}", r, r.json()["StatusMessage"])
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed in bot: {}", e)


if __name__ == "__main__":
    pass
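    # Minimal smoke test, commented out because it hits the live Feishu API
    # with the credentials above ("demo" is a hypothetical log_type,
    # "SHEETID" a hypothetical worksheet id):
    # print(Feishu.get_token("demo", "kanyikan"))
    # print(Feishu.get_values_batch("demo", "kanyikan", "SHEETID"))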