# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/1/31
"""
Feishu spreadsheet helpers: token auth / CRUD operations / bot alerts
"""
import json
import os
import sys

import requests
import urllib3

sys.path.append(os.getcwd())
from common.common import Common

# Bypass any system-wide HTTP(S) proxies when calling the Feishu API
proxies = {"http": None, "https": None}
class Feishu:
    """
    Edit Feishu cloud spreadsheets
    """
    # "Kanyikan" crawler data sheet
    kanyikan_url = "https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?"
    # "Kuaishou" crawler data sheet
    kuaishou_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnICEfaw9llDNQkKgdymM1xf?"
    # "Weishi" crawler data sheet
    weishi_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?"
    # "Xiaoniangao" crawler data sheet
    xiaoniangao_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?"
    # Music album sheet
    music_album = "https://w42nne6hzg.feishu.cn/sheets/shtcnT6zvmfsYe1g0iv4pt7855g?"
    # "Benshan Zhufu" data sheet
    crawler_benshanzhufu = "https://w42nne6hzg.feishu.cn/sheets/shtcnGh2rrsPYM4iVNEBO7OqWrb?"
    # Official-account (gongzhonghao) crawler sheet
    gzh_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnexNXnpDLHhARw0QdiwbYuA?"
    # Data monitoring sheet
    crawler_monitor = "https://w42nne6hzg.feishu.cn/sheets/shtcnlZWYazInhf7Z60jkbLRJyd?"
    # WeChat-group video crawler sheet
    crawler_weiqun_video = "https://w42nne6hzg.feishu.cn/sheets/shtcnoKThNquYRweaylMFVyo9Hc?"
    # Video-account (shipinhao) crawler sheet
    crawler_shipinhao = "https://w42nne6hzg.feishu.cn/sheets/shtcn9rOdZRAGFbRkWpn7hqEHGc?"
    # Xigua Video sheet
    crawler_xigua = "https://w42nne6hzg.feishu.cn/sheets/shtcnvOpx2P8vBXiV91Ot1MKIw8?"
    # Zhihu (desktop web) sheet
    crawler_zhihu = "https://w42nne6hzg.feishu.cn/sheets/shtcnkGPBmGsjaqapgzouuj8MXe?"
    # "Jixiang Xingfu" sheet
    crawler_jixiangxingfu = "https://w42nne6hzg.feishu.cn/sheets/shtcnSx4nafMbLTq7xl7RHBwHBf?"
    # "Zhongmiao Yinxin" sheet
    crawler_zmyx = "https://w42nne6hzg.feishu.cn/sheets/shtcnbZIxstPeM0xshW07b26sve?"
    # "Suisuiniannian Yingfuqi" sheet
    crawler_ssnnyfq = "https://w42nne6hzg.feishu.cn/sheets/shtcnyJmJSJynHDLLbLTkySfvZe?"
    # "Zhufumao" video sheet
    crawler_zhufumao = "https://w42nne6hzg.feishu.cn/sheets/shtcnXfIJthvkjhI5zlEJq84i6g?"
    # Religion official-account sheet
    crawler_zongjiao = "https://w42nne6hzg.feishu.cn/sheets/shtcn73NW0CyoOeF21HWO15KBsb?"
    # Haokan Video sheet
    crawler_haokan = "https://w42nne6hzg.feishu.cn/sheets/shtcnaYz8Nhv8q6DbWtlL6rMEBd"
    # "Kandao Jiushi Fuqi" sheet
    crawler_kandaojiushifuqi = "https://w42nne6hzg.feishu.cn/sheets/shtcnEokBkIjOUPAk8vbbPKnXgb"
    # "Shengsheng Yingyin" sheet
    crawler_shengshengyingyin = "https://w42nne6hzg.feishu.cn/sheets/shtcnz1ymxHL1u8WHblfqfys7qe"
    # "Ganggang Douchuan" sheet
    crawler_ganggangdouchuan = "https://w42nne6hzg.feishu.cn/sheets/shtcnTuJgeZU2bc7VaesAqk3QJx"
    # Official account _ Xinxin
    crawler_gongzhonghao = "https://w42nne6hzg.feishu.cn/sheets/shtcna98M2mX7TbivTj9Sb7WKBN?"
    # YouTube sheet
    crawler_youtube = "https://w42nne6hzg.feishu.cn/sheets/shtcnrLyr1zbYbhhZyqpN7Xrd5f?"

    # Mobile numbers (used to look up Feishu open_ids)
    wangkun = "13426262515"
    gaonannan = "18501180073"
    xinxin = "15546206651"
    huxinxue = "18832292015"
    # Feishu spreadsheet tokens (the path segment of each sheet URL above)
    @classmethod
    def spreadsheettoken(cls, crawler):
        """
        :param crawler: which crawler's spreadsheet
        :return: the spreadsheet token, or None for an unknown crawler
        """
        tokens = {
            "kanyikan": "shtcngRPoDYAi24x52j2nDuHMih",
            "kuaishou": "shtcnICEfaw9llDNQkKgdymM1xf",
            "weishi": "shtcn5YSWg91JfVGzj0SFZIRRPh",
            "xiaoniangao": "shtcnYxiyQ1wLklo1W5Kdqc9cGh",
            "monitor": "shtcnlZWYazInhf7Z60jkbLRJyd",
            "music_album": "shtcnT6zvmfsYe1g0iv4pt7855g",
            "bszf": "shtcnGh2rrsPYM4iVNEBO7OqWrb",
            "gzh": "shtcnexNXnpDLHhARw0QdiwbYuA",
            "weiqun": "shtcnoKThNquYRweaylMFVyo9Hc",
            "shipinhao": "shtcn9rOdZRAGFbRkWpn7hqEHGc",
            "xigua": "shtcnvOpx2P8vBXiV91Ot1MKIw8",
            "zhihu": "shtcnkGPBmGsjaqapgzouuj8MXe",
            "jxxf": "shtcnSx4nafMbLTq7xl7RHBwHBf",
            "zmyx": "shtcnbZIxstPeM0xshW07b26sve",
            "ssnnyfq": "shtcnyJmJSJynHDLLbLTkySfvZe",
            "zhufumao": "shtcnXfIJthvkjhI5zlEJq84i6g",
            "zongjiao": "shtcn73NW0CyoOeF21HWO15KBsb",
            "haokan": "shtcnaYz8Nhv8q6DbWtlL6rMEBd",
            "kdjsfq": "shtcnEokBkIjOUPAk8vbbPKnXgb",
            "ssyy": "shtcnz1ymxHL1u8WHblfqfys7qe",
            "ggdc": "shtcnTuJgeZU2bc7VaesAqk3QJx",
            "gongzhonghao_xinxin": "shtcna98M2mX7TbivTj9Sb7WKBN",
            "youtube": "shtcnrLyr1zbYbhhZyqpN7Xrd5f",
        }
        return tokens.get(crawler)
    # Fetch a Feishu API token
    @classmethod
    def get_token(cls, log_type, crawler):
        """
        Fetch a Feishu API tenant_access_token
        :return: the token string, or None on failure
        """
        url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
        # app_id / app_secret of the published Feishu app's backend
        post_data = {"app_id": "cli_a13ad2afa438d00b",
                     "app_secret": "4tK9LY9VbiQlY5umhE42dclBFo6t4p5O"}
        try:
            urllib3.disable_warnings()
            response = requests.post(url=url, data=post_data, proxies=proxies, verify=False)
            tenant_access_token = response.json()["tenant_access_token"]
            return tenant_access_token
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed to fetch Feishu API token: {}", e)
    # Fetch spreadsheet metadata
    @classmethod
    def get_metainfo(cls, log_type, crawler):
        """
        Fetch the spreadsheet's metadata
        :return: the metadata response as a dict
        """
        get_metainfo_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                           + cls.spreadsheettoken(crawler) + "/metainfo"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            "extFields": "protectedRange",  # extFields=protectedRange also returns protected row/column info
            "user_id_type": "open_id"  # type of user IDs to return: open_id or union_id
        }
        try:
            urllib3.disable_warnings()
            r = requests.get(url=get_metainfo_url, headers=headers, params=params, proxies=proxies, verify=False)
            response = json.loads(r.content.decode("utf8"))
            return response
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed to fetch spreadsheet metadata: {}", e)
    # Read all data from a worksheet
    @classmethod
    def get_values_batch(cls, log_type, crawler, sheetid):
        """
        Read all data from a worksheet
        :param log_type: which log to use
        :param crawler: which crawler
        :param sheetid: which worksheet
        :return: all cell values
        """
        get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                               + cls.spreadsheettoken(crawler) + "/values_batch_get"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            # Multiple query ranges, e.g. url?ranges=range1,range2, where each
            # range consists of a sheetId plus an optional cell range
            "ranges": sheetid,
            # valueRenderOption=ToString returns plain-text values (except numeric types);
            # valueRenderOption=FormattedValue computes and formats cells;
            # valueRenderOption=Formula returns the formula itself for formula cells;
            # valueRenderOption=UnformattedValue computes but does not format cells
            "valueRenderOption": "ToString",
            # dateTimeRenderOption=FormattedString computes and formats date/time cells
            # according to their format (numbers are not formatted) and returns the string
            "dateTimeRenderOption": "",
            # type of user IDs to return: open_id or union_id
            "user_id_type": "open_id"
        }
        try:
            urllib3.disable_warnings()
            r = requests.get(url=get_values_batch_url, headers=headers, params=params, proxies=proxies, verify=False)
            response = json.loads(r.content.decode("utf8"))
            values = response["data"]["valueRanges"][0]["values"]
            return values
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed to read worksheet data: {}", e)
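    # Usage sketch (illustrative only: "demo" and "<sheet_id>" are placeholders;
    # each element of the result is one row, itself a list of cell values):
    #   rows = Feishu.get_values_batch("demo", "kanyikan", "<sheet_id>")
    #   header, data_rows = rows[0], rows[1:]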
    # Insert rows or columns into a worksheet
    @classmethod
    def insert_columns(cls, log_type, crawler, sheetid, majordimension, startindex, endindex):
        """
        Insert rows or columns into a worksheet
        :param log_type: log path
        :param crawler: which crawler's cloud document
        :param sheetid: which worksheet
        :param majordimension: rows or columns, ROWS / COLUMNS
        :param startindex: start position
        :param endindex: end position
        """
        insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                             + cls.spreadsheettoken(crawler) + "/insert_dimension_range"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "dimension": {
                "sheetId": sheetid,
                "majorDimension": majordimension,  # defaults to ROWS; ROWS or COLUMNS
                "startIndex": startindex,  # start position
                "endIndex": endindex  # end position
            },
            "inheritStyle": "AFTER"  # BEFORE or AFTER; omit to not inherit style
        }
        try:
            urllib3.disable_warnings()
            r = requests.post(url=insert_columns_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger(log_type, crawler).info("Insert rows/columns: {}", r.json()["msg"])
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed to insert rows/columns: {}", e)
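    # Usage sketch (illustrative; per Feishu's insert_dimension_range semantics
    # this should insert endindex - startindex rows after row `startindex`):
    #   Feishu.insert_columns("demo", "kanyikan", "<sheet_id>", "ROWS", 1, 2)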
    # Write data
    @classmethod
    def update_values(cls, log_type, crawler, sheetid, ranges, values):
        """
        Write data into a worksheet
        :param log_type: log path
        :param crawler: which crawler's cloud document
        :param sheetid: which worksheet
        :param ranges: cell range
        :param values: the data to write, as a list
        """
        update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                            + cls.spreadsheettoken(crawler) + "/values_batch_update"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "valueRanges": [
                {
                    "range": sheetid + "!" + ranges,
                    "values": values
                },
            ],
        }
        try:
            urllib3.disable_warnings()
            r = requests.post(url=update_values_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger(log_type, crawler).info("Write data: {}", r.json()["msg"])
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed to write data: {}", e)
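    # Usage sketch (illustrative): `values` is a list of rows, each row a list of
    # cells, and its shape should match the A1-style range:
    #   Feishu.update_values("demo", "kanyikan", "<sheet_id>", "A2:B2", [["foo", "bar"]])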
    # Merge cells
    @classmethod
    def merge_cells(cls, log_type, crawler, sheetid, ranges):
        """
        Merge cells
        :param log_type: log path
        :param crawler: which crawler
        :param sheetid: which worksheet
        :param ranges: the cell range to merge
        """
        merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                          + cls.spreadsheettoken(crawler) + "/merge_cells"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "range": sheetid + "!" + ranges,
            "mergeType": "MERGE_ROWS"
        }
        try:
            urllib3.disable_warnings()
            r = requests.post(url=merge_cells_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger(log_type, crawler).info("Merge cells: {}", r.json()["msg"])
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed to merge cells: {}", e)
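    # Usage sketch (illustrative; note mergeType is hardcoded to MERGE_ROWS, so
    # cells are merged within each row of the range):
    #   Feishu.merge_cells("demo", "kanyikan", "<sheet_id>", "A1:C1")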
    # Read a single cell range
    @classmethod
    def get_range_value(cls, log_type, crawler, sheetid, cell):
        """
        Read the contents of a cell range
        :param log_type: log path
        :param crawler: which crawler
        :param sheetid: which worksheet
        :param cell: which cell range
        :return: the cell contents
        """
        get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                              + cls.spreadsheettoken(crawler) + "/values/" + sheetid + "!" + cell
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            # valueRenderOption=ToString returns plain-text values (except numeric types);
            # valueRenderOption=FormattedValue computes and formats cells;
            # valueRenderOption=Formula returns the formula itself for formula cells;
            # valueRenderOption=UnformattedValue computes but does not format cells.
            "valueRenderOption": "FormattedValue",
            # dateTimeRenderOption=FormattedString computes and formats date/time cells
            # according to their format (numbers are not formatted) and returns the string.
            "dateTimeRenderOption": "",
            # type of user IDs to return: open_id or union_id
            "user_id_type": "open_id"
        }
        try:
            urllib3.disable_warnings()
            r = requests.get(url=get_range_value_url, headers=headers, params=params, proxies=proxies, verify=False)
            return r.json()["data"]["valueRange"]["values"][0]
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed to read cell data: {}", e)
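    # Usage sketch (illustrative; `cell` is an A1-style range, and the method
    # returns the first row of the matched values):
    #   first_row = Feishu.get_range_value("demo", "kanyikan", "<sheet_id>", "B2:B2")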
    # Collect all non-empty cells of a worksheet into a flat list
    @classmethod
    def get_sheet_content(cls, log_type, crawler, sheet_id):
        try:
            sheet = Feishu.get_values_batch(log_type, crawler, sheet_id)
            content_list = []
            for row in sheet:
                for cell in row:
                    if cell is not None:
                        content_list.append(cell)
            return content_list
        except Exception as e:
            Common.logger(log_type, crawler).error(f"get_sheet_content:{e}\n")
    # Delete rows or columns; majorDimension is ROWS or COLUMNS
    @classmethod
    def dimension_range(cls, log_type, crawler, sheetid, major_dimension, startindex, endindex):
        """
        Delete rows or columns
        :param log_type: log path
        :param crawler: which crawler
        :param sheetid: which worksheet
        :param major_dimension: defaults to ROWS; ROWS or COLUMNS
        :param startindex: start position
        :param endindex: end position
        :return:
        """
        dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                              + cls.spreadsheettoken(crawler) + "/dimension_range"
        headers = {
            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "dimension": {
                "sheetId": sheetid,
                "majorDimension": major_dimension,
                "startIndex": startindex,
                "endIndex": endindex
            }
        }
        try:
            urllib3.disable_warnings()
            r = requests.delete(url=dimension_range_url, headers=headers, json=body, proxies=proxies, verify=False)
            Common.logger(log_type, crawler).info("Delete rows/columns: {}", r.json()["msg"])
        except Exception as e:
            Common.logger(log_type, crawler).error("Failed to delete rows/columns: {}", e)
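    # Usage sketch (illustrative; deletes the row/column span between startindex
    # and endindex per Feishu's dimension_range DELETE semantics):
    #   Feishu.dimension_range("demo", "kanyikan", "<sheet_id>", "ROWS", 2, 2)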
    # Look up a user's Feishu open_id
    @classmethod
    def get_userid(cls, log_type, crawler, username):
        try:
            url = "https://open.feishu.cn/open-apis/user/v1/batch_get_id?"
            headers = {
                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
                "Content-Type": "application/json; charset=utf-8"
            }
            # Map a known username to its mobile number; unknown names pass through
            name_phone_map = {
                "wangkun": cls.wangkun,
                "gaonannan": cls.gaonannan,
                "xinxin": cls.xinxin,
                "huxinxue": cls.huxinxue
            }
            username = name_phone_map.get(username, username)
            data = {"mobiles": [username]}
            urllib3.disable_warnings()
            r = requests.get(url=url, headers=headers, params=data, verify=False, proxies=proxies)
            open_id = r.json()["data"]["mobile_users"][username][0]["open_id"]
            Common.logger(log_type, crawler).info("{}: {}", username, open_id)
            return open_id
        except Exception as e:
            Common.logger(log_type, crawler).error("get_userid failed: {}", e)
    # Feishu bot alert
    @classmethod
    def bot(cls, log_type, crawler, content):
        try:
            url = "https://open.feishu.cn/open-apis/bot/v2/hook/96989577-50e7-4653-9ec2-308fe3f2c5fe"
            headers = {
                "Content-Type": "application/json"
            }
            data = json.dumps({
                "msg_type": "interactive",
                "card": {
                    "config": {
                        "wide_screen_mode": True,
                        "enable_forward": True
                    },
                    "elements": [{
                        "tag": "div",
                        "text": {
                            "content": "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at>\n" + content,
                            "tag": "lark_md"
                        }
                    }, {
                        "actions": [{
                            "tag": "button",
                            "text": {
                                "content": "Kuaishou crawler sheet",
                                "tag": "lark_md"
                            },
                            "url": "https://w42nne6hzg.feishu.cn/sheets/shtcnICEfaw9llDNQkKgdymM1xf",
                            "type": "default",
                            "value": {}
                        },
                            {
                                "tag": "button",
                                "text": {
                                    "content": "Kuaishou Jenkins",
                                    "tag": "lark_md"
                                },
                                "url": "https://jenkins-on.yishihui.com/view/%E7%88%AC%E8%99%AB-Spider/job/%E5%BF%"
                                       "AB%E6%89%8B%E5%B0%8F%E7%A8%8B%E5%BA%8F-%E8%A7%86%E9%A2%91%E7%88%AC%E5%8F%96/",
                                "type": "default",
                                "value": {}
                            }
                        ],
                        "tag": "action"
                    }],
                    "header": {
                        "title": {
                            "content": "📣 New alert, please investigate",
                            "tag": "plain_text"
                        }
                    }
                }
            })
            urllib3.disable_warnings()
            r = requests.post(url, headers=headers, data=data, verify=False, proxies=proxies)
            Common.logger(log_type, crawler).info("Bot message sent: {}, {}", r, r.json()["StatusMessage"])
        except Exception as e:
            Common.logger(log_type, crawler).error("bot failed: {}", e)
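    # Usage sketch (illustrative; posts an interactive alert card to the
    # hardcoded webhook above, @-mentioning "wangkun"):
    #   Feishu.bot("demo", "kuaishou", "Kuaishou crawler stopped unexpectedly")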

if __name__ == "__main__":
    pass
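    # Minimal smoke test sketch (commented out; requires valid app credentials
    # and network access; "demo", "kanyikan" and "<sheet_id>" are illustrative):
    # token = Feishu.get_token("demo", "kanyikan")
    # print(token)
    # print(Feishu.get_values_batch("demo", "kanyikan", "<sheet_id>"))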