automation_provide_job_monitor.py

import datetime

from aliyun.log import LogClient
from aliyun.log.auth import AUTH_VERSION_4

from util import feishu_inform_util

# Aliyun SLS (Log Service) connection settings.
endpoint = "cn-hangzhou.log.aliyuncs.com"
access_key = "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P"
access_key_id = "LTAIWYUujJAm7CbH"
project = "crawler-scheduler"
log_store = "aigc-provider"

# Per crawlerMode/result/reason, count distinct videoIds and crawlerPlanIds.
# Null reasons are reported as 成功 ("success"); expected skip reasons (account /
# topic / keyword already has a crawl plan, video recently processed) are excluded.
query_sql = "* | select crawlerMode, result, if(reason='null', '成功', reason) as reason, count(distinct videoId) as videoIdCnt, count(distinct crawlerPlanId) as crawlerPlanIdCnt from log where reason not in ('该账号已经存在爬取计划,跳过执行', '该视频近期已经处理过', '该Topic已经创建过爬取计划', '该关键词已经创建过爬取计划') group by crawlerMode, result, reason order by crawlerMode, result desc, reason"

client = LogClient(endpoint=endpoint, accessKey=access_key, accessKeyId=access_key_id, auth_version=AUTH_VERSION_4, region='cn-hangzhou')

# Feishu bot webhook that receives the daily monitoring card.
webhook = 'https://open.feishu.cn/open-apis/bot/v2/hook/9f5c5cce-5eb2-4731-b368-33926f5549f9'
# Base Feishu card (schema 2.0); the header reads "[Automated supply] daily task execution monitor".
card_json = {
    "schema": "2.0",
    "header": {
        "title": {
            "tag": "plain_text",
            "content": "【自动化供给】日任务执行情况监控"
        },
        "template": "blue"
    },
    "body": {
        "elements": []
    }
}
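
# main() fills card_json["body"]["elements"] with one collapsible panel per
# crawlerMode (built by the helper below) before each send, so several cards
# may be posted when there are many modes.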
def gen_collapsible_panel_json(title, content, is_parent: bool = True) -> dict:
    """Build a collapsed Feishu collapsible_panel element with a markdown body."""
    # Note: is_parent is currently unused.
    return {
        "tag": "collapsible_panel",
        "expanded": False,
        "header": {
            "title": {
                "tag": "plain_text",
                "content": title
            },
            "vertical_align": "center",
        },
        "border": {
            "color": "grey",
            "corner_radius": "5px"
        },
        "elements": [
            {
                "tag": "markdown",
                "content": content
            }
        ]
    }
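
# Illustrative usage (hypothetical values, not real data):
#   gen_collapsible_panel_json("TOPIC", "| reason | videoIdCnt | crawlerPlanIdCnt |\n| --- | --- | --- |\n| 成功 | 12 | 3 |\n")
# should render as a collapsed, grey-bordered panel titled "TOPIC" whose body
# is a markdown table.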
def main():
    # Current date
    today = datetime.datetime.now()
    # Start of the day (00:00:00)
    start_of_day = datetime.datetime.combine(today.date(), datetime.time.min)
    # End of the day (23:59:59.999999)
    end_of_day = datetime.datetime.combine(today.date(), datetime.time.max)
    # Convert to Unix timestamps (seconds)
    start_timestamp = int(start_of_day.timestamp())
    end_timestamp = int(end_of_day.timestamp())

    # Run the aggregation query against SLS for today's logs.
    resp = client.get_log(project=project, logstore=log_store, from_time=start_timestamp, to_time=end_timestamp, query=query_sql)
    log_data = resp.get_body().get('data')
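    # Each row is a dict keyed by the SELECT aliases in query_sql, roughly
    # (values illustrative): {"crawlerMode": "TOPIC", "result": "SUCCESS",
    # "reason": "成功", "videoIdCnt": "12", "crawlerPlanIdCnt": "3"}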
    # Collect the distinct crawlerModes present in today's data, preserving order.
    all_crawler_mode = []
    for datum in log_data:
        if datum.get('crawlerMode') not in all_crawler_mode:
            all_crawler_mode.append(datum.get('crawlerMode'))

    # Send one card per group of up to 5 crawler modes (5 collapsible panels per card).
    collapsible_limit = 5
    crawler_mode_group = [all_crawler_mode[i:i + collapsible_limit] for i in range(0, len(all_crawler_mode), collapsible_limit)]
    for crawler_mode_list in crawler_mode_group:
        elements = []
        for crawler_mode in crawler_mode_list:
            # Build a markdown table of reason/count rows for this crawlerMode.
            content = "| reason | videoIdCnt | crawlerPlanIdCnt |\n"
            content += "| --- | --- | --- |\n"
            for datum in log_data:
                if crawler_mode != datum.get('crawlerMode'):
                    continue
                reason = datum.get('reason')
                video_id_cnt = datum.get('videoIdCnt')
                crawler_plan_id_cnt = datum.get('crawlerPlanIdCnt')
                content += f"| {reason} | {video_id_cnt} | {crawler_plan_id_cnt} |\n"
            elements.append(gen_collapsible_panel_json(crawler_mode, content))
        card_json["body"]["elements"] = elements
        feishu_inform_util.send_card_msg_to_feishu(webhook, card_json)


if __name__ == "__main__":
    main()
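
# feishu_inform_util.send_card_msg_to_feishu is an internal helper that is not
# shown in this file. A minimal sketch of what it might look like, assuming the
# standard Feishu custom-bot webhook API (POST a JSON payload with msg_type
# "interactive" and the card object); the real util may differ:
#
#   import requests
#
#   def send_card_msg_to_feishu(webhook_url, card):
#       payload = {"msg_type": "interactive", "card": card}
#       resp = requests.post(webhook_url, json=payload, timeout=10)
#       resp.raise_for_status()
#       return resp.json()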