import json
import time
from datetime import datetime
from typing import List, Optional

from aliyun.log import GetLogsRequest, LogClient, LogItem, PutLogsRequest
from loguru import logger

from utils import get_global_config
# NOTE: these model imports are assumptions; point them at the project's real
# task/content model module.
from models import (AiDitAccount, AiDitContent, CrawlerAccountPortrait,
                    CrawlerContentPortrait, CrawlerTask)

_config = get_global_config().log.aliyun


class AliyunLog(object):
    client = LogClient(endpoint=_config.endpoint,
                       accessKeyId=_config.access_key_id,
                       accessKey=_config.access_key_secret)
    project_name = 'cyber-crawler-prod'
    logstore_name = 'error-log'
    process_logstore_name = 'process-log'
    @classmethod
    def record(cls, task: CrawlerTask, stacktrace: str):
        """Write task metadata plus a stacktrace to the error logstore."""
        log_item = LogItem()
        log_item.set_contents([
            ('task_id', task.task_id),
            ('plan_id', task.plan_id),
            ('plan_type', str(task.plan_type.value.id)),
            ('channel', str(task.channel.value.id)),
            ('crawler_mode', str(task.crawler_mode.value.id)),
            ('task_params', json.dumps(task.task_params)),  # LogItem values must be strings
            ('stacktrace', stacktrace),
        ])
        request = PutLogsRequest(project=cls.project_name,
                                 logstore=cls.logstore_name,
                                 logitems=[log_item],
                                 compress=False)
        cls.client.put_logs(request)
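
    # Usage sketch (the call site is hypothetical): record is meant to run
    # inside an except block with the formatted traceback, e.g.:
    #
    #     import traceback
    #     try:
    #         run_crawl(task)              # hypothetical crawl entry point
    #     except Exception:
    #         AliyunLog.record(task, traceback.format_exc())
    #         raise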
    @classmethod
    def process(cls, task: CrawlerTask, process_step: str, log_type: str, message: str,
                content: Optional[AiDitContent],
                account: Optional[AiDitAccount],
                content_portrait: Optional[List[CrawlerContentPortrait]],
                account_portrait: Optional[List[CrawlerAccountPortrait]]):
        """
        Record task execution and the crawl pipeline's progress.
        process_step: crawler, skip, filter, after_filter
        log_type: content, content_portrait, account_portrait
        """
        try:
            # Serialize each object only when it is present; otherwise keep None.
            content_str = content.model_dump_json() if content else None
            account_str = account.model_dump_json() if account else None
            # Dump every portrait to a dict, then serialize the whole list as JSON.
            content_portrait_str = (json.dumps([item.model_dump() for item in content_portrait])
                                    if content_portrait else None)
            account_portrait_str = (json.dumps([item.model_dump() for item in account_portrait])
                                    if account_portrait else None)
            # Optional task fields fall back to empty strings so that every
            # value handed to the SDK is a str.
            plan_type = str(task.plan_type.value.id) if task.plan_type is not None else ''
            channel = str(task.channel.value.id) if task.channel is not None else ''
            crawler_mode = str(task.crawler_mode.value.id) if task.crawler_mode is not None else ''
            task_params = json.dumps(task.task_params) if task.task_params is not None else ''

            log_item = LogItem()
            log_item.set_contents([
                ('task_id', task.task_id),
                ('plan_id', task.plan_id),
                ('plan_type', plan_type),
                ('channel', channel),
                ('crawler_mode', crawler_mode),
                ('task_params', task_params),
                ('process_step', process_step),
                ('log_type', log_type),
                ('message', message),
                ('content', content_str or ''),
                ('account', account_str or ''),
                ('content_portrait', content_portrait_str or ''),
                ('account_portrait', account_portrait_str or ''),
                ('timestamp', str(time.time())),
            ])
            request = PutLogsRequest(project=cls.project_name,
                                     logstore=cls.process_logstore_name,
                                     logitems=[log_item],
                                     compress=False)
            cls.client.put_logs(request)
        except Exception:
            # A logging failure must never break the crawl itself.
            logger.exception('AliyunLog.process failed')
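
    # Usage sketch (values illustrative): each pipeline stage reports what it
    # kept or dropped, e.g. after the filter step:
    #
    #     AliyunLog.process(task, process_step='filter', log_type='content',
    #                       message='dropped: duplicate url',
    #                       content=content, account=None,
    #                       content_portrait=None, account_portrait=None)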
    @classmethod
    def info(cls, path: str, channel: int, params: str, response: str, status_code: int,
             msg: str = '', token: str = ''):
        log_item = LogItem()
        log_item.set_contents([
            ('path', path),
            ('channel', str(channel)),  # LogItem values must be strings
            ('params', params),
            ('response', response),
            ('status_code', str(status_code)),
            ('msg', msg),
            ('token', token),
        ])
        request = PutLogsRequest(project=cls.project_name,
                                 logstore='request-log',
                                 logitems=[log_item],
                                 compress=False)
        cls.client.put_logs(request)
    @classmethod
    def req_info(cls, channel: str, params: str, response: str, source: str, path: str = '/',
                 status_code: int = 0, token: str = ''):
        try:
            log_item = LogItem()
            log_item.set_contents([
                ('channel', channel),
                ('params', str(params)),
                ('response', str(response)),
                ('path', path),
                ('source', source),
                ('status_code', str(status_code)),
                ('token', token),
            ])
            request = PutLogsRequest(project=cls.project_name,
                                     logstore='info-log',
                                     logitems=[log_item],
                                     compress=False)
            cls.client.put_logs(request)
        except Exception as e:
            logger.error(f"AliyunLog.req_info error: {e}")
    @classmethod
    def http_req_info(cls, path: str, params: str, response: str, status_code: int = 0):
        log_item = LogItem()
        log_item.set_contents([
            ('path', path),
            ('params', params),
            ('response', response),
            ('status_code', str(status_code)),  # LogItem values must be strings
        ])
        request = PutLogsRequest(project=cls.project_name,
                                 logstore='info-log',
                                 logitems=[log_item],
                                 compress=False)
        cls.client.put_logs(request)
    @classmethod
    def get_log(cls):
        # GetLogsRequest expects Unix timestamps in seconds (not milliseconds);
        # the query window here is the last 24 hours.
        to_time = int(datetime.now().timestamp())
        from_time = to_time - 24 * 60 * 60
        response = cls.client.get_logs(GetLogsRequest(project='cyber-crawler-prod',
                                                      logstore='request-log',
                                                      topic='',
                                                      fromTime=from_time,
                                                      toTime=to_time,
                                                      query='path: /crawler/moonshot/kimi and status_code :10000'))
        print(response.body)
        return response
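
# Reading results back: the GetLogsResponse returned above exposes matched rows
# via get_logs(); a minimal sketch using aliyun-log-python-sdk accessors:
#
#     resp = AliyunLog.get_log()
#     for log in resp.get_logs():
#         print(log.get_time(), log.get_contents())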


class AliyunHkLog(object):
    client = LogClient(endpoint='cn-hongkong.log.aliyuncs.com',
                       accessKeyId=_config.access_key_id,
                       accessKey=_config.access_key_secret)
    project_name = 'cyber-crawler-prod'

    @classmethod
    def get_log(cls, query: str, project_name: str = 'cyber-crawler-prod',
                logstore_name: str = 'request-log'):
        # Query the full previous calendar day (midnight to midnight).
        today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        to_time = int(today.timestamp())
        from_time = to_time - 24 * 60 * 60
        response = cls.client.get_logs(GetLogsRequest(project=project_name,
                                                      logstore=logstore_name,
                                                      topic='',
                                                      fromTime=from_time,
                                                      toTime=to_time,
                                                      query=query))
        return response.body