# NOTE: This entire module is commented out (disabled); nothing below is executed.
#
# import json
# import time
# import traceback
# from syslog import syslog
# from typing import Optional, List
#
# from aliyun.log import LogClient, LogItem, PutLogsRequest, GetLogsRequest
# from loguru import logger
# from tornado.process import task_id
#
# from utils import get_global_config
# from datetime import datetime
#
# _config = get_global_config().log.aliyun
#
#
# class AliyunLog(object):
#     client = LogClient(endpoint=_config.endpoint,
#                        accessKey=_config.access_key_secret,
#                        accessKeyId=_config.access_key_id)
#     project_name = 'cyber-crawler-prod'
#     logstore_name = 'error-log'
#     process_logstore_name = 'process-log'
#
#     @classmethod
#     def record(cls, task: CrawlerTask, stacktrace: str):
#         log_item = LogItem()
#         log_item.set_contents([
#             ('task_id', task.task_id),
#             ('plan_id', task.plan_id),
#             ('plan_type', str(task.plan_type.value.id)),
#             ('channel', str(task.channel.value.id)),
#             ('crawler_mode', str(task.crawler_mode.value.id)),
#             ('task_params', task.task_params),
#             ('stacktrace', stacktrace),
#         ])
#         request = PutLogsRequest(project=cls.project_name,
#                                  logstore=cls.logstore_name,
#                                  logitems=[log_item],
#                                  compress=False)
#         cls.client.put_logs(request)
#
#     @classmethod
#     def process(cls, task: CrawlerTask, process_step: str, log_type: str, message: str,
#                 content: Optional[AiDitContent],
#                 account: Optional[AiDitAccount],
#                 content_portrait: Optional[List[CrawlerContentPortrait]],
#                 account_portrait: Optional[List[CrawlerAccountPortrait]]):
#         """
#         Record task execution and the crawl process.
#         process_step: crawler, skip, filter, after_filter
#         log_type: content, content_portrait, account_portrait
#         """
#         try:
#             # Serialize.
#             # Serialize only when the object is not None; otherwise the result is None.
#             content_str = content.model_dump_json() if content else None
#             account_str = account.model_dump_json() if account else None
#
#             # Serialize.
#             if content_portrait:
#                 # Use a list comprehension to convert each object to a dict, then serialize the whole list.
#                 content_portrait_str = json.dumps([item.model_dump() for item in content_portrait])
#             else:
#                 content_portrait_str = None
#
#             if account_portrait:
#                 # Use a list comprehension to convert each object to a dict, then serialize the whole list.
#                 account_portrait_str = json.dumps([item.model_dump() for item in account_portrait])
#             else:
#                 account_portrait_str = None
#
#             log_item = LogItem()
#             task_id = task.task_id
#             plan_id = task.plan_id
#             plan_type = ''
#             if task.plan_type is not None:
#                 plan_type = str(task.plan_type.value.id)
#             channel = ''
#             if task.channel is not None:
#                 channel = str(task.channel.value.id)
#             crawler_mode = ''
#             if task.crawler_mode is not None:
#                 crawler_mode = str(task.crawler_mode.value.id)
#             task_params = ''
#             if task.task_params is not None:
#                 task_params = json.dumps(task.task_params)
#             log_item.set_contents([
#                 # ('task_id', task.task_id),
#                 # ('plan_id', task.plan_id),
#                 # ('plan_type', str(task.plan_type.value.id)),
#                 # ('channel', str(task.channel.value.id)),
#                 # ('crawler_mode', str(task.crawler_mode.value.id)),
#                 # ('task_params', task.task_params),
#                 ('task_id', task_id),
#                 ('plan_id', plan_id),
#                 ('plan_type', plan_type),
#                 ('channel', channel),
#                 ('crawler_mode', crawler_mode),
#                 ('task_params', task_params),
#                 ('process_step', process_step),
#                 ('log_type', log_type),
#                 ('message', message),
#                 ('content', content_str or ''),
#                 ('account', account_str or ''),
#                 ('content_portrait', content_portrait_str or ''),
#                 ('account_portrait', account_portrait_str or ''),
#                 ('timestamp', str(time.time())),
#             ])
#             request = PutLogsRequest(project=cls.project_name,
#                                      logstore=cls.process_logstore_name,
#                                      logitems=[log_item],
#                                      compress=False)
#             cls.client.put_logs(request)
#         except Exception as e:
#             traceback.print_exc()
#
#     @classmethod
#     def info(cls, path: str, channel: int, params: str, response: str, status_code: int, msg: str = '',
#              token: str = ''):
#         log_item = LogItem()
#         log_item.set_contents([
#             ('path', path),
#             ('channel', channel),
#             ('params', params),
#             ('response', response),
#             ('status_code', status_code),
#             ('msg', msg),
#             ('token', token)
#         ])
#         request = PutLogsRequest(project=cls.project_name,
#                                  logstore='request-log',
#                                  logitems=[log_item],
#                                  compress=False)
#         cls.client.put_logs(request)
#
#     @classmethod
#     def req_info(cls, channel: str, params: str, response: str, source: str, path: str = '/', status_code: int = 0,
#                  token: str = ''):
#         try:
#             log_item = LogItem()
#             log_item.set_contents([
#                 ('channel', channel),
#                 ('params', str(params)),
#                 ('response', str(response)),
#                 ('path', path),
#                 ('source', source),
#                 ('status_code', str(status_code)),
#                 ('token', token)
#             ])
#             request = PutLogsRequest(project=cls.project_name,
#                                      logstore='info-log',
#                                      logitems=[log_item],
#                                      compress=False)
#             cls.client.put_logs(request)
#         except Exception as e:
#             logger.error(f"AliyunLog.req_info error: {e}")
#             pass
#
#     @classmethod
#     def http_req_info(cls, path: str, params: str, response: str, status_code: int = 0):
#         log_item = LogItem()
#         log_item.set_contents([
#             ('path', path),
#             ('params', params),
#             ('response', response),
#             ('status_code', status_code)
#         ])
#         request = PutLogsRequest(project=cls.project_name,
#                                  logstore='info-log',
#                                  logitems=[log_item],
#                                  compress=False)
#         cls.client.put_logs(request)
#
#     @classmethod
#     def get_log(cls):
#         # NOTE(review): from_time/to_time here are in milliseconds, whereas AliyunHkLog.get_log
#         # below passes seconds to the same GetLogsRequest fields — confirm the expected unit
#         # before re-enabling.
#         from_time = int(datetime.now().timestamp() * 1000) - 1000 * 60 * 60 * 24
#         to_time = int(datetime.now().timestamp() * 1000)
#         response = cls.client.get_logs(GetLogsRequest(project='cyber-crawler-prod',
#                                                       logstore='request-log',
#                                                       topic='',
#                                                       fromTime=from_time,
#                                                       toTime=to_time,
#                                                       query='path: /crawler/moonshot/kimi and status_code :10000'))
#         print(response.body)
#         return response
#
#
# class AliyunHkLog(object):
#     client = LogClient(endpoint='cn-hongkong.log.aliyuncs.com',
#                        accessKey=_config.access_key_secret,
#                        accessKeyId=_config.access_key_id)
#     project_name = 'cyber-crawler-prod'
#
#     @classmethod
#     def get_log(cls, query: str, project_name: str = 'cyber-crawler-prod', logstore_name: str = 'request-log'):
#         # NOTE(review): the project_name parameter is never read; cls.project_name is passed
#         # instead — confirm intent before re-enabling.
#         today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
#         from_time = int(today.timestamp()) - 24 * 60 * 60
#         to_time = int(today.timestamp())
#         response = cls.client.get_logs(GetLogsRequest(project=cls.project_name,
#                                                       logstore=logstore_name,
#                                                       topic='',
#                                                       fromTime=from_time,
#                                                       toTime=to_time,
#                                                       query=query))
#         return response.body