|
|
@@ -0,0 +1,210 @@
|
|
|
+# import json
|
|
|
+# import time
|
|
|
+# import traceback
|
|
|
+# from syslog import syslog
|
|
|
+# from typing import Optional, List
|
|
|
+
|
|
|
+# from aliyun.log import LogClient, LogItem, PutLogsRequest, GetLogsRequest
|
|
|
+# from loguru import logger
|
|
|
+# from tornado.process import task_id
|
|
|
+
|
|
|
+
|
|
|
+# from utils import get_global_config
|
|
|
+# from datetime import datetime
|
|
|
+
|
|
|
+# _config = get_global_config().log.aliyun
|
|
|
+
|
|
|
+
|
|
|
+# class AliyunLog(object):
|
|
|
+# client = LogClient(endpoint=_config.endpoint,
|
|
|
+# accessKey=_config.access_key_secret,
|
|
|
+# accessKeyId=_config.access_key_id)
|
|
|
+# project_name = 'cyber-crawler-prod'
|
|
|
+# logstore_name = 'error-log'
|
|
|
+# process_logstore_name = 'process-log'
|
|
|
+
|
|
|
+# @classmethod
|
|
|
+# def record(cls, task: CrawlerTask, stacktrace: str):
|
|
|
+# log_item = LogItem()
|
|
|
+# log_item.set_contents([
|
|
|
+# ('task_id', task.task_id),
|
|
|
+# ('plan_id', task.plan_id),
|
|
|
+# ('plan_type', str(task.plan_type.value.id)),
|
|
|
+# ('channel', str(task.channel.value.id)),
|
|
|
+# ('crawler_mode', str(task.crawler_mode.value.id)),
|
|
|
+# ('task_params', task.task_params),
|
|
|
+# ('stacktrace', stacktrace),
|
|
|
+# ])
|
|
|
+# request = PutLogsRequest(project=cls.project_name,
|
|
|
+# logstore=cls.logstore_name,
|
|
|
+# logitems=[log_item],
|
|
|
+# compress=False)
|
|
|
+# cls.client.put_logs(request)
|
|
|
+
|
|
|
+# @classmethod
|
|
|
+# def process(cls, task: CrawlerTask, process_step: str, log_type: str, message: str,
|
|
|
+# content: Optional[AiDitContent],
|
|
|
+# account: Optional[AiDitAccount],
|
|
|
+# content_portrait: Optional[List[CrawlerContentPortrait]],
|
|
|
+# account_portrait: Optional[List[CrawlerAccountPortrait]]):
|
|
|
+# """
|
|
|
+# Record the task execution & crawling process
|
|
|
+# process_step: crawler, skip, filter, after_filter
|
|
|
+# log_type: content, content_portrait, account_portrait
|
|
|
+# """
|
|
|
+# try:
|
|
|
+# # Serialize
|
|
|
+# # Serialize only when the object is not None; otherwise the result is None
|
|
|
+# content_str = content.model_dump_json() if content else None
|
|
|
+# account_str = account.model_dump_json() if account else None
|
|
|
+# # Serialize
|
|
|
+# if content_portrait:
|
|
|
+# # Use a list comprehension to convert each object to a dict, then serialize the whole list
|
|
|
+# content_portrait_str = json.dumps([item.model_dump() for item in content_portrait])
|
|
|
+# else:
|
|
|
+# content_portrait_str = None
|
|
|
+
|
|
|
+# if account_portrait:
|
|
|
+# # Use a list comprehension to convert each object to a dict, then serialize the whole list
|
|
|
+# account_portrait_str = json.dumps([item.model_dump() for item in account_portrait])
|
|
|
+# else:
|
|
|
+# account_portrait_str = None
|
|
|
+
|
|
|
+# log_item = LogItem()
|
|
|
+# task_id = task.task_id
|
|
|
+# plan_id = task.plan_id
|
|
|
+# plan_type = ''
|
|
|
+# if task.plan_type is not None:
|
|
|
+# plan_type = str(task.plan_type.value.id)
|
|
|
+# channel = ''
|
|
|
+# if task.channel is not None:
|
|
|
+# channel = str(task.channel.value.id)
|
|
|
+# crawler_mode = ''
|
|
|
+# if task.crawler_mode is not None:
|
|
|
+# crawler_mode = str(task.crawler_mode.value.id)
|
|
|
+# task_params = ''
|
|
|
+# if task.task_params is not None:
|
|
|
+# task_params = json.dumps(task.task_params)
|
|
|
+
|
|
|
+# log_item.set_contents([
|
|
|
+# # ('task_id', task.task_id),
|
|
|
+# # ('plan_id', task.plan_id),
|
|
|
+# # ('plan_type', str(task.plan_type.value.id)),
|
|
|
+# # ('channel', str(task.channel.value.id)),
|
|
|
+# # ('crawler_mode', str(task.crawler_mode.value.id)),
|
|
|
+# # ('task_params', task.task_params),
|
|
|
+# ('task_id', task_id),
|
|
|
+# ('plan_id', plan_id),
|
|
|
+# ('plan_type', plan_type),
|
|
|
+# ('channel', channel),
|
|
|
+# ('crawler_mode', crawler_mode),
|
|
|
+# ('task_params', task_params),
|
|
|
+# ('process_step', process_step),
|
|
|
+# ('log_type', log_type),
|
|
|
+# ('message', message),
|
|
|
+# ('content', content_str or ''),
|
|
|
+# ('account', account_str or ''),
|
|
|
+# ('content_portrait', content_portrait_str or ''),
|
|
|
+# ('account_portrait', account_portrait_str or ''),
|
|
|
+# ('timestamp', str(time.time())),
|
|
|
+# ])
|
|
|
+# request = PutLogsRequest(project=cls.project_name,
|
|
|
+# logstore=cls.process_logstore_name,
|
|
|
+# logitems=[log_item],
|
|
|
+# compress=False)
|
|
|
+# cls.client.put_logs(request)
|
|
|
+# except Exception as e:
|
|
|
+# traceback.print_exc()
|
|
|
+
|
|
|
+# @classmethod
|
|
|
+# def info(cls, path: str, channel: int, params: str, response: str, status_code: int, msg: str = '',
|
|
|
+# token: str = ''):
|
|
|
+# log_item = LogItem()
|
|
|
+# log_item.set_contents([
|
|
|
+# ('path', path),
|
|
|
+# ('channel', channel),
|
|
|
+# ('params', params),
|
|
|
+# ('response', response),
|
|
|
+# ('status_code', status_code),
|
|
|
+# ('msg', msg),
|
|
|
+# ('token', token)
|
|
|
+# ])
|
|
|
+# request = PutLogsRequest(project=cls.project_name,
|
|
|
+# logstore='request-log',
|
|
|
+# logitems=[log_item],
|
|
|
+# compress=False)
|
|
|
+# cls.client.put_logs(request)
|
|
|
+
|
|
|
+# @classmethod
|
|
|
+# def req_info(cls, channel: str, params: str, response: str, source: str, path: str = '/', status_code: int = 0,
|
|
|
+# token: str = ''):
|
|
|
+# try:
|
|
|
+# log_item = LogItem()
|
|
|
+# log_item.set_contents([
|
|
|
+# ('channel', channel),
|
|
|
+# ('params', str(params)),
|
|
|
+# ('response', str(response)),
|
|
|
+# ('path', path),
|
|
|
+# ('source', source),
|
|
|
+# ('status_code', str(status_code)),
|
|
|
+# ('token', token)
|
|
|
+# ])
|
|
|
+# request = PutLogsRequest(project=cls.project_name,
|
|
|
+# logstore='info-log',
|
|
|
+# logitems=[log_item],
|
|
|
+# compress=False)
|
|
|
+# cls.client.put_logs(request)
|
|
|
+# except Exception as e:
|
|
|
+# logger.error(f"AliyunLog.req_info error: {e}")
|
|
|
+# pass
|
|
|
+
|
|
|
+# @classmethod
|
|
|
+# def http_req_info(cls, path: str, params: str, response: str, status_code: int = 0):
|
|
|
+# log_item = LogItem()
|
|
|
+# log_item.set_contents([
|
|
|
+# ('path', path),
|
|
|
+# ('params', params),
|
|
|
+# ('response', response),
|
|
|
+# ('status_code', status_code)
|
|
|
+# ])
|
|
|
+# request = PutLogsRequest(project=cls.project_name,
|
|
|
+# logstore='info-log',
|
|
|
+# logitems=[log_item],
|
|
|
+# compress=False)
|
|
|
+# cls.client.put_logs(request)
|
|
|
+
|
|
|
+# @classmethod
|
|
|
+# def get_log(cls):
|
|
|
+# from_time = int(datetime.now().timestamp() * 1000) - 1000 * 60 * 60 * 24
|
|
|
+# to_time = int(datetime.now().timestamp() * 1000)
|
|
|
+
|
|
|
+# response = cls.client.get_logs(GetLogsRequest(project='cyber-crawler-prod',
|
|
|
+# logstore='request-log',
|
|
|
+# topic='',
|
|
|
+# fromTime=from_time,
|
|
|
+# toTime=to_time,
|
|
|
+# query='path: /crawler/moonshot/kimi and status_code :10000'))
|
|
|
+# print(response.body)
|
|
|
+# return response
|
|
|
+
|
|
|
+
|
|
|
+# class AliyunHkLog(object):
|
|
|
+# client = LogClient(endpoint='cn-hongkong.log.aliyuncs.com',
|
|
|
+# accessKey=_config.access_key_secret,
|
|
|
+# accessKeyId=_config.access_key_id)
|
|
|
+# project_name = 'cyber-crawler-prod'
|
|
|
+
|
|
|
+# @classmethod
|
|
|
+# def get_log(cls, query: str, project_name: str = 'cyber-crawler-prod', logstore_name: str = 'request-log'):
|
|
|
+# today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
|
|
|
+# from_time = int(today.timestamp()) - 24 * 60 * 60
|
|
|
+# to_time = int(today.timestamp())
|
|
|
+
|
|
|
+# response = cls.client.get_logs(GetLogsRequest(project=cls.project_name,
|
|
|
+# logstore=logstore_name,
|
|
|
+# topic='',
|
|
|
+# fromTime=from_time,
|
|
|
+# toTime=to_time,
|
|
|
+# query=query))
|
|
|
+
|
|
|
+# return response.body
|