sls.py

import json
import time
import traceback
from datetime import datetime
from typing import List, Optional

from aliyun.log import GetLogsRequest, LogClient, LogItem, PutLogsRequest
from loguru import logger

from utils import get_global_config
# NOTE: the module path below is assumed; adjust to wherever CrawlerTask and
# the related models actually live in this codebase.
from models import (AiDitAccount, AiDitContent, CrawlerAccountPortrait,
                    CrawlerContentPortrait, CrawlerTask)

_config = get_global_config().log.aliyun
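
# The Aliyun config above is assumed to expose at least these fields (inferred
# from how _config is used below; the exact schema lives in utils.get_global_config):
#   _config.endpoint            e.g. 'cn-hangzhou.log.aliyuncs.com'
#   _config.access_key_id       Aliyun AccessKey ID
#   _config.access_key_secret   Aliyun AccessKey secret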


class AliyunLog(object):
    client = LogClient(endpoint=_config.endpoint,
                       accessKey=_config.access_key_secret,
                       accessKeyId=_config.access_key_id)
    project_name = 'cyber-crawler-prod'
    logstore_name = 'error-log'
    process_logstore_name = 'process-log'

    @classmethod
    def record(cls, task: CrawlerTask, stacktrace: str):
        log_item = LogItem()
        log_item.set_contents([
            ('task_id', task.task_id),
            ('plan_id', task.plan_id),
            ('plan_type', str(task.plan_type.value.id)),
            ('channel', str(task.channel.value.id)),
            ('crawler_mode', str(task.crawler_mode.value.id)),
            ('task_params', task.task_params),
            ('stacktrace', stacktrace),
        ])
        request = PutLogsRequest(project=cls.project_name,
                                 logstore=cls.logstore_name,
                                 logitems=[log_item],
                                 compress=False)
        cls.client.put_logs(request)
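
    # A hypothetical call site for record(): report a failed task together with
    # its formatted traceback (run_crawler is illustrative only, not part of
    # this module).
    #
    #     try:
    #         run_crawler(task)
    #     except Exception:
    #         AliyunLog.record(task, traceback.format_exc())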

    @classmethod
    def process(cls, task: CrawlerTask, process_step: str, log_type: str, message: str,
                content: Optional[AiDitContent],
                account: Optional[AiDitAccount],
                content_portrait: Optional[List[CrawlerContentPortrait]],
                account_portrait: Optional[List[CrawlerAccountPortrait]]):
        """
        Record task execution & the crawling process.
        process_step: crawler, skip, filter, after_filter
        log_type: content, content_portrait, account_portrait
        """
        try:
            # Serialize only when the object is not None; otherwise keep None.
            content_str = content.model_dump_json() if content else None
            account_str = account.model_dump_json() if account else None
            # Convert each portrait object to a dict, then serialize the whole list.
            if content_portrait:
                content_portrait_str = json.dumps([item.model_dump() for item in content_portrait])
            else:
                content_portrait_str = None
            if account_portrait:
                account_portrait_str = json.dumps([item.model_dump() for item in account_portrait])
            else:
                account_portrait_str = None
            log_item = LogItem()
            task_id = task.task_id
            plan_id = task.plan_id
            # Enum-valued fields may be None on the task; fall back to ''.
            plan_type = ''
            if task.plan_type is not None:
                plan_type = str(task.plan_type.value.id)
            channel = ''
            if task.channel is not None:
                channel = str(task.channel.value.id)
            crawler_mode = ''
            if task.crawler_mode is not None:
                crawler_mode = str(task.crawler_mode.value.id)
            task_params = ''
            if task.task_params is not None:
                task_params = json.dumps(task.task_params)
            log_item.set_contents([
                ('task_id', task_id),
                ('plan_id', plan_id),
                ('plan_type', plan_type),
                ('channel', channel),
                ('crawler_mode', crawler_mode),
                ('task_params', task_params),
                ('process_step', process_step),
                ('log_type', log_type),
                ('message', message),
                ('content', content_str or ''),
                ('account', account_str or ''),
                ('content_portrait', content_portrait_str or ''),
                ('account_portrait', account_portrait_str or ''),
                ('timestamp', str(time.time())),
            ])
            request = PutLogsRequest(project=cls.project_name,
                                     logstore=cls.process_logstore_name,
                                     logitems=[log_item],
                                     compress=False)
            cls.client.put_logs(request)
        except Exception:
            # Logging must never break the crawl; swallow and print the traceback.
            traceback.print_exc()
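
    # A sketch of calling process() with the step/type values named in its
    # docstring (all argument values here are illustrative):
    #
    #     AliyunLog.process(task, process_step='after_filter', log_type='content',
    #                       message='kept after filtering', content=content,
    #                       account=None, content_portrait=None,
    #                       account_portrait=None)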

    @classmethod
    def info(cls, path: str, channel: int, params: str, response: str, status_code: int, msg: str = '',
             token: str = ''):
        log_item = LogItem()
        # LogItem contents must be string key/value pairs, so cast the ints.
        log_item.set_contents([
            ('path', path),
            ('channel', str(channel)),
            ('params', params),
            ('response', response),
            ('status_code', str(status_code)),
            ('msg', msg),
            ('token', token),
        ])
        request = PutLogsRequest(project=cls.project_name,
                                 logstore='request-log',
                                 logitems=[log_item],
                                 compress=False)
        cls.client.put_logs(request)

    @classmethod
    def req_info(cls, channel: str, params: str, response: str, source: str, path: str = '/', status_code: int = 0,
                 token: str = ''):
        try:
            log_item = LogItem()
            log_item.set_contents([
                ('channel', channel),
                ('params', str(params)),
                ('response', str(response)),
                ('path', path),
                ('source', source),
                ('status_code', str(status_code)),
                ('token', token),
            ])
            request = PutLogsRequest(project=cls.project_name,
                                     logstore='info-log',
                                     logitems=[log_item],
                                     compress=False)
            cls.client.put_logs(request)
        except Exception as e:
            logger.error(f"AliyunLog.req_info error: {e}")

    @classmethod
    def http_req_info(cls, path: str, params: str, response: str, status_code: int = 0):
        log_item = LogItem()
        log_item.set_contents([
            ('path', path),
            ('params', params),
            ('response', response),
            ('status_code', str(status_code)),
        ])
        request = PutLogsRequest(project=cls.project_name,
                                 logstore='info-log',
                                 logitems=[log_item],
                                 compress=False)
        cls.client.put_logs(request)

    @classmethod
    def get_log(cls):
        # GetLogsRequest expects Unix timestamps in seconds, not milliseconds.
        to_time = int(datetime.now().timestamp())
        from_time = to_time - 24 * 60 * 60
        response = cls.client.get_logs(GetLogsRequest(project='cyber-crawler-prod',
                                                      logstore='request-log',
                                                      topic='',
                                                      fromTime=from_time,
                                                      toTime=to_time,
                                                      query='path: /crawler/moonshot/kimi and status_code :10000'))
        print(response.body)
        return response
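
    # Besides .body, the SDK's GetLogsResponse also offers helpers such as
    # get_count() and get_logs(); a minimal sketch of iterating the result:
    #
    #     for log in response.get_logs():
    #         print(log.get_time(), log.get_contents())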


class AliyunHkLog(object):
    client = LogClient(endpoint='cn-hongkong.log.aliyuncs.com',
                       accessKey=_config.access_key_secret,
                       accessKeyId=_config.access_key_id)
    project_name = 'cyber-crawler-prod'

    @classmethod
    def get_log(cls, query: str, project_name: str = 'cyber-crawler-prod', logstore_name: str = 'request-log'):
        # Query the previous full day: [midnight - 24h, midnight).
        today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        from_time = int(today.timestamp()) - 24 * 60 * 60
        to_time = int(today.timestamp())
        response = cls.client.get_logs(GetLogsRequest(project=project_name,
                                                      logstore=logstore_name,
                                                      topic='',
                                                      fromTime=from_time,
                                                      toTime=to_time,
                                                      query=query))
        return response.body
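

# A minimal smoke test, assuming valid Aliyun credentials in the global config;
# the query string is illustrative only.
if __name__ == '__main__':
    print(AliyunHkLog.get_log(query='status_code: 200'))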