image_describer.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. import diskcache
  2. import threading
  3. from pqai_agent import chat_service
  4. from pqai_agent.chat_service import VOLCENGINE_MODEL_DOUBAO_1_5_VISION_PRO
  5. from pqai_agent.logging_service import logger
  6. from pqai_agent.toolkit.base import BaseToolkit
  7. from pqai_agent.toolkit.function_tool import FunctionTool
  8. from pqai_agent.toolkit.tool_registry import register_toolkit
# Cache objects are reused across ImageDescriber instances (admittedly not a
# great practice). The dict maps a cache directory path to the diskcache.Cache
# opened for that directory.
_image_describer_caches = {}
# Lock held while a new Cache is inserted into the registry above.
_cache_mutex = threading.Lock()
  12. @register_toolkit
  13. class ImageDescriber(BaseToolkit):
  14. def __init__(self, cache_dir: str = None):
  15. self.model = VOLCENGINE_MODEL_DOUBAO_1_5_VISION_PRO
  16. self.llm_client = chat_service.OpenAICompatible.create_client(self.model)
  17. if not cache_dir:
  18. cache_dir = 'image_descriptions_cache'
  19. if cache_dir not in _image_describer_caches:
  20. with _cache_mutex:
  21. _image_describer_caches[cache_dir] = diskcache.Cache(cache_dir, size_limit=100*1024*1024)
  22. self.cache = _image_describer_caches[cache_dir]
  23. super().__init__()
  24. def analyse_image(self, image_url: str):
  25. """Takes an image URL as input and returns a detailed description of the image.
  26. Args:
  27. image_url (str): The URL of the image to be described.
  28. Returns:
  29. str: A detailed description of the image.
  30. """
  31. if image_url in self.cache:
  32. logger.debug(f"Cache hit for image URL: {image_url}")
  33. return self.cache[image_url]
  34. system_prompt = "你是一位图像分析专家。请提供输入图像的详细描述,包括图像中的文本内容(如果存在)"
  35. messages = [
  36. {'role': 'system', 'content': system_prompt},
  37. {'role': 'user', 'content': [
  38. {
  39. 'type': 'image_url',
  40. 'image_url': image_url
  41. }
  42. ]}
  43. ]
  44. response = self.llm_client.chat.completions.create(messages=messages, model=self.model)
  45. response_content = response.choices[0].message.content
  46. logger.debug(f"ImageDescriber response: {response_content}")
  47. self.cache[image_url] = response_content
  48. return response_content
  49. def get_tools(self):
  50. return [FunctionTool(self.analyse_image)]