// imageExtraction.ts (3.5 KB)

  1. import type { Message } from "../types/message";
  2. export interface ExtractedImage {
  3. url: string;
  4. alt?: string;
  5. }
  6. /**
  7. * Extracts images from a message's content or result.
  8. * Handles both JSON array format (MsgResult[]) and Rich Text (Markdown/HTML).
  9. *
  10. * @param result - The content or result field from a message
  11. * @returns Array of extracted images
  12. */
  13. export const extractImagesFromResult = (result: unknown): ExtractedImage[] => {
  14. const images: ExtractedImage[] = [];
  15. if (!result) return images;
  16. // Case 0: result IS the message content which might be an array directly
  17. if (Array.isArray(result)) {
  18. result.forEach((item) => {
  19. if (typeof item === "object" && item !== null) {
  20. // eslint-disable-next-line @typescript-eslint/no-explicit-any
  21. const msgItem = item as any;
  22. // 1. Check for standard OpenAI-like image_url
  23. if (msgItem.image_url && typeof msgItem.image_url === "object" && msgItem.image_url.url) {
  24. images.push({
  25. url: msgItem.image_url.url,
  26. alt: "Attached Image",
  27. });
  28. }
  29. // 2. Check for type="image" with source
  30. if (msgItem.type === "image" && msgItem.source && typeof msgItem.source === "object") {
  31. const source = msgItem.source;
  32. if (source.data) {
  33. const mimeType = source.media_type || "image/png";
  34. images.push({
  35. url: `data:${mimeType};base64,${source.data}`,
  36. alt: "Base64 Image",
  37. });
  38. } else if (source.url) {
  39. images.push({
  40. url: source.url,
  41. alt: "Image URL",
  42. });
  43. }
  44. }
  45. }
  46. });
  47. }
  48. // Case 2: result is a string (Rich Text / Markdown)
  49. if (typeof result === "string") {
  50. // 1. Match Markdown images: ![alt](url)
  51. const markdownRegex = /!\[([^\]]*)\]\(([^)]+)\)/g;
  52. let match;
  53. while ((match = markdownRegex.exec(result)) !== null) {
  54. images.push({
  55. alt: match[1] || "Markdown Image",
  56. url: match[2],
  57. });
  58. }
  59. // 2. Match HTML img tags: <img ... src="..." ...>
  60. const htmlRegex = /<img\s+[^>]*src=["']([^"']+)["'][^>]*>/g;
  61. while ((match = htmlRegex.exec(result)) !== null) {
  62. images.push({
  63. alt: "HTML Image",
  64. url: match[1],
  65. });
  66. }
  67. // 3. Match JSON "image_url": "..." patterns embedded in text
  68. // Matches "image_url"\s*:\s*"([^"]+)"
  69. const jsonRegex = /"image_url"\s*:\s*"([^"]+)"/g;
  70. while ((match = jsonRegex.exec(result)) !== null) {
  71. // Basic filtering to avoid matching non-URL strings if the key is reused
  72. if (match[1].startsWith("http") || match[1].startsWith("data:")) {
  73. images.push({
  74. alt: "JSON Image",
  75. url: match[1],
  76. });
  77. }
  78. }
  79. }
  80. return images;
  81. };
  82. /**
  83. * Helper to extract images from a Message object
  84. */
  85. export const extractImagesFromMessage = (message: Message): ExtractedImage[] => {
  86. if (!message.content) return [];
  87. // If content is a string, treat it as result
  88. if (typeof message.content === "string") {
  89. return extractImagesFromResult(message.content);
  90. }
  91. // If content is an object (MessageContent)
  92. if (typeof message.content === "object") {
  93. // Check 'result' field
  94. if ("result" in message.content && message.content.result) {
  95. return extractImagesFromResult(message.content.result);
  96. }
  97. // Also check if content itself is an array (e.g. standard MessageContent array)
  98. if (Array.isArray(message.content)) {
  99. return extractImagesFromResult(message.content);
  100. }
  101. }
  102. return [];
  103. };