|
|
@@ -0,0 +1,110 @@
|
|
|
+import { describe, it, expect } from "vitest";
|
|
|
+import { extractImagesFromResult, extractImagesFromMessage } from "../imageExtraction";
|
|
|
+import type { Message } from "../../types/message";
|
|
|
+
|
|
|
// Spec for extractImagesFromResult: each case feeds one supported shape of a
// tool "result" (nullish, content-part array, plain string) and pins the
// image descriptors ({ url, alt }) expected back.
describe("extractImagesFromResult", () => {
  it("should return empty array for null/undefined", () => {
    // Nullish results must degrade to "no images" rather than throw.
    expect(extractImagesFromResult(null)).toEqual([]);
    expect(extractImagesFromResult(undefined)).toEqual([]);
  });

  it("should extract images from OpenAI-like MsgResult array", () => {
    // Mixed content parts: only the image_url part should yield an image.
    const input = [
      { type: "text", text: "hello" },
      { type: "image_url", image_url: { url: "http://example.com/1.png" } },
    ];
    const result = extractImagesFromResult(input);
    expect(result).toHaveLength(1);
    expect(result[0].url).toBe("http://example.com/1.png");
  });

  it('should extract images from custom type="image" with base64', () => {
    // A base64 source is expected to come back as a data: URI built from
    // media_type and data, with the fixed alt text "Base64 Image".
    const input = [
      {
        type: "image",
        source: {
          media_type: "image/jpeg",
          data: "base64data",
        },
      },
    ];
    const result = extractImagesFromResult(input);
    expect(result).toHaveLength(1);
    expect(result[0].url).toBe("data:image/jpeg;base64,base64data");
    expect(result[0].alt).toBe("Base64 Image");
  });

  it("should extract markdown images from string result", () => {
    // The fixture must actually contain the two markdown image tokens the
    // assertions below describe; results are expected in document order.
    const input =
      "Here is an image: ![alt text](http://example.com/2.png) and another ![img2](http://example.com/3.png)";
    const result = extractImagesFromResult(input);
    expect(result).toHaveLength(2);
    expect(result[0].url).toBe("http://example.com/2.png");
    expect(result[0].alt).toBe("alt text");
    expect(result[1].url).toBe("http://example.com/3.png");
    expect(result[1].alt).toBe("img2");
  });

  it("should extract HTML img tags from string result", () => {
    // Only the src attribute is asserted here; alt handling for <img> tags is
    // not pinned by this case.
    const input = 'Some text <img src="http://example.com/4.png" alt="html img" /> end';
    const result = extractImagesFromResult(input);
    expect(result).toHaveLength(1);
    expect(result[0].url).toBe("http://example.com/4.png");
  });

  it("should extract images from JSON-embedded string (read_file output)", () => {
    // Line-numbered file dump: the URL lives inside a quoted "image_url" JSON
    // value, not in markdown or HTML markup.
    const input = `
 # 01_file.json
 <file>
 1| {
 2| "image_url": "http://res.cybertogether.net/crawler/image/test.jpeg",
 3| "other": "value"
 4| }
 `;
    const result = extractImagesFromResult(input);
    expect(result).toHaveLength(1);
    expect(result[0].url).toBe("http://res.cybertogether.net/crawler/image/test.jpeg");
  });

  it("should extract images from complex escaped JSON string (user case)", () => {
    // Regression fixture modelled on a user-reported result: the "result" is a
    // plain string holding file content (line-numbered JSON, non-ASCII keys,
    // body cut off before the closing braces).
    // NOTE(review): the original report is said to have had whitespace between
    // the opening quote and the URL; this fixture has none. Confirm whether
    // that variant needs its own case.
    const input = `# 01_图片分段_07_g3_人物与玫瑰花.json

<file>
 1| {
 2| "image_url": "http://res.cybertogether.net/crawler/image/e70bbea964cfcf0225744da00e8e7939.jpeg",
 3| "sections": [
 4| {
 5| "名称": "人物与玫瑰花",
`;
    const result = extractImagesFromResult(input);
    expect(result).toHaveLength(1);
    expect(result[0].url).toBe("http://res.cybertogether.net/crawler/image/e70bbea964cfcf0225744da00e8e7939.jpeg");
  });
});
|
|
|
+
|
|
|
// Spec for extractImagesFromMessage: covers the two content shapes exercised
// here, a plain string and an object carrying a `result` array.
describe("extractImagesFromMessage", () => {
  it("should extract from message with string content", () => {
    // String content must contain an image reference for the assertions below
    // to be meaningful; a markdown image carrying the expected URL is used.
    const msg: Message = {
      content: "![test](http://example.com/test.png)",
    };
    const result = extractImagesFromMessage(msg);
    expect(result).toHaveLength(1);
    expect(result[0].url).toBe("http://example.com/test.png");
  });

  it("should extract from message with MessageContent object", () => {
    // Object content whose `result` array holds an image_url part (note: the
    // part deliberately has no `type` field).
    // NOTE(review): no cast is applied here; if the Message type rejects this
    // literal under type-checking, widen the fixture's type rather than
    // casting to `any`.
    const msg: Message = {
      content: {
        result: [{ image_url: { url: "http://example.com/obj.png" } }],
      },
    };
    const result = extractImagesFromMessage(msg);
    expect(result).toHaveLength(1);
    expect(result[0].url).toBe("http://example.com/obj.png");
  });
});
|