|
|
@@ -2534,8 +2534,8 @@ class AgentRunner:
|
|
|
# 统计优化情况
|
|
|
stats = {"kept": 0, "downscaled": 0, "described": 0, "cache_hit": 0}
|
|
|
|
|
|
- # 收集需要降分辨率的图片(用于并发处理)
|
|
|
- downscale_jobs = [] # [(msg_idx, block_idx, image_url, cache_key)]
|
|
|
+ # 收集需要降分辨率或尺寸补齐的图片(用于并发处理)
|
|
|
+ process_jobs = [] # [(msg_idx, block_idx, image_url, cache_key, max_size, cache_field)]
|
|
|
|
|
|
# 第一遍:扫描并收集需要处理的图片
|
|
|
for i in range(last_assistant_idx):
|
|
|
@@ -2559,21 +2559,24 @@ class AgentRunner:
|
|
|
else:
|
|
|
cache_key = hashlib.md5(image_url.encode()).hexdigest()
|
|
|
|
|
|
- # 3-5 轮需要降分辨率
|
|
|
- if 2 < rounds_ago <= 5:
|
|
|
+ # 1-5 轮都需要检查尺寸
|
|
|
+ if rounds_ago <= 5:
|
|
|
cached = self._image_opt_cache.get(cache_key, {})
|
|
|
- if "downscaled" not in cached and image_url.startswith("data:"):
|
|
|
- downscale_jobs.append((i, block_idx, image_url, cache_key))
|
|
|
-
|
|
|
- # 并发处理所有降分辨率任务
|
|
|
- if downscale_jobs:
|
|
|
- downscale_results = await asyncio.gather(
|
|
|
- *[self._downscale_image(url) for _, _, url, _ in downscale_jobs],
|
|
|
+ cache_field = "pad_only" if rounds_ago <= 2 else "downscaled"
|
|
|
+
|
|
|
+ if cache_field not in cached and image_url.startswith("data:"):
|
|
|
+ max_size = None if rounds_ago <= 2 else 512
|
|
|
+ process_jobs.append((i, block_idx, image_url, cache_key, max_size, cache_field))
|
|
|
+
|
|
|
+ # 并发处理所有尺寸任务
|
|
|
+ if process_jobs:
|
|
|
+ process_results = await asyncio.gather(
|
|
|
+ *[self._process_image_size(url, max_size=ms) for _, _, url, _, ms, _ in process_jobs],
|
|
|
return_exceptions=True
|
|
|
)
|
|
|
- for (_, _, _, cache_key), result in zip(downscale_jobs, downscale_results):
|
|
|
+ for (_, _, _, cache_key, _, cache_field), result in zip(process_jobs, process_results):
|
|
|
if not isinstance(result, Exception) and result is not None:
|
|
|
- self._image_opt_cache.setdefault(cache_key, {})["downscaled"] = result
|
|
|
+ self._image_opt_cache.setdefault(cache_key, {})[cache_field] = result
|
|
|
|
|
|
# 第二遍:应用处理结果
|
|
|
for i in range(last_assistant_idx):
|
|
|
@@ -2603,9 +2606,29 @@ class AgentRunner:
|
|
|
|
|
|
# 根据距离决定处理策略
|
|
|
if rounds_ago <= 2:
|
|
|
- # 最近 1-2 轮:保留原图
|
|
|
- new_content.append(block)
|
|
|
- stats["kept"] += 1
|
|
|
+ # 最近 1-2 轮:只补齐过小图片,保留原分辨率
|
|
|
+ cached = self._image_opt_cache.get(cache_key, {})
|
|
|
+ if "pad_only" in cached:
|
|
|
+ new_content.append({
|
|
|
+ "type": "image_url",
|
|
|
+ "image_url": {"url": cached["pad_only"]}
|
|
|
+ })
|
|
|
+ stats["kept"] += 1
|
|
|
+ stats["cache_hit"] += 1
|
|
|
+ elif image_url.startswith("data:"):
|
|
|
+ processed = await self._process_image_size(image_url, max_size=None)
|
|
|
+ if processed:
|
|
|
+ self._image_opt_cache.setdefault(cache_key, {})["pad_only"] = processed
|
|
|
+ new_content.append({
|
|
|
+ "type": "image_url",
|
|
|
+ "image_url": {"url": processed}
|
|
|
+ })
|
|
|
+ else:
|
|
|
+ new_content.append(block)
|
|
|
+ stats["kept"] += 1
|
|
|
+ else:
|
|
|
+ new_content.append(block)
|
|
|
+ stats["kept"] += 1
|
|
|
|
|
|
elif rounds_ago <= 5:
|
|
|
# 3-5 轮:降低分辨率(优先从缓存取)
|
|
|
@@ -2618,13 +2641,13 @@ class AgentRunner:
|
|
|
stats["downscaled"] += 1
|
|
|
stats["cache_hit"] += 1
|
|
|
elif image_url.startswith("data:"):
|
|
|
- downscaled = await self._downscale_image(image_url)
|
|
|
- if downscaled:
|
|
|
+ processed = await self._process_image_size(image_url, max_size=512)
|
|
|
+ if processed:
|
|
|
# 缓存结果
|
|
|
- self._image_opt_cache.setdefault(cache_key, {})["downscaled"] = downscaled
|
|
|
+ self._image_opt_cache.setdefault(cache_key, {})["downscaled"] = processed
|
|
|
new_content.append({
|
|
|
"type": "image_url",
|
|
|
- "image_url": {"url": downscaled}
|
|
|
+ "image_url": {"url": processed}
|
|
|
})
|
|
|
stats["downscaled"] += 1
|
|
|
else:
|
|
|
@@ -2668,16 +2691,11 @@ class AgentRunner:
|
|
|
|
|
|
return messages
|
|
|
|
|
|
- async def _downscale_image(self, base64_url: str, max_size: int = 512) -> Optional[str]:
|
|
|
+ async def _process_image_size(self, base64_url: str, max_size: Optional[int] = 512, min_size: int = 11) -> Optional[str]:
|
|
|
"""
|
|
|
- 降低 base64 图片的分辨率
|
|
|
-
|
|
|
- Args:
|
|
|
- base64_url: data:image/xxx;base64,... 格式的 URL
|
|
|
- max_size: 最大边长(像素)
|
|
|
-
|
|
|
- Returns:
|
|
|
- 降分辨率后的 base64 URL,失败返回 None
|
|
|
+ 处理 base64 图片的尺寸:
|
|
|
+ - 若 max_size 不为 None 且图片任意一边大于该值,则等比例缩小,使最长边等于 max_size
|
|
|
+ - 若任意一边小于 min_size,则补充白边 (Padding)
|
|
|
"""
|
|
|
try:
|
|
|
from PIL import Image
|
|
|
@@ -2695,20 +2713,42 @@ class AgentRunner:
|
|
|
img_data = base64.b64decode(data)
|
|
|
img = Image.open(io.BytesIO(img_data))
|
|
|
|
|
|
- # 计算新尺寸(保持宽高比)
|
|
|
width, height = img.size
|
|
|
- if width <= max_size and height <= max_size:
|
|
|
- return base64_url # 已经够小,不需要缩放
|
|
|
|
|
|
- if width > height:
|
|
|
- new_width = max_size
|
|
|
- new_height = int(height * max_size / width)
|
|
|
+ needs_downscale = max_size is not None and (width > max_size or height > max_size)
|
|
|
+ needs_pad = width < min_size or height < min_size
|
|
|
+
|
|
|
+ # 尺寸正常,无需处理
|
|
|
+ if not needs_downscale and not needs_pad:
|
|
|
+ return base64_url
|
|
|
+
|
|
|
+ new_width, new_height = width, height
|
|
|
+
|
|
|
+ # 1. 降分辨率
|
|
|
+ if needs_downscale:
|
|
|
+ if width > height:
|
|
|
+ new_width = max_size
|
|
|
+ new_height = int(height * max_size / width)
|
|
|
+ else:
|
|
|
+ new_height = max_size
|
|
|
+ new_width = int(width * max_size / height)
|
|
|
+
|
|
|
+ if (new_width, new_height) != (width, height):
|
|
|
+ img_resized = img.resize((new_width, new_height), Image.Resampling.BILINEAR)
|
|
|
else:
|
|
|
- new_height = max_size
|
|
|
- new_width = int(width * max_size / height)
|
|
|
+ img_resized = img
|
|
|
+
|
|
|
+ # 2. 补齐白边 (Padding)
|
|
|
+ pad_width = max(new_width, min_size)
|
|
|
+ pad_height = max(new_height, min_size)
|
|
|
|
|
|
- # 缩放图片(使用更快的 BILINEAR 算法)
|
|
|
- img_resized = img.resize((new_width, new_height), Image.Resampling.BILINEAR)
|
|
|
+ if pad_width > new_width or pad_height > new_height:
|
|
|
+ # 创建白色背景
|
|
|
+ padded_img = Image.new("RGBA" if img_resized.mode in ("RGBA", "P") else "RGB", (pad_width, pad_height), (255, 255, 255, 255))
|
|
|
+ offset_x = (pad_width - new_width) // 2
|
|
|
+ offset_y = (pad_height - new_height) // 2
|
|
|
+ padded_img.paste(img_resized, (offset_x, offset_y))
|
|
|
+ img_resized = padded_img
|
|
|
|
|
|
# 转换为 RGB(JPEG不支持 RGBA, P 等具有透明度或索引的模式)
|
|
|
if img_resized.mode != "RGB":
|
|
|
@@ -2722,15 +2762,16 @@ class AgentRunner:
|
|
|
img_resized = background
|
|
|
img_resized = img_resized.convert("RGB")
|
|
|
|
|
|
- # 重新编码为 JPEG(降低质量以加快速度)
|
|
|
+ # 重新编码为 JPEG(如果只是补齐没有缩放,可以稍微保留高点质量)
|
|
|
buffer = io.BytesIO()
|
|
|
- img_resized.save(buffer, format="JPEG", quality=60, optimize=False)
|
|
|
+ quality = 60 if needs_downscale else 85
|
|
|
+ img_resized.save(buffer, format="JPEG", quality=quality, optimize=False)
|
|
|
new_data = base64.b64encode(buffer.getvalue()).decode("utf-8")
|
|
|
|
|
|
return f"data:image/jpeg;base64,{new_data}"
|
|
|
|
|
|
except Exception as e:
|
|
|
- self.log.warning(f"[Image Downscale] 降分辨率失败: {e}")
|
|
|
+ self.log.warning(f"[Image Process] 处理图片尺寸失败: {e}")
|
|
|
return None
|
|
|
|
|
|
async def _generate_image_description(self, image_url: str, current_model: str) -> str:
|