|
@@ -72,7 +72,19 @@ async def _call_with_retry(llm_call, messages, model, task_name, max_retries=3):
|
|
|
try:
|
|
try:
|
|
|
resp = await llm_call(messages=cur_messages, model=model,
|
|
resp = await llm_call(messages=cur_messages, model=model,
|
|
|
temperature=0.1, max_tokens=4000)
|
|
temperature=0.1, max_tokens=4000)
|
|
|
- cost = resp.get("cost") or 0.0
|
|
|
|
|
|
|
+ # Cost: prefer the provider-reported cost; when it is missing, estimate it from token usage (same basis as llm_helper)
|
|
|
|
|
+ provider_cost = resp.get("cost")
|
|
|
|
|
+ if isinstance(provider_cost, (int, float)) and provider_cost > 0:
|
|
|
|
|
+ cost = provider_cost
|
|
|
|
|
+ else:
|
|
|
|
|
+ usage = resp.get("usage") or {}
|
|
|
|
|
+ if hasattr(usage, "__dict__"):
|
|
|
|
|
+ it = getattr(usage, "input_tokens", 0) or getattr(usage, "prompt_tokens", 0)
|
|
|
|
|
+ ot = getattr(usage, "output_tokens", 0) or getattr(usage, "completion_tokens", 0)
|
|
|
|
|
+ else:
|
|
|
|
|
+ it = usage.get("input_tokens", 0) or usage.get("prompt_tokens", 0)
|
|
|
|
|
+ ot = usage.get("output_tokens", 0) or usage.get("completion_tokens", 0)
|
|
|
|
|
+ cost = (it / 1e6 * 3.0) + (ot / 1e6 * 15.0)
|
|
|
total_cost += cost
|
|
total_cost += cost
|
|
|
content = resp.get("content", "")
|
|
content = resp.get("content", "")
|
|
|
if isinstance(content, list):
|
|
if isinstance(content, list):
|