@@ -523,7 +523,7 @@ def init_model(checkpoint_path, device, precision, compile=False):
decode_one_token,
backend="inductor" if torch.cuda.is_available() else "aot_eager",
mode="default" if torch.cuda.is_available() else None,
- fullgraph=False,
+ fullgraph=True,
)
return model.eval(), decode_one_token