Add --fast argument to enable experimental optimizations.

Optimizations that might break things/lower quality will be put behind
this flag first and might be enabled by default in the future.

Currently the only optimization is float8_e4m3fn matrix multiplication on
4000/ADA series Nvidia cards or later. If you have one of these cards you
will see a speed boost when using fp8_e4m3fn flux for example.
This commit is contained in:
comfyanonymous
2024-08-20 11:49:33 -04:00
parent d1a6bd6845
commit 9953f22fce
4 changed files with 52 additions and 5 deletions

View File

@@ -96,10 +96,7 @@ class BaseModel(torch.nn.Module):
if not unet_config.get("disable_unet_model_creation", False):
if model_config.custom_operations is None:
if self.manual_cast_dtype is not None:
operations = comfy.ops.manual_cast
else:
operations = comfy.ops.disable_weight_init
operations = comfy.ops.pick_operations(unet_config.get("dtype", None), self.manual_cast_dtype)
else:
operations = model_config.custom_operations
self.diffusion_model = unet_model(**unet_config, device=device, operations=operations)