Save memory by storing text encoder weights in fp16 in most situations.

Do inference in fp32 to make sure quality stays exactly the same.
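The idea behind the commit message can be shown in a few lines: keep a module's parameters in fp16 (halving their memory footprint) while upcasting them to fp32 at compute time. This is only a minimal sketch of the pattern, not ComfyUI's actual implementation, and `Fp16StorageLinear` is a hypothetical name:

import torch
import torch.nn as nn
import torch.nn.functional as F

class Fp16StorageLinear(nn.Linear):
    """Linear layer whose weights are stored in fp16 but which computes in fp32."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Upcast the stored fp16 parameters per call; the fp32 copies are
        # transient, so persistent memory use stays at fp16 size.
        bias = None if self.bias is None else self.bias.float()
        return F.linear(x.float(), self.weight.float(), bias)

layer = Fp16StorageLinear(768, 768).half()  # parameters now stored in fp16
tokens = torch.randn(1, 77, 768)            # fp32 activations
out = layer(tokens)
print(out.dtype)  # torch.float32 -- the matmul itself ran in fp32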
Author: comfyanonymous
Date: 2023-08-23 01:07:57 -04:00
parent d7b3b0f8c1
commit f081017c1a
4 changed files with 5 additions and 9 deletions

comfy/model_management.py

@@ -433,7 +433,7 @@ def text_encoder_device():
         return get_torch_device()
     elif vram_state == VRAMState.HIGH_VRAM or vram_state == VRAMState.NORMAL_VRAM:
         #NOTE: on a Ryzen 5 7600X with 4080 it's faster to shift to GPU
-        if torch.get_num_threads() < 8: #leaving the text encoder on the CPU is faster than shifting it if the CPU is fast enough.
+        if should_use_fp16() or torch.get_num_threads() < 8: #leaving the text encoder on the CPU is faster than shifting it if the CPU is fast enough.
             return get_torch_device()
         else:
             return torch.device("cpu")
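To see why `should_use_fp16()` joins the condition: once the weights are stored in fp16, shifting the text encoder to the GPU costs half the VRAM it used to, so GPU placement becomes worthwhile whenever fp16 is usable, not only when the CPU is slow. A rough sketch of the resulting behavior, with illustrative stand-ins for the two helpers (the real ones live in comfy/model_management.py and are more involved):

import torch

# Illustrative stand-ins (assumptions, not ComfyUI's real implementations).
def should_use_fp16():
    return torch.cuda.is_available()

def get_torch_device():
    return torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

def text_encoder_device_after_patch():
    # Post-patch: prefer the GPU either when fp16 storage makes the move cheap,
    # or when the CPU is too weak (< 8 threads) to keep the encoder local.
    if should_use_fp16() or torch.get_num_threads() < 8:
        return get_torch_device()
    return torch.device("cpu")

print(text_encoder_device_after_patch())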