From 1b96fae1d4a8425c44d4d3bd60acd818d05bf4f6 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Sat, 19 Jul 2025 01:55:23 -0700 Subject: [PATCH 01/15] Add nested style of dual cfg to DualCFGGuider node. (#8965) --- comfy_extras/nodes_custom_sampler.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/comfy_extras/nodes_custom_sampler.py b/comfy_extras/nodes_custom_sampler.py index 33bc41842..d17737e1a 100644 --- a/comfy_extras/nodes_custom_sampler.py +++ b/comfy_extras/nodes_custom_sampler.py @@ -683,9 +683,10 @@ class CFGGuider: return (guider,) class Guider_DualCFG(comfy.samplers.CFGGuider): - def set_cfg(self, cfg1, cfg2): + def set_cfg(self, cfg1, cfg2, nested=False): self.cfg1 = cfg1 self.cfg2 = cfg2 + self.nested = nested def set_conds(self, positive, middle, negative): middle = node_helpers.conditioning_set_values(middle, {"prompt_type": "negative"}) @@ -695,14 +696,20 @@ class Guider_DualCFG(comfy.samplers.CFGGuider): negative_cond = self.conds.get("negative", None) middle_cond = self.conds.get("middle", None) positive_cond = self.conds.get("positive", None) - if model_options.get("disable_cfg1_optimization", False) == False: - if math.isclose(self.cfg2, 1.0): - negative_cond = None - if math.isclose(self.cfg1, 1.0): - middle_cond = None - out = comfy.samplers.calc_cond_batch(self.inner_model, [negative_cond, middle_cond, positive_cond], x, timestep, model_options) - return comfy.samplers.cfg_function(self.inner_model, out[1], out[0], self.cfg2, x, timestep, model_options=model_options, cond=middle_cond, uncond=negative_cond) + (out[2] - out[1]) * self.cfg1 + if self.nested: + out = comfy.samplers.calc_cond_batch(self.inner_model, [negative_cond, middle_cond, positive_cond], x, timestep, model_options) + pred_text = comfy.samplers.cfg_function(self.inner_model, out[2], out[1], self.cfg1, x, timestep, model_options=model_options, cond=positive_cond, uncond=middle_cond) + return out[0] + self.cfg2 * (pred_text - out[0]) + else: + if model_options.get("disable_cfg1_optimization", False) == False: + if math.isclose(self.cfg2, 1.0): + negative_cond = None + if math.isclose(self.cfg1, 1.0): + middle_cond = None + + out = comfy.samplers.calc_cond_batch(self.inner_model, [negative_cond, middle_cond, positive_cond], x, timestep, model_options) + return comfy.samplers.cfg_function(self.inner_model, out[1], out[0], self.cfg2, x, timestep, model_options=model_options, cond=middle_cond, uncond=negative_cond) + (out[2] - out[1]) * self.cfg1 class DualCFGGuider: @classmethod @@ -714,6 +721,7 @@ class DualCFGGuider: "negative": ("CONDITIONING", ), "cfg_conds": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0, "step":0.1, "round": 0.01}), "cfg_cond2_negative": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0, "step":0.1, "round": 0.01}), + "style": (["regular", "nested"],), } } @@ -722,10 +730,10 @@ class DualCFGGuider: FUNCTION = "get_guider" CATEGORY = "sampling/custom_sampling/guiders" - def get_guider(self, model, cond1, cond2, negative, cfg_conds, cfg_cond2_negative): + def get_guider(self, model, cond1, cond2, negative, cfg_conds, cfg_cond2_negative, style): guider = Guider_DualCFG(model) guider.set_conds(cond1, cond2, negative) - guider.set_cfg(cfg_conds, cfg_cond2_negative) + guider.set_cfg(cfg_conds, cfg_cond2_negative, nested=(style == "nested")) return (guider,) class DisableNoise: From 1da5639e865a50f921d870a92c7c87110ce20c48 Mon Sep 17 00:00:00 2001 From: ComfyUI Wiki Date: Sat, 
19 Jul 2025 18:08:00 +0800 Subject: [PATCH 02/15] Update template to 0.1.37 (#8967) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7705918a8..a7e44095f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.23.4 -comfyui-workflow-templates==0.1.36 +comfyui-workflow-templates==0.1.37 comfyui-embedded-docs==0.2.4 torch torchsde From 100c2478eaba71ab735539fdc00c9d0de49bc224 Mon Sep 17 00:00:00 2001 From: chaObserv <154517000+chaObserv@users.noreply.github.com> Date: Sun, 20 Jul 2025 11:09:11 +0800 Subject: [PATCH 03/15] Add SamplingPercentToSigma node (#8963) It's helpful to adjust start_percent or end_percent based on the corresponding sigma. --- comfy_extras/nodes_custom_sampler.py | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/comfy_extras/nodes_custom_sampler.py b/comfy_extras/nodes_custom_sampler.py index d17737e1a..d011f433b 100644 --- a/comfy_extras/nodes_custom_sampler.py +++ b/comfy_extras/nodes_custom_sampler.py @@ -301,6 +301,35 @@ class ExtendIntermediateSigmas: return (extended_sigmas,) + +class SamplingPercentToSigma: + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + return { + "required": { + "model": (IO.MODEL, {}), + "sampling_percent": (IO.FLOAT, {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.0001}), + "return_actual_sigma": (IO.BOOLEAN, {"default": False, "tooltip": "Return the actual sigma value instead of the value used for interval checks.\nThis only affects results at 0.0 and 1.0."}), + } + } + + RETURN_TYPES = (IO.FLOAT,) + RETURN_NAMES = ("sigma_value",) + CATEGORY = "sampling/custom_sampling/sigmas" + + FUNCTION = "get_sigma" + + def get_sigma(self, model, sampling_percent, return_actual_sigma): + model_sampling = model.get_model_object("model_sampling") + sigma_val = model_sampling.percent_to_sigma(sampling_percent) + if return_actual_sigma: + if sampling_percent == 0.0: + sigma_val = model_sampling.sigma_max.item() + elif sampling_percent == 1.0: + sigma_val = model_sampling.sigma_min.item() + return (sigma_val,) + + class KSamplerSelect: @classmethod def INPUT_TYPES(s): @@ -887,6 +916,7 @@ NODE_CLASS_MAPPINGS = { "FlipSigmas": FlipSigmas, "SetFirstSigma": SetFirstSigma, "ExtendIntermediateSigmas": ExtendIntermediateSigmas, + "SamplingPercentToSigma": SamplingPercentToSigma, "CFGGuider": CFGGuider, "DualCFGGuider": DualCFGGuider, From a0c0785635a9f4d2da64b58fef063825f386d8da Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Sat, 19 Jul 2025 22:24:09 -0700 Subject: [PATCH 04/15] Document what the fast_fp16_accumulation is in the portable. 
(#8973) --- .ci/windows_base_files/README_VERY_IMPORTANT.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.ci/windows_base_files/README_VERY_IMPORTANT.txt b/.ci/windows_base_files/README_VERY_IMPORTANT.txt index d46acbcbf..8ab70c890 100755 --- a/.ci/windows_base_files/README_VERY_IMPORTANT.txt +++ b/.ci/windows_base_files/README_VERY_IMPORTANT.txt @@ -4,6 +4,9 @@ if you have a NVIDIA gpu: run_nvidia_gpu.bat +if you want to enable the fast fp16 accumulation (faster for fp16 models with slightly less quality): + +run_nvidia_gpu_fast_fp16_accumulation.bat To run it in slow CPU mode: From 7d627f764c2137d816a39adbc358cb28c1718a47 Mon Sep 17 00:00:00 2001 From: ComfyUI Wiki Date: Mon, 21 Jul 2025 03:58:35 +0800 Subject: [PATCH 05/15] Update template to 0.1.39 (#8981) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a7e44095f..8f6a6d112 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.23.4 -comfyui-workflow-templates==0.1.37 +comfyui-workflow-templates==0.1.39 comfyui-embedded-docs==0.2.4 torch torchsde From 9a470e073e2742d4edd6e7ea1ce28d861a77d9c4 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 21 Jul 2025 14:05:43 -0400 Subject: [PATCH 06/15] ComfyUI version 0.3.45 --- comfyui_version.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/comfyui_version.py b/comfyui_version.py index 7981fbaca..180ecaf8a 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. -__version__ = "0.3.44" +__version__ = "0.3.45" diff --git a/pyproject.toml b/pyproject.toml index 96ead2157..b1d6d9df6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.3.44" +version = "0.3.45" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.9" From 54a45b996729b361ea12f473de760e481dcf1f0a Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Mon, 21 Jul 2025 11:19:14 -0700 Subject: [PATCH 07/15] Replace torchaudio.load with pyav. 
(#8989) --- comfy_extras/nodes_audio.py | 58 ++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/comfy_extras/nodes_audio.py b/comfy_extras/nodes_audio.py index 8cd647846..38697240e 100644 --- a/comfy_extras/nodes_audio.py +++ b/comfy_extras/nodes_audio.py @@ -278,6 +278,62 @@ class PreviewAudio(SaveAudio): "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, } +def f32_pcm(wav: torch.Tensor) -> torch.Tensor: + """Convert audio to float 32 bits PCM format.""" + if wav.dtype.is_floating_point: + return wav + elif wav.dtype == torch.int16: + return wav.float() / (2 ** 15) + elif wav.dtype == torch.int32: + return wav.float() / (2 ** 31) + raise ValueError(f"Unsupported wav dtype: {wav.dtype}") + +def load(filepath: str, frame_offset: int = 0, num_frames: int = -1) -> tuple[torch.Tensor, int]: + with av.open(filepath) as af: + if not af.streams.audio: + raise ValueError("No audio stream found in the file.") + + stream = af.streams.audio[0] + sr = stream.codec_context.sample_rate + n_channels = stream.channels + + seek_time = frame_offset / sr if frame_offset > 0 else 0.0 + duration = num_frames / sr if num_frames > 0 else -1.0 + + sample_offset = int(sr * seek_time) + num_samples = int(sr * duration) if duration >= 0 else -1 + + # Small negative offset for MP3 artifacts, NOTE: this is LLM code so idk if it's actually necessary' + seek_sec = max(0, seek_time - 0.1) if filepath.lower().endswith('.mp3') else seek_time + af.seek(int(seek_sec / stream.time_base), stream=stream) + + frames = [] + length = 0 + for frame in af.decode(streams=stream.index): + current_offset = int(frame.rate * frame.pts * frame.time_base) + strip = max(0, sample_offset - current_offset) + + buf = torch.from_numpy(frame.to_ndarray()) + if buf.shape[0] != n_channels: + buf = buf.view(-1, n_channels).t() + + buf = buf[:, strip:] + frames.append(buf) + length += buf.shape[1] + + if num_samples > 0 and length >= num_samples: + break + + if not frames: + raise ValueError("No audio frames decoded.") + + wav = torch.cat(frames, dim=1) + if num_samples > 0: + wav = wav[:, :num_samples] + + wav = f32_pcm(wav) + return wav, sr + class LoadAudio: @classmethod def INPUT_TYPES(s): @@ -292,7 +348,7 @@ class LoadAudio: def load(self, audio): audio_path = folder_paths.get_annotated_filepath(audio) - waveform, sample_rate = torchaudio.load(audio_path) + waveform, sample_rate = load(audio_path) audio = {"waveform": waveform.unsqueeze(0), "sample_rate": sample_rate} return (audio, ) From 5249e45a1c7d91656ebefdebe3815005ec3d39d7 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Mon, 21 Jul 2025 12:23:41 -0700 Subject: [PATCH 08/15] Add hidream e1.1 example to readme. 
(#8990) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 0e021a687..d004364ee 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,7 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith - Image Editing Models - [Omnigen 2](https://comfyanonymous.github.io/ComfyUI_examples/omnigen/) - [Flux Kontext](https://comfyanonymous.github.io/ComfyUI_examples/flux/#flux-kontext-image-editing-model) + - [HiDream E1.1](https://comfyanonymous.github.io/ComfyUI_examples/hidream/#hidream-e11) - Video Models - [Stable Video Diffusion](https://comfyanonymous.github.io/ComfyUI_examples/video/) - [Mochi](https://comfyanonymous.github.io/ComfyUI_examples/mochi/) From 0aa1c58b04b27311c6ba38b1d9949e7e20037d00 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Mon, 21 Jul 2025 13:48:25 -0700 Subject: [PATCH 09/15] This is not needed. (#8991) --- comfy_extras/nodes_audio.py | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/comfy_extras/nodes_audio.py b/comfy_extras/nodes_audio.py index 38697240e..a90b31779 100644 --- a/comfy_extras/nodes_audio.py +++ b/comfy_extras/nodes_audio.py @@ -288,7 +288,7 @@ def f32_pcm(wav: torch.Tensor) -> torch.Tensor: return wav.float() / (2 ** 31) raise ValueError(f"Unsupported wav dtype: {wav.dtype}") -def load(filepath: str, frame_offset: int = 0, num_frames: int = -1) -> tuple[torch.Tensor, int]: +def load(filepath: str) -> tuple[torch.Tensor, int]: with av.open(filepath) as af: if not af.streams.audio: raise ValueError("No audio stream found in the file.") @@ -297,40 +297,20 @@ def load(filepath: str, frame_offset: int = 0, num_frames: int = -1) -> tuple[to sr = stream.codec_context.sample_rate n_channels = stream.channels - seek_time = frame_offset / sr if frame_offset > 0 else 0.0 - duration = num_frames / sr if num_frames > 0 else -1.0 - - sample_offset = int(sr * seek_time) - num_samples = int(sr * duration) if duration >= 0 else -1 - - # Small negative offset for MP3 artifacts, NOTE: this is LLM code so idk if it's actually necessary' - seek_sec = max(0, seek_time - 0.1) if filepath.lower().endswith('.mp3') else seek_time - af.seek(int(seek_sec / stream.time_base), stream=stream) - frames = [] length = 0 for frame in af.decode(streams=stream.index): - current_offset = int(frame.rate * frame.pts * frame.time_base) - strip = max(0, sample_offset - current_offset) - buf = torch.from_numpy(frame.to_ndarray()) if buf.shape[0] != n_channels: buf = buf.view(-1, n_channels).t() - buf = buf[:, strip:] frames.append(buf) length += buf.shape[1] - if num_samples > 0 and length >= num_samples: - break - if not frames: raise ValueError("No audio frames decoded.") wav = torch.cat(frames, dim=1) - if num_samples > 0: - wav = wav[:, :num_samples] - wav = f32_pcm(wav) return wav, sr From 5ac9ec214ba3ef1632701416f27948a57ec60919 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Tue, 22 Jul 2025 01:07:51 -0700 Subject: [PATCH 10/15] Try to fix line endings workflow. 
(#9001) --- .github/workflows/check-line-endings.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/check-line-endings.yml b/.github/workflows/check-line-endings.yml index f20dca565..03b3e3ced 100644 --- a/.github/workflows/check-line-endings.yml +++ b/.github/workflows/check-line-endings.yml @@ -17,6 +17,7 @@ jobs: - name: Check for Windows line endings (CRLF) run: | # Get the list of changed files in the PR + git merge origin/${{ github.base_ref }} --no-edit CHANGED_FILES=$(git diff --name-only origin/${{ github.base_ref }}..HEAD) # Flag to track if CRLF is found From 255f1398638b265a47d0e74fb4759fe6cfc3b3d4 Mon Sep 17 00:00:00 2001 From: Simon Lui <502929+simonlui@users.noreply.github.com> Date: Tue, 22 Jul 2025 12:20:09 -0700 Subject: [PATCH 11/15] Add xpu version for async offload and some other things. (#9004) --- comfy/model_management.py | 41 +++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 816caf18f..ab1e9bf3a 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -101,7 +101,7 @@ if args.directml is not None: lowvram_available = False #TODO: need to find a way to get free memory in directml before this can be enabled by default. try: - import intel_extension_for_pytorch as ipex + import intel_extension_for_pytorch as ipex # noqa: F401 _ = torch.xpu.device_count() xpu_available = xpu_available or torch.xpu.is_available() except: @@ -186,8 +186,12 @@ def get_total_memory(dev=None, torch_total_too=False): elif is_intel_xpu(): stats = torch.xpu.memory_stats(dev) mem_reserved = stats['reserved_bytes.all.current'] + if torch_version_numeric < (2, 6): + mem_total_xpu = torch.xpu.get_device_properties(dev).total_memory + else: + _, mem_total_xpu = torch.xpu.mem_get_info(dev) mem_total_torch = mem_reserved - mem_total = torch.xpu.get_device_properties(dev).total_memory + mem_total = mem_total_xpu elif is_ascend_npu(): stats = torch.npu.memory_stats(dev) mem_reserved = stats['reserved_bytes.all.current'] @@ -929,7 +933,7 @@ def device_supports_non_blocking(device): if is_device_mps(device): return False #pytorch bug? 
mps doesn't support non blocking if is_intel_xpu(): - return False + return True if args.deterministic: #TODO: figure out why deterministic breaks non blocking from gpu to cpu (previews) return False if directml_enabled: @@ -968,6 +972,8 @@ def get_offload_stream(device): stream_counter = (stream_counter + 1) % len(ss) if is_device_cuda(device): ss[stream_counter].wait_stream(torch.cuda.current_stream()) + elif is_device_xpu(device): + ss[stream_counter].wait_stream(torch.xpu.current_stream()) stream_counters[device] = stream_counter return s elif is_device_cuda(device): @@ -979,6 +985,15 @@ def get_offload_stream(device): stream_counter = (stream_counter + 1) % len(ss) stream_counters[device] = stream_counter return s + elif is_device_xpu(device): + ss = [] + for k in range(NUM_STREAMS): + ss.append(torch.xpu.Stream(device=device, priority=0)) + STREAMS[device] = ss + s = ss[stream_counter] + stream_counter = (stream_counter + 1) % len(ss) + stream_counters[device] = stream_counter + return s return None def sync_stream(device, stream): @@ -986,6 +1001,8 @@ def sync_stream(device, stream): return if is_device_cuda(device): torch.cuda.current_stream().wait_stream(stream) + elif is_device_xpu(device): + torch.xpu.current_stream().wait_stream(stream) def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False, stream=None): if device is None or weight.device == device: @@ -1092,8 +1109,11 @@ def get_free_memory(dev=None, torch_free_too=False): stats = torch.xpu.memory_stats(dev) mem_active = stats['active_bytes.all.current'] mem_reserved = stats['reserved_bytes.all.current'] + if torch_version_numeric < (2, 6): + mem_free_xpu = torch.xpu.get_device_properties(dev).total_memory - mem_reserved + else: + mem_free_xpu, _ = torch.xpu.mem_get_info(dev) mem_free_torch = mem_reserved - mem_active - mem_free_xpu = torch.xpu.get_device_properties(dev).total_memory - mem_reserved mem_free_total = mem_free_xpu + mem_free_torch elif is_ascend_npu(): stats = torch.npu.memory_stats(dev) @@ -1142,6 +1162,9 @@ def is_device_cpu(device): def is_device_mps(device): return is_device_type(device, 'mps') +def is_device_xpu(device): + return is_device_type(device, 'xpu') + def is_device_cuda(device): return is_device_type(device, 'cuda') @@ -1173,7 +1196,10 @@ def should_use_fp16(device=None, model_params=0, prioritize_performance=True, ma return False if is_intel_xpu(): - return True + if torch_version_numeric < (2, 3): + return True + else: + return torch.xpu.get_device_properties(device).has_fp16 if is_ascend_npu(): return True @@ -1236,7 +1262,10 @@ def should_use_bf16(device=None, model_params=0, prioritize_performance=True, ma return False if is_intel_xpu(): - return True + if torch_version_numeric < (2, 6): + return True + else: + return torch.xpu.get_device_capability(device)['has_bfloat16_conversions'] if is_ascend_npu(): return True From 5ad33787dee43d36f8d054c590818b3153b55370 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 23 Jul 2025 11:20:49 -0700 Subject: [PATCH 12/15] Add default device argument. 
(#9023) --- comfy/cli_args.py | 3 ++- comfy/model_management.py | 1 + main.py | 9 +++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index ef0d4337e..0d760d524 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -49,7 +49,8 @@ parser.add_argument("--temp-directory", type=str, default=None, help="Set the Co parser.add_argument("--input-directory", type=str, default=None, help="Set the ComfyUI input directory. Overrides --base-directory.") parser.add_argument("--auto-launch", action="store_true", help="Automatically launch ComfyUI in the default browser.") parser.add_argument("--disable-auto-launch", action="store_true", help="Disable auto launching the browser.") -parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.") +parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use. All other devices will not be visible.") +parser.add_argument("--default-device", type=int, default=None, metavar="DEFAULT_DEVICE_ID", help="Set the id of the default device, all other devices will stay visible.") cm_group = parser.add_mutually_exclusive_group() cm_group.add_argument("--cuda-malloc", action="store_true", help="Enable cudaMallocAsync (enabled by default for torch 2.0 and up).") cm_group.add_argument("--disable-cuda-malloc", action="store_true", help="Disable cudaMallocAsync.") diff --git a/comfy/model_management.py b/comfy/model_management.py index ab1e9bf3a..346673895 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -880,6 +880,7 @@ def vae_dtype(device=None, allowed_dtypes=[]): return d # NOTE: bfloat16 seems to work on AMD for the VAE but is extremely slow in some cases compared to fp32 + # slowness still a problem on pytorch nightly 2.9.0.dev20250720+rocm6.4 tested on RDNA3 if d == torch.bfloat16 and (not is_amd()) and should_use_bf16(device): return d diff --git a/main.py b/main.py index 2b4ffafd4..e8ca8152a 100644 --- a/main.py +++ b/main.py @@ -115,6 +115,15 @@ if os.name == "nt": logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not available' not in record.getMessage()) if __name__ == "__main__": + if args.default_device is not None: + default_dev = args.default_device + devices = list(range(32)) + devices.remove(default_dev) + devices.insert(0, default_dev) + devices = ','.join(map(str, devices)) + os.environ['CUDA_VISIBLE_DEVICES'] = str(devices) + os.environ['HIP_VISIBLE_DEVICES'] = str(devices) + if args.cuda_device is not None: os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda_device) os.environ['HIP_VISIBLE_DEVICES'] = str(args.cuda_device) From 39dda1d40d1f2f18ccda8ade860932d0b8a07af4 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 23 Jul 2025 15:10:59 -0700 Subject: [PATCH 13/15] Fix xpu function not implemented. 
(#9026) --- comfy/model_management.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 346673895..746b063ed 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -186,10 +186,7 @@ def get_total_memory(dev=None, torch_total_too=False): elif is_intel_xpu(): stats = torch.xpu.memory_stats(dev) mem_reserved = stats['reserved_bytes.all.current'] - if torch_version_numeric < (2, 6): - mem_total_xpu = torch.xpu.get_device_properties(dev).total_memory - else: - _, mem_total_xpu = torch.xpu.mem_get_info(dev) + mem_total_xpu = torch.xpu.get_device_properties(dev).total_memory mem_total_torch = mem_reserved mem_total = mem_total_xpu elif is_ascend_npu(): From a86a58c308c2423e86054462a8c9f1125536a034 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 23 Jul 2025 15:18:20 -0700 Subject: [PATCH 14/15] Fix xpu function not implemented p2. (#9027) --- comfy/model_management.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 746b063ed..42873d09b 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -1107,10 +1107,7 @@ def get_free_memory(dev=None, torch_free_too=False): stats = torch.xpu.memory_stats(dev) mem_active = stats['active_bytes.all.current'] mem_reserved = stats['reserved_bytes.all.current'] - if torch_version_numeric < (2, 6): - mem_free_xpu = torch.xpu.get_device_properties(dev).total_memory - mem_reserved - else: - mem_free_xpu, _ = torch.xpu.mem_get_info(dev) + mem_free_xpu = torch.xpu.get_device_properties(dev).total_memory - mem_reserved mem_free_torch = mem_reserved - mem_active mem_free_total = mem_free_xpu + mem_free_torch elif is_ascend_npu(): From d3504e1778c0cc8992b04fe30dc0fae239c13713 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 23 Jul 2025 16:21:29 -0700 Subject: [PATCH 15/15] Enable pytorch attention by default for gfx1201 on torch 2.8 (#9029) --- comfy/model_management.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 42873d09b..e8b9b5c81 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -308,7 +308,10 @@ try: logging.info("ROCm version: {}".format(rocm_version)) if args.use_split_cross_attention == False and args.use_quad_cross_attention == False: if torch_version_numeric >= (2, 7): # works on 2.6 but doesn't actually seem to improve much - if any((a in arch) for a in ["gfx90a", "gfx942", "gfx1100", "gfx1101", "gfx1151"]): # TODO: more arches, TODO: gfx1201 and gfx950 + if any((a in arch) for a in ["gfx90a", "gfx942", "gfx1100", "gfx1101", "gfx1151"]): # TODO: more arches, TODO: gfx950 + ENABLE_PYTORCH_ATTENTION = True + if torch_version_numeric >= (2, 8): + if any((a in arch) for a in ["gfx1201"]): ENABLE_PYTORCH_ATTENTION = True if torch_version_numeric >= (2, 7) and rocm_version >= (6, 4): if any((a in arch) for a in ["gfx1201", "gfx942", "gfx950"]): # TODO: more arches
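
Two illustrative notes on the patches above (not part of the series itself; the snippets are simplified sketches, not the committed code):

PATCH 01 adds a "nested" style to DualCFGGuider alongside the existing behaviour. The difference between the two combination formulas is easier to see outside the guider class. In the sketch below, cfg() stands in for the core of comfy.samplers.cfg_function (uncond + scale * (cond - uncond)); the toy tensors and variable names are illustrative only.

import torch

def cfg(cond, uncond, scale):
    # simplified stand-in for comfy.samplers.cfg_function
    return uncond + scale * (cond - uncond)

# model predictions for the negative, middle (cond2) and positive (cond1) prompts
negative = torch.randn(1, 4, 8, 8)
middle = torch.randn(1, 4, 8, 8)
positive = torch.randn(1, 4, 8, 8)
cfg1, cfg2 = 8.0, 8.0  # cfg_conds, cfg_cond2_negative

# "regular" style: CFG middle against negative, then add a separately scaled (positive - middle) term
regular = cfg(middle, negative, cfg2) + (positive - middle) * cfg1

# "nested" style: CFG positive against middle first, then CFG that result against negative
pred_text = cfg(positive, middle, cfg1)
nested = negative + cfg2 * (pred_text - negative)

PATCH 12's --default-device flag keeps every GPU visible but rotates the chosen id to the front of CUDA_VISIBLE_DEVICES / HIP_VISIBLE_DEVICES, unlike --cuda-device, which hides all other devices. A minimal sketch of the reordering done in main.py (the helper name is made up; the 0..31 range matches the patch):

def default_device_env(default_dev: int) -> str:
    devices = list(range(32))
    devices.remove(default_dev)
    devices.insert(0, default_dev)
    return ','.join(map(str, devices))

# e.g. --default-device 1 -> "1,0,2,3,...,31", so physical device 1 becomes cuda:0 inside ComfyUI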