diff --git a/README.md b/README.md
index 119098f5c..e4cff01a9 100644
--- a/README.md
+++ b/README.md
@@ -203,7 +203,7 @@ Put your VAE in: models/vae
 ### AMD GPUs (Linux only)
 AMD users can install rocm and pytorch with pip if you don't have it already installed, this is the command to install the stable version:
 
-```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.3```
+```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.4```
 
 This is the command to install the nightly with ROCm 6.4 which might have some performance improvements:
 
@@ -237,7 +237,7 @@ Additional discussion and help can be found [here](https://github.com/comfyanony
 
 Nvidia users should install stable pytorch using this command:
 
-```pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu128```
+```pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu129```
 
 This is the command to install pytorch nightly instead which might have performance improvements.
diff --git a/comfy/lora.py b/comfy/lora.py
index 6686b7229..00358884b 100644
--- a/comfy/lora.py
+++ b/comfy/lora.py
@@ -301,6 +301,7 @@ def model_lora_keys_unet(model, key_map={}):
                 key_map["{}".format(key_lora)] = k
                 # Support transformer prefix format
                 key_map["transformer.{}".format(key_lora)] = k
+                key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = k #SimpleTuner lycoris format
 
     return key_map
 
diff --git a/comfy/model_management.py b/comfy/model_management.py
index 9e6149d60..c08f759e5 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -321,9 +321,9 @@ try:
         if torch_version_numeric >= (2, 7): # works on 2.6 but doesn't actually seem to improve much
             if any((a in arch) for a in ["gfx90a", "gfx942", "gfx1100", "gfx1101", "gfx1151"]): # TODO: more arches, TODO: gfx950
                 ENABLE_PYTORCH_ATTENTION = True
-        if torch_version_numeric >= (2, 8):
-            if any((a in arch) for a in ["gfx1201"]):
-                ENABLE_PYTORCH_ATTENTION = True
+#        if torch_version_numeric >= (2, 8):
+#            if any((a in arch) for a in ["gfx1201"]):
+#                ENABLE_PYTORCH_ATTENTION = True
         if torch_version_numeric >= (2, 7) and rocm_version >= (6, 4):
             if any((a in arch) for a in ["gfx1201", "gfx942", "gfx950"]): # TODO: more arches
                 SUPPORT_FP8_OPS = True
@@ -340,7 +340,7 @@ if ENABLE_PYTORCH_ATTENTION:
 
 PRIORITIZE_FP16 = False  # TODO: remove and replace with something that shows exactly which dtype is faster than the other
 try:
-    if is_nvidia() and PerformanceFeature.Fp16Accumulation in args.fast:
+    if (is_nvidia() or is_amd()) and PerformanceFeature.Fp16Accumulation in args.fast:
         torch.backends.cuda.matmul.allow_fp16_accumulation = True
         PRIORITIZE_FP16 = True  # TODO: limit to cards where it actually boosts performance
         logging.info("Enabled fp16 accumulation.")
diff --git a/execution.py b/execution.py
index 952f0cc5c..1dc35738b 100644
--- a/execution.py
+++ b/execution.py
@@ -646,8 +646,6 @@ class PromptExecutor:
         self.add_message("execution_error", mes, broadcast=False)
 
     def execute(self, prompt, prompt_id, extra_data={}, execute_outputs=[]):
-        asyncio_loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(asyncio_loop)
         asyncio.run(self.execute_async(prompt, prompt_id, extra_data, execute_outputs))
 
     async def execute_async(self, prompt, prompt_id, extra_data={}, execute_outputs=[]):
diff --git a/requirements.txt b/requirements.txt
index 87c1d835f..efdfdb8bf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 comfyui-frontend-package==1.24.4
-comfyui-workflow-templates==0.1.52
-comfyui-embedded-docs==0.2.4
+comfyui-workflow-templates==0.1.53
+comfyui-embedded-docs==0.2.6
 torch
 torchsde
 torchvision
diff --git a/server.py b/server.py
index 0553a0dd7..8f9c88ebf 100644
--- a/server.py
+++ b/server.py
@@ -235,7 +235,7 @@ class PromptServer():
                             sid,
                         )
 
-                        logging.info(
+                        logging.debug(
                             f"Feature flags negotiated for client {sid}: {client_flags}"
                         )
                         first_message = False
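
As context for the `comfy/lora.py` hunk above, here is a minimal, self-contained sketch of the key aliasing it introduces: every base LoRA key also gets a SimpleTuner/LyCORIS-style alias in which dots become underscores and the name is prefixed with `lycoris_`, so all three spellings resolve to the same model weight. The helper name `add_aliases` and the example key below are hypothetical, for illustration only; they are not part of ComfyUI's loader.

```python
# Illustrative sketch of the aliasing added in comfy/lora.py (not the real loader).
# For each base key, three lookups map to the same model weight: the bare key,
# the diffusers-style "transformer." prefix, and the new SimpleTuner/LyCORIS
# form with dots replaced by underscores.

def add_aliases(key_map: dict, key_lora: str, k: str) -> None:
    key_map["{}".format(key_lora)] = k                             # bare key
    key_map["transformer.{}".format(key_lora)] = k                 # transformer prefix format
    key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = k   # SimpleTuner lycoris format

key_map = {}
# Hypothetical key name; real keys come from the loaded model's state dict.
add_aliases(key_map, "single_blocks.0.attn.to_q", "diffusion_model.single_blocks.0.attn.to_q.weight")
print(list(key_map))
# ['single_blocks.0.attn.to_q',
#  'transformer.single_blocks.0.attn.to_q',
#  'lycoris_single_blocks_0_attn_to_q']
```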