From 333d942f303b170d9a778959b80fe0c3246a3cf6 Mon Sep 17 00:00:00 2001 From: bigcat88 Date: Wed, 23 Jul 2025 06:54:15 +0300 Subject: [PATCH 1/2] refactored Preview/Save of images --- comfy_api/v3/ui.py | 72 ++++++++++++++++++++++++++++++--- comfy_extras/v3/nodes_images.py | 45 +++++++++------------ 2 files changed, 86 insertions(+), 31 deletions(-) diff --git a/comfy_api/v3/ui.py b/comfy_api/v3/ui.py index 8a276cf0c..caaf894f6 100644 --- a/comfy_api/v3/ui.py +++ b/comfy_api/v3/ui.py @@ -37,6 +37,20 @@ class SavedResult(dict): return FolderType(self["type"]) +class SavedImages(_UIOutput): + """A UI output class to represent one or more saved images, potentially animated.""" + def __init__(self, results: list[SavedResult], is_animated: bool = False): + super().__init__() + self.results = results + self.is_animated = is_animated + + def as_dict(self) -> dict: + data = {"images": self.results} + if self.is_animated: + data["animated"] = (True,) + return data + + def _get_directory_by_folder_type(folder_type: FolderType) -> str: if folder_type == FolderType.input: return folder_paths.get_input_directory() @@ -125,14 +139,22 @@ class ImageSaveHelper: counter += 1 return results + @staticmethod + def get_save_images_ui(images, filename_prefix: str, cls: Type[ComfyNodeV3] | None, compress_level=4) -> SavedImages: + """Saves a batch of images and returns a UI object for the node output.""" + return SavedImages( + ImageSaveHelper.save_images( + images, + filename_prefix=filename_prefix, + folder_type=FolderType.output, + cls=cls, + compress_level=compress_level, + ) + ) + @staticmethod def save_animated_png( - images, - filename_prefix: str, - folder_type: FolderType, - cls: Type[ComfyNodeV3] | None, - fps: float, - compress_level: int + images, filename_prefix: str, folder_type: FolderType, cls: Type[ComfyNodeV3] | None, fps: float, compress_level: int ) -> SavedResult: """Saves a batch of images as a single animated PNG.""" full_output_folder, filename, counter, subfolder, _ = folder_paths.get_save_image_path( @@ -152,6 +174,21 @@ class ImageSaveHelper: ) return SavedResult(file, subfolder, folder_type) + @staticmethod + def get_save_animated_png_ui( + images, filename_prefix: str, cls: Type[ComfyNodeV3] | None, fps: float, compress_level: int + ) -> SavedImages: + """Saves an animated PNG and returns a UI object for the node output.""" + result = ImageSaveHelper.save_animated_png( + images, + filename_prefix=filename_prefix, + folder_type=FolderType.output, + cls=cls, + fps=fps, + compress_level=compress_level, + ) + return SavedImages([result], is_animated=len(images) > 1) + @staticmethod def save_animated_webp( images, @@ -182,6 +219,29 @@ class ImageSaveHelper: ) return SavedResult(file, subfolder, folder_type) + @staticmethod + def get_save_animated_webp_ui( + images, + filename_prefix: str, + cls: Type[ComfyNodeV3] | None, + fps: float, + lossless: bool, + quality: int, + method: int, + ) -> SavedImages: + """Saves an animated WebP and returns a UI object for the node output.""" + result = ImageSaveHelper.save_animated_webp( + images, + filename_prefix=filename_prefix, + folder_type=FolderType.output, + cls=cls, + fps=fps, + lossless=lossless, + quality=quality, + method=method, + ) + return SavedImages([result], is_animated=len(images) > 1) + class PreviewImage(_UIOutput): def __init__(self, image: Image.Type, animated: bool=False, cls: ComfyNodeV3=None, **kwargs): diff --git a/comfy_extras/v3/nodes_images.py b/comfy_extras/v3/nodes_images.py index 16e77de4a..a1e26032b 100644 --- a/comfy_extras/v3/nodes_images.py +++ b/comfy_extras/v3/nodes_images.py @@ -630,15 +630,15 @@ class SaveAnimatedPNG(io.ComfyNodeV3): @classmethod def execute(cls, images, fps, compress_level, filename_prefix="ComfyUI") -> io.NodeOutput: - result = ui.ImageSaveHelper.save_animated_png( - images=images, - filename_prefix=filename_prefix, - folder_type=io.FolderType.output, - cls=cls, - fps=fps, - compress_level=compress_level, + return io.NodeOutput( + ui=ui.ImageSaveHelper.get_save_animated_png_ui( + images=images, + filename_prefix=filename_prefix, + cls=cls, + fps=fps, + compress_level=compress_level, + ) ) - return io.NodeOutput(ui={"images": [result], "animated": (len(images) != 1,)}) class SaveAnimatedWEBP(io.ComfyNodeV3): @@ -664,17 +664,17 @@ class SaveAnimatedWEBP(io.ComfyNodeV3): @classmethod def execute(cls, images, fps, filename_prefix, lossless, quality, method) -> io.NodeOutput: - result = ui.ImageSaveHelper.save_animated_webp( - images=images, - filename_prefix=filename_prefix, - folder_type=io.FolderType.output, - cls=cls, - fps=fps, - lossless=lossless, - quality=quality, - method=cls.COMPRESS_METHODS.get(method) + return io.NodeOutput( + ui=ui.ImageSaveHelper.get_save_animated_webp_ui( + images=images, + filename_prefix=filename_prefix, + cls=cls, + fps=fps, + lossless=lossless, + quality=quality, + method=cls.COMPRESS_METHODS.get(method) + ) ) - return io.NodeOutput(ui={"images": [result], "animated": (len(images) != 1,)}) class SaveImage(io.ComfyNodeV3): @@ -703,14 +703,9 @@ class SaveImage(io.ComfyNodeV3): @classmethod def execute(cls, images, filename_prefix="ComfyUI") -> io.NodeOutput: - results = ui.ImageSaveHelper.save_images( - images, - filename_prefix=filename_prefix, - folder_type=io.FolderType.output, - cls=cls, - compress_level=4, + return io.NodeOutput( + ui=ui.ImageSaveHelper.get_save_images_ui(images, filename_prefix=filename_prefix, cls=cls, compress_level=4) ) - return io.NodeOutput(ui={"images": results}) NODES_LIST: list[type[io.ComfyNodeV3]] = [ From bed60d6ed9c28687bfdc1e89d2bb84a22fa905f0 Mon Sep 17 00:00:00 2001 From: bigcat88 Date: Wed, 23 Jul 2025 10:13:47 +0300 Subject: [PATCH 2/2] refactored Preview/Save of audios --- comfy_api/v3/ui.py | 247 +++++++++++++++++++-------------- comfy_extras/v3/nodes_audio.py | 126 ++--------------- 2 files changed, 161 insertions(+), 212 deletions(-) diff --git a/comfy_api/v3/ui.py b/comfy_api/v3/ui.py index caaf894f6..9dc6f43c0 100644 --- a/comfy_api/v3/ui.py +++ b/comfy_api/v3/ui.py @@ -51,6 +51,16 @@ class SavedImages(_UIOutput): return data +class SavedAudios(_UIOutput): + """UI wrapper around one or more audio files on disk (FLAC / MP3 / Opus).""" + def __init__(self, results: list[SavedResult]): + super().__init__() + self.results = results + + def as_dict(self) -> dict: + return {"audio": self.results} + + def _get_directory_by_folder_type(folder_type: FolderType) -> str: if folder_type == FolderType.input: return folder_paths.get_input_directory() @@ -243,8 +253,134 @@ class ImageSaveHelper: return SavedImages([result], is_animated=len(images) > 1) +class AudioSaveHelper: + """A helper class with static methods to handle audio saving and metadata.""" + _OPUS_RATES = [8000, 12000, 16000, 24000, 48000] + + @staticmethod + def save_audio( + audio: dict, + filename_prefix: str, + folder_type: FolderType, + cls: Type[ComfyNodeV3] | None, + format: str = "flac", + quality: str = "128k", + ) -> list[SavedResult]: + full_output_folder, filename, counter, subfolder, _ = folder_paths.get_save_image_path( + filename_prefix, _get_directory_by_folder_type(folder_type) + ) + + metadata = {} + if not args.disable_metadata and cls is not None: + if cls.hidden.prompt is not None: + metadata["prompt"] = json.dumps(cls.hidden.prompt) + if cls.hidden.extra_pnginfo is not None: + for x in cls.hidden.extra_pnginfo: + metadata[x] = json.dumps(cls.hidden.extra_pnginfo[x]) + + results = [] + for batch_number, waveform in enumerate(audio["waveform"].cpu()): + filename_with_batch_num = filename.replace("%batch_num%", str(batch_number)) + file = f"{filename_with_batch_num}_{counter:05}_.{format}" + output_path = os.path.join(full_output_folder, file) + + # Use original sample rate initially + sample_rate = audio["sample_rate"] + + # Handle Opus sample rate requirements + if format == "opus": + if sample_rate > 48000: + sample_rate = 48000 + elif sample_rate not in AudioSaveHelper._OPUS_RATES: + # Find the next highest supported rate + for rate in sorted(AudioSaveHelper._OPUS_RATES): + if rate > sample_rate: + sample_rate = rate + break + if sample_rate not in AudioSaveHelper._OPUS_RATES: # Fallback if still not supported + sample_rate = 48000 + + # Resample if necessary + if sample_rate != audio["sample_rate"]: + waveform = torchaudio.functional.resample(waveform, audio["sample_rate"], sample_rate) + + # Create output with specified format + output_buffer = BytesIO() + output_container = av.open(output_buffer, mode="w", format=format) + + # Set metadata on the container + for key, value in metadata.items(): + output_container.metadata[key] = value + + # Set up the output stream with appropriate properties + if format == "opus": + out_stream = output_container.add_stream("libopus", rate=sample_rate) + if quality == "64k": + out_stream.bit_rate = 64000 + elif quality == "96k": + out_stream.bit_rate = 96000 + elif quality == "128k": + out_stream.bit_rate = 128000 + elif quality == "192k": + out_stream.bit_rate = 192000 + elif quality == "320k": + out_stream.bit_rate = 320000 + elif format == "mp3": + out_stream = output_container.add_stream("libmp3lame", rate=sample_rate) + if quality == "V0": + # TODO i would really love to support V3 and V5 but there doesn't seem to be a way to set the qscale level, the property below is a bool + out_stream.codec_context.qscale = 1 + elif quality == "128k": + out_stream.bit_rate = 128000 + elif quality == "320k": + out_stream.bit_rate = 320000 + else: # format == "flac": + out_stream = output_container.add_stream("flac", rate=sample_rate) + + frame = av.AudioFrame.from_ndarray( + waveform.movedim(0, 1).reshape(1, -1).float().numpy(), + format="flt", + layout="mono" if waveform.shape[0] == 1 else "stereo", + ) + frame.sample_rate = sample_rate + frame.pts = 0 + output_container.mux(out_stream.encode(frame)) + + # Flush encoder + output_container.mux(out_stream.encode(None)) + + # Close containers + output_container.close() + + # Write the output to file + output_buffer.seek(0) + with open(output_path, "wb") as f: + f.write(output_buffer.getbuffer()) + + results.append(SavedResult(file, subfolder, folder_type)) + counter += 1 + + return results + + @staticmethod + def get_save_audio_ui( + audio, filename_prefix: str, cls: Type[ComfyNodeV3] | None, format: str = "flac", quality: str = "128k", + ) -> SavedAudios: + """Save and instantly wrap for UI.""" + return SavedAudios( + AudioSaveHelper.save_audio( + audio, + filename_prefix=filename_prefix, + folder_type=FolderType.output, + cls=cls, + format=format, + quality=quality, + ) + ) + + class PreviewImage(_UIOutput): - def __init__(self, image: Image.Type, animated: bool=False, cls: ComfyNodeV3=None, **kwargs): + def __init__(self, image: Image.Type, animated: bool = False, cls: Type[ComfyNodeV3] = None, **kwargs): self.values = ImageSaveHelper.save_images( image, filename_prefix="ComfyUI_temp_" + ''.join(random.choice("abcdefghijklmnopqrstupvxyz") for _ in range(5)), @@ -316,108 +452,17 @@ class PreviewMask(PreviewImage): class PreviewAudio(_UIOutput): - def __init__(self, audio, cls: ComfyNodeV3=None, **kwargs): - quality = "128k" - format = "flac" - - filename_prefix = "ComfyUI_temp_" + ''.join(random.choice("abcdefghijklmnopqrstupvxyz") for x in range(5)) - full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path( - filename_prefix, folder_paths.get_temp_directory() + def __init__(self, audio: dict, cls: Type[ComfyNodeV3] = None, **kwargs): + self.values = AudioSaveHelper.save_audio( + audio, + filename_prefix="ComfyUI_temp_" + "".join(random.choice("abcdefghijklmnopqrstuvwxyz") for _ in range(5)), + folder_type=FolderType.temp, + cls=cls, + format="flac", + quality="128k", ) - # Prepare metadata dictionary - metadata = {} - if not args.disable_metadata and cls is not None: - if cls.hidden.prompt is not None: - metadata["prompt"] = json.dumps(cls.hidden.prompt) - if cls.hidden.extra_pnginfo is not None: - for x in cls.hidden.extra_pnginfo: - metadata[x] = json.dumps(cls.hidden.extra_pnginfo[x]) - - # Opus supported sample rates - OPUS_RATES = [8000, 12000, 16000, 24000, 48000] - results = [] - for (batch_number, waveform) in enumerate(audio["waveform"].cpu()): - filename_with_batch_num = filename.replace("%batch_num%", str(batch_number)) - file = f"{filename_with_batch_num}_{counter:05}_.{format}" - output_path = os.path.join(full_output_folder, file) - - # Use original sample rate initially - sample_rate = audio["sample_rate"] - - # Handle Opus sample rate requirements - if format == "opus": - if sample_rate > 48000: - sample_rate = 48000 - elif sample_rate not in OPUS_RATES: - # Find the next highest supported rate - for rate in sorted(OPUS_RATES): - if rate > sample_rate: - sample_rate = rate - break - if sample_rate not in OPUS_RATES: # Fallback if still not supported - sample_rate = 48000 - - # Resample if necessary - if sample_rate != audio["sample_rate"]: - waveform = torchaudio.functional.resample(waveform, audio["sample_rate"], sample_rate) - - # Create output with specified format - output_buffer = BytesIO() - output_container = av.open(output_buffer, mode='w', format=format) - - # Set metadata on the container - for key, value in metadata.items(): - output_container.metadata[key] = value - - # Set up the output stream with appropriate properties - if format == "opus": - out_stream = output_container.add_stream("libopus", rate=sample_rate) - if quality == "64k": - out_stream.bit_rate = 64000 - elif quality == "96k": - out_stream.bit_rate = 96000 - elif quality == "128k": - out_stream.bit_rate = 128000 - elif quality == "192k": - out_stream.bit_rate = 192000 - elif quality == "320k": - out_stream.bit_rate = 320000 - elif format == "mp3": - out_stream = output_container.add_stream("libmp3lame", rate=sample_rate) - if quality == "V0": - # TODO i would really love to support V3 and V5 but there doesn't seem to be a way to set the qscale level, the property below is a bool - out_stream.codec_context.qscale = 1 - elif quality == "128k": - out_stream.bit_rate = 128000 - elif quality == "320k": - out_stream.bit_rate = 320000 - else: # format == "flac": - out_stream = output_container.add_stream("flac", rate=sample_rate) - - frame = av.AudioFrame.from_ndarray(waveform.movedim(0, 1).reshape(1, -1).float().numpy(), format='flt', - layout='mono' if waveform.shape[0] == 1 else 'stereo') - frame.sample_rate = sample_rate - frame.pts = 0 - output_container.mux(out_stream.encode(frame)) - - # Flush encoder - output_container.mux(out_stream.encode(None)) - - # Close containers - output_container.close() - - # Write the output to file - output_buffer.seek(0) - with open(output_path, 'wb') as f: - f.write(output_buffer.getbuffer()) - - results.append(SavedResult(file, subfolder, FolderType.temp)) - counter += 1 - - self.values = results - - def as_dict(self): + def as_dict(self) -> dict: return {"audio": self.values} diff --git a/comfy_extras/v3/nodes_audio.py b/comfy_extras/v3/nodes_audio.py index 80c502df2..394709d1b 100644 --- a/comfy_extras/v3/nodes_audio.py +++ b/comfy_extras/v3/nodes_audio.py @@ -1,18 +1,14 @@ from __future__ import annotations import hashlib -import json import os -from io import BytesIO -import av import torch import torchaudio import comfy.model_management import folder_paths import node_helpers -from comfy.cli_args import args from comfy_api.v3 import io, ui @@ -142,8 +138,12 @@ class SaveAudioMP3(io.ComfyNodeV3): ) @classmethod - def execute(self, audio, filename_prefix="ComfyUI", format="mp3", quality="V0") -> io.NodeOutput: - return _save_audio(self, audio, filename_prefix, format, quality) + def execute(cls, audio, filename_prefix="ComfyUI", format="mp3", quality="V0") -> io.NodeOutput: + return io.NodeOutput( + ui=ui.AudioSaveHelper.get_save_audio_ui( + audio, filename_prefix=filename_prefix, cls=cls, format=format, quality=quality + ) + ) class SaveAudioOpus(io.ComfyNodeV3): @@ -163,8 +163,12 @@ class SaveAudioOpus(io.ComfyNodeV3): ) @classmethod - def execute(self, audio, filename_prefix="ComfyUI", format="opus", quality="128k") -> io.NodeOutput: - return _save_audio(self, audio, filename_prefix, format, quality) + def execute(cls, audio, filename_prefix="ComfyUI", format="opus", quality="128k") -> io.NodeOutput: + return io.NodeOutput( + ui=ui.AudioSaveHelper.get_save_audio_ui( + audio, filename_prefix=filename_prefix, cls=cls, format=format, quality=quality + ) + ) class SaveAudio(io.ComfyNodeV3): @@ -184,7 +188,9 @@ class SaveAudio(io.ComfyNodeV3): @classmethod def execute(cls, audio, filename_prefix="ComfyUI", format="flac") -> io.NodeOutput: - return _save_audio(cls, audio, filename_prefix, format) + return io.NodeOutput( + ui=ui.AudioSaveHelper.get_save_audio_ui(audio, filename_prefix=filename_prefix, cls=cls, format=format) + ) class VAEDecodeAudio(io.ComfyNodeV3): @@ -232,108 +238,6 @@ class VAEEncodeAudio(io.ComfyNodeV3): return io.NodeOutput({"samples": vae.encode(waveform.movedim(1, -1))}) -def _save_audio(cls, audio, filename_prefix="ComfyUI", format="flac", quality="128k") -> io.NodeOutput: - full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path( - filename_prefix, folder_paths.get_output_directory() - ) - - # Prepare metadata dictionary - metadata = {} - if not args.disable_metadata: - if cls.hidden.prompt is not None: - metadata["prompt"] = json.dumps(cls.hidden.prompt) - if cls.hidden.extra_pnginfo is not None: - for x in cls.hidden.extra_pnginfo: - metadata[x] = json.dumps(cls.hidden.extra_pnginfo[x]) - - # Opus supported sample rates - OPUS_RATES = [8000, 12000, 16000, 24000, 48000] - - results = [] - for batch_number, waveform in enumerate(audio["waveform"].cpu()): - filename_with_batch_num = filename.replace("%batch_num%", str(batch_number)) - file = f"{filename_with_batch_num}_{counter:05}_.{format}" - output_path = os.path.join(full_output_folder, file) - - # Use original sample rate initially - sample_rate = audio["sample_rate"] - - # Handle Opus sample rate requirements - if format == "opus": - if sample_rate > 48000: - sample_rate = 48000 - elif sample_rate not in OPUS_RATES: - # Find the next highest supported rate - for rate in sorted(OPUS_RATES): - if rate > sample_rate: - sample_rate = rate - break - if sample_rate not in OPUS_RATES: # Fallback if still not supported - sample_rate = 48000 - - # Resample if necessary - if sample_rate != audio["sample_rate"]: - waveform = torchaudio.functional.resample(waveform, audio["sample_rate"], sample_rate) - - # Create output with specified format - output_buffer = BytesIO() - output_container = av.open(output_buffer, mode="w", format=format) - - # Set metadata on the container - for key, value in metadata.items(): - output_container.metadata[key] = value - - # Set up the output stream with appropriate properties - if format == "opus": - out_stream = output_container.add_stream("libopus", rate=sample_rate) - if quality == "64k": - out_stream.bit_rate = 64000 - elif quality == "96k": - out_stream.bit_rate = 96000 - elif quality == "128k": - out_stream.bit_rate = 128000 - elif quality == "192k": - out_stream.bit_rate = 192000 - elif quality == "320k": - out_stream.bit_rate = 320000 - elif format == "mp3": - out_stream = output_container.add_stream("libmp3lame", rate=sample_rate) - if quality == "V0": - # TODO i would really love to support V3 and V5 but there doesn't seem to be a way to set the qscale level, the property below is a bool - out_stream.codec_context.qscale = 1 - elif quality == "128k": - out_stream.bit_rate = 128000 - elif quality == "320k": - out_stream.bit_rate = 320000 - else: # format == "flac": - out_stream = output_container.add_stream("flac", rate=sample_rate) - - frame = av.AudioFrame.from_ndarray( - waveform.movedim(0, 1).reshape(1, -1).float().numpy(), - format="flt", - layout="mono" if waveform.shape[0] == 1 else "stereo", - ) - frame.sample_rate = sample_rate - frame.pts = 0 - output_container.mux(out_stream.encode(frame)) - - # Flush encoder - output_container.mux(out_stream.encode(None)) - - # Close containers - output_container.close() - - # Write the output to file - output_buffer.seek(0) - with open(output_path, "wb") as f: - f.write(output_buffer.getbuffer()) - - results.append(ui.SavedResult(file, subfolder, io.FolderType.output)) - counter += 1 - - return io.NodeOutput(ui={"audio": results}) - - NODES_LIST: list[type[io.ComfyNodeV3]] = [ ConditioningStableAudio, EmptyLatentAudio,