diff --git a/README.md b/README.md
index 8234af02..93c7b3ec 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,8 @@ This ui will let you design and execute advanced stable diffusion pipelines usin
 - Saving/Loading workflows as Json files.
 - Nodes interface can be used to create complex workflows like one for [Hires fix](https://comfyanonymous.github.io/ComfyUI_examples/2_pass_txt2img/) or much more advanced ones.
 - [Area Composition](https://comfyanonymous.github.io/ComfyUI_examples/area_composition/)
+- [Inpainting](https://comfyanonymous.github.io/ComfyUI_examples/inpaint/) with both regular and inpainting models.
+- [ControlNet](https://comfyanonymous.github.io/ComfyUI_examples/controlnet/)
 - Starts up very fast.
 - Works fully offline: will never download anything.
 
diff --git a/comfy/model_management.py b/comfy/model_management.py
index 1301f746..8c859d3f 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -3,6 +3,7 @@ CPU = 0
 NO_VRAM = 1
 LOW_VRAM = 2
 NORMAL_VRAM = 3
+HIGH_VRAM = 4
 
 accelerate_enabled = False
 vram_state = NORMAL_VRAM
@@ -27,10 +28,11 @@
 if "--lowvram" in sys.argv:
     set_vram_to = LOW_VRAM
 if "--novram" in sys.argv:
     set_vram_to = NO_VRAM
+if "--highvram" in sys.argv:
+    vram_state = HIGH_VRAM
 
-
-if set_vram_to != NORMAL_VRAM:
+if set_vram_to == LOW_VRAM or set_vram_to == NO_VRAM:
     try:
         import accelerate
         accelerate_enabled = True
@@ -44,7 +46,7 @@ if set_vram_to != NORMAL_VRAM:
         total_vram_available_mb = int(max(256, total_vram_available_mb))
 
 
-print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", "NORMAL VRAM"][vram_state])
+print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", "NORMAL VRAM", "HIGH VRAM"][vram_state])
 
 
 current_loaded_model = None
@@ -57,18 +59,24 @@ def unload_model():
     global current_loaded_model
     global model_accelerated
     global current_gpu_controlnets
+    global vram_state
+
     if current_loaded_model is not None:
         if model_accelerated:
             accelerate.hooks.remove_hook_from_submodules(current_loaded_model.model)
             model_accelerated = False
 
-        current_loaded_model.model.cpu()
+        #never unload models from GPU on high vram
+        if vram_state != HIGH_VRAM:
+            current_loaded_model.model.cpu()
         current_loaded_model.unpatch_model()
         current_loaded_model = None
-    if len(current_gpu_controlnets) > 0:
-        for n in current_gpu_controlnets:
-            n.cpu()
-        current_gpu_controlnets = []
+
+    if vram_state != HIGH_VRAM:
+        if len(current_gpu_controlnets) > 0:
+            for n in current_gpu_controlnets:
+                n.cpu()
+            current_gpu_controlnets = []
 
 
 def load_model_gpu(model):
@@ -87,7 +95,7 @@ def load_model_gpu(model):
     current_loaded_model = model
     if vram_state == CPU:
         pass
-    elif vram_state == NORMAL_VRAM:
+    elif vram_state == NORMAL_VRAM or vram_state == HIGH_VRAM:
         model_accelerated = False
         real_model.cuda()
     else:
diff --git a/comfy/sd1_clip.py b/comfy/sd1_clip.py
index 2b94d281..998babe8 100644
--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@@ -178,7 +178,6 @@ def load_embed(embedding_name, embedding_directory):
                 valid_file = t
                 break
         if valid_file is None:
-            print("warning, embedding {} does not exist, ignoring".format(embed_path))
             return None
         else:
             embed_path = valid_file
@@ -187,7 +186,10 @@
         import safetensors.torch
         embed = safetensors.torch.load_file(embed_path, device="cpu")
     else:
-        embed = torch.load(embed_path, weights_only=True, map_location="cpu")
+        if 'weights_only' in torch.load.__code__.co_varnames:
+            embed = torch.load(embed_path, weights_only=True, map_location="cpu")
+        else:
+            embed = torch.load(embed_path, map_location="cpu")
     if 'string_to_param' in embed:
         values = embed['string_to_param'].values()
     else:
@@ -218,18 +220,28 @@ class SD1Tokenizer:
         tokens = []
         for t in parsed_weights:
             to_tokenize = unescape_important(t[0]).replace("\n", " ").split(' ')
-            for word in to_tokenize:
+            while len(to_tokenize) > 0:
+                word = to_tokenize.pop(0)
                 temp_tokens = []
                 embedding_identifier = "embedding:"
                 if word.startswith(embedding_identifier) and self.embedding_directory is not None:
                     embedding_name = word[len(embedding_identifier):].strip('\n')
                     embed = load_embed(embedding_name, self.embedding_directory)
+                    if embed is None:
+                        stripped = embedding_name.strip(',')
+                        if len(stripped) < len(embedding_name):
+                            embed = load_embed(stripped, self.embedding_directory)
+                            if embed is not None:
+                                to_tokenize.insert(0, embedding_name[len(stripped):])
+
                     if embed is not None:
                         if len(embed.shape) == 1:
                             temp_tokens += [(embed, t[1])]
                         else:
                             for x in range(embed.shape[0]):
                                 temp_tokens += [(embed[x], t[1])]
+                    else:
+                        print("warning, embedding:{} does not exist, ignoring".format(embedding_name))
                 elif len(word) > 0:
                     tt = self.tokenizer(word)["input_ids"][1:-1]
                     for x in tt:
diff --git a/main.py b/main.py
index f5aec442..54c66dac 100644
--- a/main.py
+++ b/main.py
@@ -29,6 +29,7 @@ if __name__ == "__main__":
         print("\t--dont-upcast-attention\t\tDisable upcasting of attention \n\t\t\t\t\tcan boost speed but increase the chances of black images.\n")
         print("\t--use-split-cross-attention\tUse the split cross attention optimization instead of the sub-quadratic one.\n\t\t\t\t\tIgnored when xformers is used.")
         print()
+        print("\t--highvram\t\t\tBy default models will be unloaded to CPU memory after being used.\n\t\t\t\t\tThis option keeps them in GPU memory.\n")
         print("\t--normalvram\t\t\tUsed to force normal vram use if lowvram gets automatically enabled.")
         print("\t--lowvram\t\t\tSplit the unet in parts to use less vram.")
         print("\t--novram\t\t\tWhen lowvram isn't enough.")
@@ -208,6 +209,7 @@ class PromptExecutor:
         executed = set(executed)
         for x in executed:
             self.old_prompt[x] = copy.deepcopy(prompt[x])
+        torch.cuda.empty_cache()
 
 def validate_inputs(prompt, item):
     unique_id = item
diff --git a/models/configs/v1-inpainting-inference.yaml b/models/configs/v1-inpainting-inference.yaml
new file mode 100644
index 00000000..45f3f82d
--- /dev/null
+++ b/models/configs/v1-inpainting-inference.yaml
@@ -0,0 +1,71 @@
+model:
+  base_learning_rate: 7.5e-05
+  target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false  # Note: different from the one we trained before
+    conditioning_key: hybrid  # important
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    finetune_keys: null
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        image_size: 32 # unused
+        in_channels: 9  # 4 data + 4 downscaled image + 1 mask
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
+
diff --git a/nodes.py b/nodes.py
index b35c09de..ef1201de 100644
--- a/nodes.py
+++ b/nodes.py
@@ -759,7 +759,7 @@ def load_custom_nodes():
         module_path = os.path.join(CUSTOM_NODE_PATH, possible_module)
         if os.path.isfile(module_path) and os.path.splitext(module_path)[1] != ".py": continue
 
-        module_name = "custom_node_module.{}".format(possible_module)
+        module_name = possible_module
         try:
             if os.path.isfile(module_path):
                 module_spec = importlib.util.spec_from_file_location(module_name, module_path)
diff --git a/notebooks/comfyui_colab.ipynb b/notebooks/comfyui_colab.ipynb
index cdf182b8..2e364f16 100644
--- a/notebooks/comfyui_colab.ipynb
+++ b/notebooks/comfyui_colab.ipynb
@@ -85,7 +85,7 @@
   {
    "cell_type": "markdown",
    "source": [
-    "Run ComfyUI:"
+    "Run ComfyUI (use the fp16 model configs for more speed):"
    ],
    "metadata": {
     "id": "gggggggggg"
@@ -112,7 +112,7 @@
    "\n",
    "threading.Thread(target=iframe_thread, daemon=True, args=(8188,)).start()\n",
    "\n",
-   "!python main.py"
+   "!python main.py --highvram"
   ],
   "metadata": {
    "id": "hhhhhhhhhh"
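Aside (not part of the patch): one detail worth isolating is the backward-compatible torch.load call added in comfy/sd1_clip.py. The weights_only keyword only exists in newer PyTorch releases, so the patch probes torch.load's signature before passing it. A minimal standalone sketch of that pattern, with a hypothetical helper name and no ComfyUI dependencies, looks like this:

    import torch

    def load_embedding_compat(path):
        # Newer PyTorch releases expose a weights_only flag that refuses to
        # unpickle arbitrary Python objects (safer for untrusted embedding files).
        # Older releases raise TypeError on the unknown keyword, so check the
        # function's signature before using it.
        if 'weights_only' in torch.load.__code__.co_varnames:
            return torch.load(path, weights_only=True, map_location="cpu")
        return torch.load(path, map_location="cpu")

This keeps textual-inversion embeddings loading on older torch installs without pinning a minimum version, while still taking the safer loading path where it is available.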