mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2025-09-11 03:58:22 +00:00
Support base SDXL and SDXL refiner models.
Large refactor of the model detection and loading code.
This commit is contained in:
@@ -29,31 +29,31 @@ class ClipVisionModel():
|
||||
outputs = self.model(**inputs)
|
||||
return outputs
|
||||
|
||||
def convert_to_transformers(sd, prefix):
    """Rename prefixed vision-tower keys in ``sd`` to the ``vision_model.*`` layout.

    The conversion only runs when ``sd`` contains the marker key
    ``<prefix>transformer.resblocks.0.attn.in_proj_weight``; otherwise ``sd``
    is returned untouched, so passing an already-converted dict is harmless.

    Args:
        sd: Mutable mapping of parameter names to tensors. It is modified in
            place (keys are popped and re-inserted under their new names).
        prefix: String prepended to every source key, including any trailing
            separator (for example ``"embedder.model.visual."``).

    Returns:
        ``sd`` itself when no conversion is needed, otherwise whatever
        ``transformers_convert`` returns for the partially renamed dict.
    """
    # Live view: membership tests below always reflect the current keys.
    sd_k = sd.keys()

    # Guard clause: without the marker key there is nothing to convert.
    if "{}transformer.resblocks.0.attn.in_proj_weight".format(prefix) not in sd_k:
        return sd

    # NOTE: "pre_layrnorm" is spelled exactly as in the original mapping;
    # presumably it matches the target model's parameter name, so do not
    # "correct" it without checking the consumer of these keys.
    keys_to_replace = {
        "{}class_embedding".format(prefix): "vision_model.embeddings.class_embedding",
        "{}conv1.weight".format(prefix): "vision_model.embeddings.patch_embedding.weight",
        "{}positional_embedding".format(prefix): "vision_model.embeddings.position_embedding.weight",
        "{}ln_post.bias".format(prefix): "vision_model.post_layernorm.bias",
        "{}ln_post.weight".format(prefix): "vision_model.post_layernorm.weight",
        "{}ln_pre.bias".format(prefix): "vision_model.pre_layrnorm.bias",
        "{}ln_pre.weight".format(prefix): "vision_model.pre_layrnorm.weight",
    }

    # Iterating the mapping (not ``sd``) makes mutating ``sd`` here safe.
    for old_key, new_key in keys_to_replace.items():
        if old_key in sd_k:
            sd[new_key] = sd.pop(old_key)

    # The projection is stored under a different name and with its two
    # axes swapped relative to the target layout.
    proj_key = "{}proj".format(prefix)
    if proj_key in sd_k:
        sd['visual_projection.weight'] = sd.pop(proj_key).transpose(0, 1)

    # ``transformers_convert`` is defined elsewhere in the project; it
    # presumably renames the per-layer transformer weights (32 looks like a
    # layer count -- TODO confirm against its definition).
    return transformers_convert(sd, prefix, "vision_model.", 32)
|
||||
|
||||
def load_clipvision_from_sd(sd):
|
||||
sd = convert_to_transformers(sd)
|
||||
def load_clipvision_from_sd(sd, prefix):
|
||||
sd = convert_to_transformers(sd, prefix)
|
||||
if "vision_model.encoder.layers.30.layer_norm1.weight" in sd:
|
||||
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_h.json")
|
||||
else:
|
||||
|
Reference in New Issue
Block a user