Support saving a Stable Audio checkpoint that can be loaded back.

2025-09-11 12:06:23 +00:00 · 2024-06-27 11:06:52 -04:00
parent 5ff3d4eb3a
commit 8ceb5a02a3
3 changed files with 14 additions and 2 deletions
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -627,3 +627,12 @@ class StableAudio1(BaseModel):
            cross_attn = torch.cat([cross_attn.to(device), seconds_start_embed.repeat((cross_attn.shape[0], 1, 1)), seconds_total_embed.repeat((cross_attn.shape[0], 1, 1))], dim=1)
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
        return out
+
+    def state_dict_for_saving(self, clip_state_dict=None, vae_state_dict=None, clip_vision_state_dict=None):
+        """Return the parent's saving state dict plus the seconds_start/seconds_total embedder entries under their conditioner prefixes."""
+        sd = super().state_dict_for_saving(clip_state_dict=clip_state_dict, vae_state_dict=vae_state_dict, clip_vision_state_dict=clip_vision_state_dict)
+        embedder_sds = {"conditioner.conditioners.seconds_start.": self.seconds_start_embedder.state_dict(), "conditioner.conditioners.seconds_total.": self.seconds_total_embedder.state_dict()}
+        for prefix, embedder_sd in embedder_sds.items():
+            for name, value in embedder_sd.items():
+                sd[f"{prefix}{name}"] = value
+        return sd