From c308a8840aebf06649364e8e175862250a2d8823 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Fri, 15 Aug 2025 12:50:39 -0700
Subject: [PATCH] Add FluxKontextMultiReferenceLatentMethod node. (#9356)

This node is only useful if someone trains the kontext model to properly
use multiple reference images via the index method.

The default is the offset method which feeds the multiple images like if
they were stitched together as one. This method works with the current
flux kontext model.
---
 comfy/ldm/flux/model.py    | 24 ++++++++++++++++--------
 comfy/model_base.py        |  4 ++++
 comfy_extras/nodes_flux.py | 19 +++++++++++++++++++
 3 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/comfy/ldm/flux/model.py b/comfy/ldm/flux/model.py
index 8f4d99f54..c4de82795 100644
--- a/comfy/ldm/flux/model.py
+++ b/comfy/ldm/flux/model.py
@@ -224,19 +224,27 @@ class Flux(nn.Module):
         if ref_latents is not None:
             h = 0
             w = 0
+            index = 0
+            index_ref_method = kwargs.get("ref_latents_method", "offset") == "index"
             for ref in ref_latents:
-                h_offset = 0
-                w_offset = 0
-                if ref.shape[-2] + h > ref.shape[-1] + w:
-                    w_offset = w
+                if index_ref_method:
+                    index += 1
+                    h_offset = 0
+                    w_offset = 0
                 else:
-                    h_offset = h
+                    index = 1
+                    h_offset = 0
+                    w_offset = 0
+                    if ref.shape[-2] + h > ref.shape[-1] + w:
+                        w_offset = w
+                    else:
+                        h_offset = h
+                    h = max(h, ref.shape[-2] + h_offset)
+                    w = max(w, ref.shape[-1] + w_offset)
 
-                kontext, kontext_ids = self.process_img(ref, index=1, h_offset=h_offset, w_offset=w_offset)
+                kontext, kontext_ids = self.process_img(ref, index=index, h_offset=h_offset, w_offset=w_offset)
                 img = torch.cat([img, kontext], dim=1)
                 img_ids = torch.cat([img_ids, kontext_ids], dim=1)
-                h = max(h, ref.shape[-2] + h_offset)
-                w = max(w, ref.shape[-1] + w_offset)
 
         txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype)
         out = self.forward_orig(img, img_ids, context, txt_ids, timestep, y, guidance, control, transformer_options, attn_mask=kwargs.get("attention_mask", None))
diff --git a/comfy/model_base.py b/comfy/model_base.py
index cde61df7c..bf874b875 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -890,6 +890,10 @@ class Flux(BaseModel):
             for lat in ref_latents:
                 latents.append(self.process_latent_in(lat))
             out['ref_latents'] = comfy.conds.CONDList(latents)
+
+            ref_latents_method = kwargs.get("reference_latents_method", None)
+            if ref_latents_method is not None:
+                out['ref_latents_method'] = comfy.conds.CONDConstant(ref_latents_method)
         return out
 
     def extra_conds_shapes(self, **kwargs):
diff --git a/comfy_extras/nodes_flux.py b/comfy_extras/nodes_flux.py
index 8a8a17698..c8db75bb3 100644
--- a/comfy_extras/nodes_flux.py
+++ b/comfy_extras/nodes_flux.py
@@ -100,9 +100,28 @@ class FluxKontextImageScale:
         return (image, )
 
 
+class FluxKontextMultiReferenceLatentMethod:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {
+            "conditioning": ("CONDITIONING", ),
+            "reference_latents_method": (("offset", "index"), ),
+            }}
+
+    RETURN_TYPES = ("CONDITIONING",)
+    FUNCTION = "append"
+    EXPERIMENTAL = True
+
+    CATEGORY = "advanced/conditioning/flux"
+
+    def append(self, conditioning, reference_latents_method):
+        c = node_helpers.conditioning_set_values(conditioning, {"reference_latents_method": reference_latents_method})
+        return (c, )
+
 NODE_CLASS_MAPPINGS = {
     "CLIPTextEncodeFlux": CLIPTextEncodeFlux,
     "FluxGuidance": FluxGuidance,
     "FluxDisableGuidance": FluxDisableGuidance,
     "FluxKontextImageScale": FluxKontextImageScale,
+    "FluxKontextMultiReferenceLatentMethod": FluxKontextMultiReferenceLatentMethod,
 }