From 3fd87cbd21d4834d0cdd8195edc0743dde9e6362 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Wed, 8 Feb 2023 17:09:47 -0500
Subject: [PATCH] Slightly smarter batching behaviour.

Try to keep batch sizes more consistent which seems to improve things on
AMD GPUs.
---
 comfy/samplers.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/comfy/samplers.py b/comfy/samplers.py
index 91b849c2d..d5a34efde 100644
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@@ -86,15 +86,21 @@ class CFGDenoiserComplex(torch.nn.Module):
             while len(to_run) > 0:
                 first = to_run[0]
                 first_shape = first[0][0].shape
-                to_batch = []
+                to_batch_temp = []
                 for x in range(len(to_run)):
                     if to_run[x][0][0].shape == first_shape:
                         if to_run[x][0][2].shape == first[0][2].shape:
-                            to_batch += [x]
-                            if (len(to_batch) * first_shape[0] * first_shape[2] * first_shape[3] >= max_total_area):
-                                break
+                            to_batch_temp += [x]
+
+                to_batch_temp.reverse()
+                to_batch = to_batch_temp[:1]
+
+                for i in range(1, len(to_batch_temp) + 1):
+                    batch_amount = to_batch_temp[:len(to_batch_temp)//i]
+                    if (len(batch_amount) * first_shape[0] * first_shape[2] * first_shape[3] < max_total_area):
+                        to_batch = batch_amount
+                        break

-                to_batch.reverse()
                 input_x = []
                 mult = []
                 c = []
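
Note: the hunk above replaces the old greedy cut-off, which kept appending candidates until the running area reached max_total_area, with a divisor search that only ever runs the full candidate list, half of it, a third of it, and so on, which is why successive batches tend to come out the same size. Below is a minimal standalone sketch of that selection step; the helper name pick_batch and the example shapes and budget are made up for illustration and do not appear in comfy/samplers.py.

def pick_batch(candidate_indices, first_shape, max_total_area):
    # Largest-first divisor search: try all N candidates, then N//2, N//3, ...
    # and keep the first slice whose total latent area stays under the budget.
    candidates = list(reversed(candidate_indices))
    to_batch = candidates[:1]  # always run at least one item per pass
    for i in range(1, len(candidates) + 1):
        batch_amount = candidates[:len(candidates) // i]
        area = len(batch_amount) * first_shape[0] * first_shape[2] * first_shape[3]
        if area < max_total_area:
            to_batch = batch_amount
            break
    return to_batch

# Example: 6 candidates with per-item latent shape (1, 4, 64, 64) and a budget
# that fits roughly three of them; the search settles on half the list.
print(pick_batch(list(range(6)), (1, 4, 64, 64), 3 * 64 * 64 + 1))  # -> [5, 4, 3]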