Change the TextEncodeQwenImageEdit node to use logic closer to the reference implementation. (#9432)

2025-09-10 11:35:40 +00:00 · 2025-08-19 13:49:01 -07:00
parent 54d8fdbed0
commit bddd69618b
1 changed file with 11 additions and 26 deletions
--- a/comfy_extras/nodes_qwen.py
+++ b/comfy_extras/nodes_qwen.py
@@ -1,25 +1,6 @@
 import node_helpers
 import comfy.utils
-
-PREFERRED_QWENIMAGE_RESOLUTIONS = [
-    (672, 1568),
-    (688, 1504),
-    (720, 1456),
-    (752, 1392),
-    (800, 1328),
-    (832, 1248),
-    (880, 1184),
-    (944, 1104),
-    (1024, 1024),
-    (1104, 944),
-    (1184, 880),
-    (1248, 832),
-    (1328, 800),
-    (1392, 752),
-    (1456, 720),
-    (1504, 688),
-    (1568, 672),
-]
+import math


 class TextEncodeQwenImageEdit:
@@ -42,13 +23,17 @@ class TextEncodeQwenImageEdit:
        if image is None:
            images = []
        else:
-            images = [image]
+            samples = image.movedim(-1, 1)
+            total = int(1024 * 1024)
+
+            scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2]))
+            width = round(samples.shape[3] * scale_by)
+            height = round(samples.shape[2] * scale_by)
+
+            s = comfy.utils.common_upscale(samples, width, height, "area", "disabled")
+            image = s.movedim(1, -1)
+            images = [image[:, :, :, :3]]
            if vae is not None:
-                width = image.shape[2]
-                height = image.shape[1]
-                aspect_ratio = width / height
-                _, width, height = min((abs(aspect_ratio - w / h), w, h) for w, h in PREFERRED_QWENIMAGE_RESOLUTIONS)
-                image = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "lanczos", "center").movedim(1, -1)
                ref_latent = vae.encode(image[:, :, :, :3])

        tokens = clip.tokenize(prompt, images=images)