Change the TextEncodeQwenImageEdit node to use logic closer to reference. (#9432)

This commit is contained in:
comfyanonymous
2025-08-19 13:49:01 -07:00
committed by GitHub
parent 54d8fdbed0
commit bddd69618b

View File

@@ -1,25 +1,6 @@
import node_helpers
import comfy.utils
# Preferred Qwen-Image resolutions as (width, height) pairs, ordered from the
# tallest portrait shape to the widest landscape shape.  The table is
# symmetric around the square 1024x1024 entry: the heights are exactly the
# widths read in reverse order, so it is derived from one list of side lengths.
_QWENIMAGE_SIDE_LENGTHS = [
    672, 688, 720, 752, 800, 832, 880, 944,
    1024,
    1104, 1184, 1248, 1328, 1392, 1456, 1504, 1568,
]
PREFERRED_QWENIMAGE_RESOLUTIONS = list(
    zip(_QWENIMAGE_SIDE_LENGTHS, reversed(_QWENIMAGE_SIDE_LENGTHS))
)
import math
class TextEncodeQwenImageEdit:
@@ -42,13 +23,17 @@ class TextEncodeQwenImageEdit:
if image is None:
images = []
else:
images = [image]
samples = image.movedim(-1, 1)
total = int(1024 * 1024)
scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2]))
width = round(samples.shape[3] * scale_by)
height = round(samples.shape[2] * scale_by)
s = comfy.utils.common_upscale(samples, width, height, "area", "disabled")
image = s.movedim(1, -1)
images = [image[:, :, :, :3]]
if vae is not None:
width = image.shape[2]
height = image.shape[1]
aspect_ratio = width / height
_, width, height = min((abs(aspect_ratio - w / h), w, h) for w, h in PREFERRED_QWENIMAGE_RESOLUTIONS)
image = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "lanczos", "center").movedim(1, -1)
ref_latent = vae.encode(image[:, :, :, :3])
tokens = clip.tokenize(prompt, images=images)