diff --git a/comfy_extras/nodes_qwen.py b/comfy_extras/nodes_qwen.py index b5088fae2..fff89556f 100644 --- a/comfy_extras/nodes_qwen.py +++ b/comfy_extras/nodes_qwen.py @@ -1,25 +1,6 @@ import node_helpers import comfy.utils - -PREFERRED_QWENIMAGE_RESOLUTIONS = [ - (672, 1568), - (688, 1504), - (720, 1456), - (752, 1392), - (800, 1328), - (832, 1248), - (880, 1184), - (944, 1104), - (1024, 1024), - (1104, 944), - (1184, 880), - (1248, 832), - (1328, 800), - (1392, 752), - (1456, 720), - (1504, 688), - (1568, 672), -] +import math class TextEncodeQwenImageEdit: @@ -42,13 +23,17 @@ class TextEncodeQwenImageEdit: if image is None: images = [] else: - images = [image] + samples = image.movedim(-1, 1) + total = int(1024 * 1024) + + scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2])) + width = round(samples.shape[3] * scale_by) + height = round(samples.shape[2] * scale_by) + + s = comfy.utils.common_upscale(samples, width, height, "area", "disabled") + image = s.movedim(1, -1) + images = [image[:, :, :, :3]] if vae is not None: - width = image.shape[2] - height = image.shape[1] - aspect_ratio = width / height - _, width, height = min((abs(aspect_ratio - w / h), w, h) for w, h in PREFERRED_QWENIMAGE_RESOLUTIONS) - image = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "lanczos", "center").movedim(1, -1) ref_latent = vae.encode(image[:, :, :, :3]) tokens = clip.tokenize(prompt, images=images)