From 5a8f502db5889873ffa13132b603b7b6daac605a Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Tue, 19 Aug 2025 22:08:11 -0700 Subject: [PATCH] Disable prompt weights for qwen. (#9438) --- comfy/sd1_clip.py | 5 ++++- comfy/text_encoders/qwen_image.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/comfy/sd1_clip.py b/comfy/sd1_clip.py index 1e8adbe69..f8a7c2a1b 100644 --- a/comfy/sd1_clip.py +++ b/comfy/sd1_clip.py @@ -534,7 +534,10 @@ class SDTokenizer: min_padding = tokenizer_options.get("{}_min_padding".format(self.embedding_key), self.min_padding) text = escape_important(text) - parsed_weights = token_weights(text, 1.0) + if kwargs.get("disable_weights", False): + parsed_weights = [(text, 1.0)] + else: + parsed_weights = token_weights(text, 1.0) # tokenize words tokens = [] diff --git a/comfy/text_encoders/qwen_image.py b/comfy/text_encoders/qwen_image.py index f07318d6c..6646b1003 100644 --- a/comfy/text_encoders/qwen_image.py +++ b/comfy/text_encoders/qwen_image.py @@ -15,7 +15,7 @@ class QwenImageTokenizer(sd1_clip.SD1Tokenizer): def __init__(self, embedding_directory=None, tokenizer_data={}): super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name="qwen25_7b", tokenizer=Qwen25_7BVLITokenizer) self.llama_template = "<|im_start|>system\nDescribe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n" - self.llama_template_images = "<|im_start|>system\nDescribe the key features of the input image \\(color, shape, size, texture, objects, background\\), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>\n<|im_start|>assistant\n" + self.llama_template_images = "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>\n<|im_start|>assistant\n" def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, images=[], **kwargs): if llama_template is None: @@ -25,7 +25,7 @@ class QwenImageTokenizer(sd1_clip.SD1Tokenizer): llama_text = self.llama_template.format(text) else: llama_text = llama_template.format(text) - tokens = super().tokenize_with_weights(llama_text, return_word_ids=return_word_ids, **kwargs) + tokens = super().tokenize_with_weights(llama_text, return_word_ids=return_word_ids, disable_weights=True, **kwargs) key_name = next(iter(tokens)) embed_count = 0 qwen_tokens = tokens[key_name]