From 65042f7d395e92d9cc10dc66b94f63f5e40a697d Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Tue, 4 Mar 2025 09:26:05 -0500 Subject: [PATCH] Make it easier to set a custom template for hunyuan video. --- comfy/sd.py | 4 ++-- comfy/sd1_clip.py | 4 ++-- comfy/sdxl_clip.py | 2 +- comfy/text_encoders/flux.py | 2 +- comfy/text_encoders/hunyuan_video.py | 7 +++++-- comfy/text_encoders/hydit.py | 2 +- comfy/text_encoders/sd3_clip.py | 2 +- 7 files changed, 13 insertions(+), 10 deletions(-) diff --git a/comfy/sd.py b/comfy/sd.py index 21913cf3e..b866c66c4 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -134,8 +134,8 @@ class CLIP: def clip_layer(self, layer_idx): self.layer_idx = layer_idx - def tokenize(self, text, return_word_ids=False): - return self.tokenizer.tokenize_with_weights(text, return_word_ids) + def tokenize(self, text, return_word_ids=False, **kwargs): + return self.tokenizer.tokenize_with_weights(text, return_word_ids, **kwargs) def add_hooks_to_dict(self, pooled_dict: dict[str]): if self.apply_hooks_to_conds: diff --git a/comfy/sd1_clip.py b/comfy/sd1_clip.py index d2457731d..692ae0518 100644 --- a/comfy/sd1_clip.py +++ b/comfy/sd1_clip.py @@ -482,7 +482,7 @@ class SDTokenizer: return (embed, leftover) - def tokenize_with_weights(self, text:str, return_word_ids=False): + def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): ''' Takes a prompt and converts it to a list of (token, weight, word id) elements. Tokens can both be integer tokens and pre computed CLIP tensors. @@ -596,7 +596,7 @@ class SD1Tokenizer: tokenizer = tokenizer_data.get("{}_tokenizer_class".format(self.clip), tokenizer) setattr(self, self.clip, tokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data)) - def tokenize_with_weights(self, text:str, return_word_ids=False): + def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): out = {} out[self.clip_name] = getattr(self, self.clip).tokenize_with_weights(text, return_word_ids) return out diff --git a/comfy/sdxl_clip.py b/comfy/sdxl_clip.py index 4d0a4e8e7..5b7c8a412 100644 --- a/comfy/sdxl_clip.py +++ b/comfy/sdxl_clip.py @@ -26,7 +26,7 @@ class SDXLTokenizer: self.clip_l = clip_l_tokenizer_class(embedding_directory=embedding_directory) self.clip_g = SDXLClipGTokenizer(embedding_directory=embedding_directory) - def tokenize_with_weights(self, text:str, return_word_ids=False): + def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): out = {} out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids) out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) diff --git a/comfy/text_encoders/flux.py b/comfy/text_encoders/flux.py index b945b1aaa..a12995ec0 100644 --- a/comfy/text_encoders/flux.py +++ b/comfy/text_encoders/flux.py @@ -18,7 +18,7 @@ class FluxTokenizer: self.clip_l = clip_l_tokenizer_class(embedding_directory=embedding_directory) self.t5xxl = T5XXLTokenizer(embedding_directory=embedding_directory) - def tokenize_with_weights(self, text:str, return_word_ids=False): + def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): out = {} out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) out["t5xxl"] = self.t5xxl.tokenize_with_weights(text, return_word_ids) diff --git a/comfy/text_encoders/hunyuan_video.py b/comfy/text_encoders/hunyuan_video.py index 7149d6878..bdee0b3df 100644 --- a/comfy/text_encoders/hunyuan_video.py +++ b/comfy/text_encoders/hunyuan_video.py @@ -41,11 +41,14 @@ class HunyuanVideoTokenizer: self.llama_template = """<|start_header_id|>system<|end_header_id|>\n\nDescribe the video by detailing the following aspects: 1. The main content and theme of the video.2. The color, shape, size, texture, quantity, text, and spatial relationships of the objects.3. Actions, events, behaviors temporal relationships, physical movement changes of the objects.4. background environment, light, style and atmosphere.5. camera angles, movements, and transitions used in the video:<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n""" # 95 tokens self.llama = LLAMA3Tokenizer(embedding_directory=embedding_directory, min_length=1) - def tokenize_with_weights(self, text:str, return_word_ids=False): + def tokenize_with_weights(self, text:str, return_word_ids=False, llama_template=None, **kwargs): out = {} out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) - llama_text = "{}{}".format(self.llama_template, text) + if llama_template is None: + llama_text = "{}{}".format(self.llama_template, text) + else: + llama_text = "{}{}".format(llama_template, text) out["llama"] = self.llama.tokenize_with_weights(llama_text, return_word_ids) return out diff --git a/comfy/text_encoders/hydit.py b/comfy/text_encoders/hydit.py index 7cb790f45..7da3e9fc5 100644 --- a/comfy/text_encoders/hydit.py +++ b/comfy/text_encoders/hydit.py @@ -37,7 +37,7 @@ class HyditTokenizer: self.hydit_clip = HyditBertTokenizer(embedding_directory=embedding_directory) self.mt5xl = MT5XLTokenizer(tokenizer_data={"spiece_model": mt5_tokenizer_data}, embedding_directory=embedding_directory) - def tokenize_with_weights(self, text:str, return_word_ids=False): + def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): out = {} out["hydit_clip"] = self.hydit_clip.tokenize_with_weights(text, return_word_ids) out["mt5xl"] = self.mt5xl.tokenize_with_weights(text, return_word_ids) diff --git a/comfy/text_encoders/sd3_clip.py b/comfy/text_encoders/sd3_clip.py index 00d7e31ad..3ad2ed93a 100644 --- a/comfy/text_encoders/sd3_clip.py +++ b/comfy/text_encoders/sd3_clip.py @@ -43,7 +43,7 @@ class SD3Tokenizer: self.clip_g = sdxl_clip.SDXLClipGTokenizer(embedding_directory=embedding_directory) self.t5xxl = T5XXLTokenizer(embedding_directory=embedding_directory) - def tokenize_with_weights(self, text:str, return_word_ids=False): + def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): out = {} out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids) out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids)