Make it possible to load tokenizer data from checkpoints.

Author: comfyanonymous
Date:   2024-07-24 16:43:53 -04:00
Parent: ce80e69fb8
Commit: 10c919f4c7

8 changed files with 26 additions and 31 deletions


@@ -1,4 +1,5 @@
 import os
+import torch
 
 class SPieceTokenizer:
     add_eos = True
@@ -9,6 +10,9 @@ class SPieceTokenizer:
     def __init__(self, tokenizer_path):
         import sentencepiece
-        self.tokenizer = sentencepiece.SentencePieceProcessor(model_file=tokenizer_path, add_eos=self.add_eos)
+        if torch.is_tensor(tokenizer_path):
+            tokenizer_path = tokenizer_path.numpy().tobytes()
+        if isinstance(tokenizer_path, bytes):
+            self.tokenizer = sentencepiece.SentencePieceProcessor(model_proto=tokenizer_path, add_eos=self.add_eos)
+        else:
+            self.tokenizer = sentencepiece.SentencePieceProcessor(model_file=tokenizer_path, add_eos=self.add_eos)
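
For context, a minimal sketch of the loading path this hunk enables (the file is presumably comfy/text_encoders/spiece_tokenizer.py, judging by the class name). The checkpoint filename, state-dict key, and module path below are assumptions for illustration; only the SPieceTokenizer behavior comes from the diff above:

    import torch
    from comfy.text_encoders.spiece_tokenizer import SPieceTokenizer  # assumed module path

    # A checkpoint can now carry the serialized sentencepiece model as a raw
    # uint8 tensor in its state dict.
    state_dict = torch.load("checkpoint_with_tokenizer.pt")  # hypothetical file
    spiece_tensor = state_dict["spiece_model"]                # hypothetical key

    # New path: a tensor is converted to bytes and handed to
    # SentencePieceProcessor as model_proto instead of model_file.
    tokenizer = SPieceTokenizer(spiece_tensor)

    # Old path is untouched: a filesystem path still goes through model_file.
    tokenizer_from_file = SPieceTokenizer("spiece.model")

Routing only tensors and bytes to model_proto keeps every existing caller that passes a path working unchanged.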