Make it possible to load tokenizer data from checkpoints.

Author: comfyanonymous
Date:   2024-07-24 16:43:53 -04:00
Parent: ce80e69fb8
Commit: 10c919f4c7

8 changed files with 26 additions and 31 deletions


@@ -1,4 +1,5 @@
 import os
+import torch
 
 class SPieceTokenizer:
     add_eos = True
@@ -9,6 +10,9 @@ class SPieceTokenizer:
     def __init__(self, tokenizer_path):
         import sentencepiece
-        self.tokenizer = sentencepiece.SentencePieceProcessor(model_file=tokenizer_path, add_eos=self.add_eos)
+        if torch.is_tensor(tokenizer_path):
+            tokenizer_path = tokenizer_path.numpy().tobytes()
+        if isinstance(tokenizer_path, bytes):
+            self.tokenizer = sentencepiece.SentencePieceProcessor(model_proto=tokenizer_path, add_eos=self.add_eos)
+        else:
+            self.tokenizer = sentencepiece.SentencePieceProcessor(model_file=tokenizer_path, add_eos=self.add_eos)
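
For context, a minimal sketch of the loading path this hunk enables (the file is presumably comfy/text_encoders/spiece_tokenizer.py, judging by the class name). The checkpoint filename, state-dict key, and module path below are assumptions for illustration; only the SPieceTokenizer behavior comes from the diff above:

    import torch
    from comfy.text_encoders.spiece_tokenizer import SPieceTokenizer  # assumed module path

    # A checkpoint can now carry the serialized sentencepiece model as a raw
    # uint8 tensor in its state dict.
    state_dict = torch.load("checkpoint_with_tokenizer.pt")  # hypothetical file
    spiece_tensor = state_dict["spiece_model"]                # hypothetical key

    # New path: a tensor is converted to bytes and handed to
    # SentencePieceProcessor as model_proto instead of model_file.
    tokenizer = SPieceTokenizer(spiece_tensor)

    # Old path is untouched: a filesystem path still goes through model_file.
    tokenizer_from_file = SPieceTokenizer("spiece.model")

Routing only tensors and bytes to model_proto keeps every existing caller that passes a path working unchanged.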