Issue in Inference #49

@sushgandhi

Just experimenting with this.
I installed tner and tried to load the model.

I'm getting a "tokenizer file not found" error. The same error occurs when loading the model directly with the transformers library: https://huggingface.co/tner/deberta-v3-large-fin
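For reference, something like the following transformers-only call (a sketch of what I mean, not a verbatim session) goes through the same failing fast-tokenizer load path:

```python
# Minimal repro sketch (assumed call; the exact invocation is illustrative).
# Loading the tokenizer alone exercises the same fast-tokenizer path that
# fails in the traceback below.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("tner/deberta-v3-large-fin")
```

The full traceback from the tner call: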

```
---------------------------------------------------------------------------
Exception                                 Traceback (most recent call last)
Cell In[16], line 1
----> 1 model = TransformersNER("tner/deberta-v3-large-fin")

File ~file_path/lib/python3.8/site-packages/tner/ner_model.py:103, in TransformersNER.__init__(self, model, max_length, crf, use_auth_token, label2id, non_entity_symbol)
    101 # load pre processor
    102 if self.crf_layer is not None:
--> 103     self.tokenizer = NERTokenizer(
    104         self.model_name,
    105         id2label=self.id2label,
    106         padding_id=self.label2id[self.non_entity_symbol],
    107         use_auth_token=use_auth_token)
    108 else:
    109     self.tokenizer = NERTokenizer(self.model_name, id2label=self.id2label, use_auth_token=use_auth_token)

File ~file_path/lib/python3.8/site-packages/tner/ner_tokenizer.py:40, in NERTokenizer.__init__(self, tokenizer_name, id2label, padding_id, use_auth_token, is_xlnet)
     37     self.tokenizer = AutoTokenizer.from_pretrained(
     38         tokenizer_name, use_auth_token=use_auth_token)
     39 except Exception:
---> 40     self.tokenizer = AutoTokenizer.from_pretrained(
     41         tokenizer_name, use_auth_token=use_auth_token, local_files_only=True)
     42 if self.tokenizer.pad_token is None:
     43     self.tokenizer.pad_token = PAD_TOKEN_LABEL_ID

File ~file_path/lib/python3.8/site-packages/transformers/models/auto/tokenization_auto.py:658, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
    654 if tokenizer_class is None:
    655     raise ValueError(
    656         f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
    657     )
--> 658 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
    660 # Otherwise we have to be creative.
    661 # if model is an encoder decoder, the encoder tokenizer class is used by default
    662 if isinstance(config, EncoderDecoderConfig):

File ~file_path/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1804, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, *init_inputs, **kwargs)
   1801 else:
   1802     logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 1804 return cls._from_pretrained(
   1805     resolved_vocab_files,
   1806     pretrained_model_name_or_path,
   1807     init_configuration,
   1808     *init_inputs,
   1809     use_auth_token=use_auth_token,
   1810     cache_dir=cache_dir,
   1811     local_files_only=local_files_only,
   1812     _commit_hash=commit_hash,
   1813     **kwargs,
   1814 )

File ~file_path/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1959, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, use_auth_token, cache_dir, local_files_only, _commit_hash, *init_inputs, **kwargs)
   1957 # Instantiate tokenizer.
   1958 try:
-> 1959     tokenizer = cls(*init_inputs, **init_kwargs)
   1960 except OSError:
   1961     raise OSError(
   1962         "Unable to load vocabulary from file. "
   1963         "Please check that the provided vocabulary is accessible and not corrupted."
   1964     )

File ~file_path/lib/python3.8/site-packages/transformers/models/deberta_v2/tokenization_deberta_v2_fast.py:133, in DebertaV2TokenizerFast.__init__(self, vocab_file, tokenizer_file, do_lower_case, split_by_punct, bos_token, eos_token, unk_token, sep_token, pad_token, cls_token, mask_token, **kwargs)
    118 def __init__(
    119     self,
    120     vocab_file=None,
        (...)
    131     **kwargs
    132 ) -> None:
--> 133     super().__init__(
    134         vocab_file,
    135         tokenizer_file=tokenizer_file,
    136         do_lower_case=do_lower_case,
    137         bos_token=bos_token,
    138         eos_token=eos_token,
    139         unk_token=unk_token,
    140         sep_token=sep_token,
    141         pad_token=pad_token,
    142         cls_token=cls_token,
    143         mask_token=mask_token,
    144         split_by_punct=split_by_punct,
    145         **kwargs,
    146     )
    148     self.do_lower_case = do_lower_case
    149     self.split_by_punct = split_by_punct

File ~file_path/lib/python3.8/site-packages/transformers/tokenization_utils_fast.py:111, in PreTrainedTokenizerFast.__init__(self, *args, **kwargs)
    108     fast_tokenizer = copy.deepcopy(tokenizer_object)
    109 elif fast_tokenizer_file is not None and not from_slow:
    110     # We have a serialization from tokenizers which let us directly build the backend
--> 111     fast_tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
    112 elif slow_tokenizer is not None:
    113     # We need to convert a slow tokenizer to build the backend
    114     fast_tokenizer = convert_slow_tokenizer(slow_tokenizer)

Exception: No such file or directory (os error 2)
```
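The failure is at `TokenizerFast.from_file(fast_tokenizer_file)`, i.e. the serialized fast-tokenizer file (typically `tokenizer.json`) cannot be opened. A possible workaround sketch, untested and assuming the hub repo still ships the sentencepiece model (`spm.model`) and that the `sentencepiece` package is installed, is to force the slow tokenizer, re-save everything locally, and point tner at the local copy:

```python
# Possible workaround (sketch, untested): bypass the fast tokenizer, whose
# serialized file is what TokenizerFast.from_file() cannot open, and build
# the slow sentencepiece-based tokenizer instead.
# Assumes the hub repo ships spm.model and `sentencepiece` is installed.
from transformers import AutoTokenizer, AutoModelForTokenClassification

repo = "tner/deberta-v3-large-fin"
tokenizer = AutoTokenizer.from_pretrained(repo, use_fast=False)
model = AutoModelForTokenClassification.from_pretrained(repo)

# Re-save locally; the saved directory can then be passed to tner:
tokenizer.save_pretrained("deberta-v3-large-fin-local")
model.save_pretrained("deberta-v3-large-fin-local")

# from tner import TransformersNER
# ner = TransformersNER("deberta-v3-large-fin-local")  # hypothetical usage
```

Whether `TransformersNER` accepts a local directory this way is an assumption on my part; it depends on how tner resolves model names.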
