Skip to content

train 과정에서 에러 발생 #28

@jamani135

Description

@jamani135

File "/content/drive/MyDrive/bart/train.py", line 230, in <module>
trainer.fit(model, dm)
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py", line 460, in fit
self._run(model)
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py", line 758, in _run
self.dispatch()
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py", line 799, in dispatch
self.accelerator.start_training(self)
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/accelerators/accelerator.py", line 96, in start_training
self.training_type_plugin.start_training(trainer)
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 144, in start_training
self._results = trainer.run_stage()
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py", line 809, in run_stage
return self.run_train()
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py", line 871, in run_train
self.train_loop.run_training_epoch()
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/training_loop.py", line 491, in run_training_epoch
for batch_idx, (batch, is_last_batch) in train_dataloader:
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/profiler/profilers.py", line 112, in profile_iterable
value = next(iterator)
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/supporters.py", line 534, in prefetch_iterator
for val in it:
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/supporters.py", line 464, in __next__
return self.request_next_batch(self.loader_iters)
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/supporters.py", line 478, in request_next_batch
return apply_to_collection(loader_iters, Iterator, next)
File "/usr/local/lib/python3.7/dist-packages/pytorch_lightning/utilities/apply_func.py", line 85, in apply_to_collection
return function(data, *args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 521, in __next__
data = self._next_data()
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1203, in _next_data
return self._process_data(data)
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1229, in _process_data
data.reraise()
File "/usr/local/lib/python3.7/dist-packages/torch/_utils.py", line 434, in reraise
raise exception
TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
data = fetcher.fetch(index)
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/content/drive/MyDrive/bart/dataset.py", line 46, in __getitem__
label_ids = self.tok.encode(instance['summary'])
File "/usr/local/lib/python3.7/dist-packages/transformers/tokenization_utils_base.py", line 2235, in encode
**kwargs,
File "/usr/local/lib/python3.7/dist-packages/transformers/tokenization_utils_base.py", line 2574, in encode_plus
**kwargs,
File "/usr/local/lib/python3.7/dist-packages/transformers/tokenization_utils_fast.py", line 516, in _encode_plus
**kwargs,
File "/usr/local/lib/python3.7/dist-packages/transformers/tokenization_utils_fast.py", line 429, in _batch_encode_plus
is_pretokenized=is_split_into_words,
TypeError: TextEncodeInput must be Union[TextInputSequence, Tuple[InputSequence, InputSequence]]

AI Hub에서 제공하는 신문기사뿐만 아니라 기고문, 잡지기사, 법원 판결문까지 모두 훈련에 사용하기 위해 하나로 합치고 포맷을 동일하게 맞춰 tsv 파일로 저장하여 적용했는데, 이런 에러가 계속해서 발생합니다.

제 데이터의 문제인 건지 아니면 코드 문제인 건지 궁금합니다.

현재 코랩 프로 플러스로 진행하고 있습니다.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions