-
Notifications
You must be signed in to change notification settings - Fork 2
Open
Description
I'm doing PyTorch distributed data parallel training, and I used a generator to traverse the EArray data in my HDF5 dataset. As soon as the program started, I got a "RuntimeError: Unknown error type: 132 when handling execution of <_FuncPtr object at 0x7fb033f58280> with args (b'/nXmxo38xgBw=', 194, 384)". The code where multitables is involved is shown below.
class SignalTrainSetSpeedy(Dataset):
    """Training dataset that streams rows from an HDF5 file through multitables.

    The multitables streamer is not opened in ``__init__``; it is created on
    the first ``__getitem__`` call (presumably so that each loader process
    opens its own stream -- TODO confirm against the training script).

    Rows are consumed strictly in the order the generator produces them: the
    ``index`` passed to ``__getitem__`` is not used to select a row.
    """

    def __init__(self, augment='None'):
        """Store the augmentation setting and the fixed HDF5 path.

        Args:
            augment: Augmentation selector, stored as ``self.aug``. It is not
                read anywhere else in this class.
        """
        self.input_path = "/mnt/sdb/zzl/train_data_and_label.hdf5"
        self.aug = augment
        # Both are filled in lazily by the first __getitem__ call.
        self.stream = None
        self.data = None

    def _yield_data(self):
        """Yield every row produced by ``self.gen``, one at a time."""
        for record in self.gen:
            yield record

    def __getitem__(self, index):
        """Return the next streamed row split into an ``(input, target)`` pair.

        Args:
            index: Ignored; rows come back in generator order.

        Returns:
            A tuple of two tensors: the first 16000 elements of the row and
            the last 61 elements of the row.

        Raises:
            StopIteration: When the underlying generator has no rows left.
        """
        if self.stream is None:
            # First access: open the streamer and start reading '/data'.
            self.stream = multitables.Streamer(filename=self.input_path)
            self.gen = self.stream.get_generator(path='/data')
            self.data = self._yield_data()

        row = next(self.data)
        features = torch.Tensor(row[:16000])
        labels = torch.Tensor(row[-61:])
        return features, labels

    def __len__(self):
        """Return the hard-coded number of rows reported to the loader."""
        return 2040151
The error message:
Traceback (most recent call last):
File "train.py", line 244, in <module>
next_batch = dataset_iter.next()
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 530, in __next__
Traceback (most recent call last):
File "train.py", line 244, in <module>
data = self._next_data()
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 570, in _next_data
next_batch = dataset_iter.next()
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 530, in __next__
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/mnt/sdf/zzl/FACEGOOD-Audio2Face/code/torch_train/signal_dataset.py", line 114, in __getitem__
data = self._next_data()
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 570, in _next_data
input_output = next(self.data)
File "/mnt/sdf/zzl/FACEGOOD-Audio2Face/code/torch_train/signal_dataset.py", line 103, in _yield_data
for row in self.gen:
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/multitables/streamer.py", line 247, in get_generator
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
q = self.get_queue(path=path, n_procs=n_procs, read_ahead=read_ahead, cyclic=cyclic, block_size=block_size, ordered=ordered, field=field, remainder=remainder)
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/multitables/streamer.py", line 195, in get_queue
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/mnt/sdf/zzl/FACEGOOD-Audio2Face/code/torch_train/signal_dataset.py", line 114, in __getitem__
stage_pool = stage.StagePool(dataset, block_size, read_ahead, timeout=0.1)
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/multitables/stage.py", line 104, in __init__
self._stage_pool.append(StagePool.StagePoolWrapper(dataset.create_stage(stage_size), self))
input_output = next(self.data)
File "/mnt/sdf/zzl/FACEGOOD-Audio2Face/code/torch_train/signal_dataset.py", line 103, in _yield_data
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/multitables/dataset.py", line 64, in create_stage
return stage.Stage(numpy_utils._calc_nbytes(self.dtype, self._fill_shape(shape)))
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/multitables/stage.py", line 29, in __init__
for row in self.gen:
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/multitables/streamer.py", line 247, in get_generator
self._shm_buf = shared_mem.SharedBuffer(map_id=None, size_nbytes=self.size_nbytes)
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/multitables/shared_mem.py", line 124, in __init__
q = self.get_queue(path=path, n_procs=n_procs, read_ahead=read_ahead, cyclic=cyclic, block_size=block_size, ordered=ordered, field=field, remainder=remainder)
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/multitables/streamer.py", line 195, in get_queue
self._fd = _shm_open(map_id, os.O_CREAT | os.O_EXCL | os.O_RDWR, mode=0o600)
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/multitables/shared_mem.py", line 64, in _shm_open
stage_pool = stage.StagePool(dataset, block_size, read_ahead, timeout=0.1)
return _posixshmlib.shm_open(name, access, mode)
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/multitables/stage.py", line 104, in __init__
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/multitables/shared_mem.py", line 46, in _handle_errno
raise RuntimeError("Unknown error type: {} when handling execution of {} with args {}".format(erno, func, args))
self._stage_pool.append(StagePool.StagePoolWrapper(dataset.create_stage(stage_size), self))
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/multitables/dataset.py", line 64, in create_stage
RuntimeError: Unknown error type: 132 when handling execution of <_FuncPtr object at 0x7f00263a2280> with args (b'/nXmxo38xgBw=', 194, 384)
return stage.Stage(numpy_utils._calc_nbytes(self.dtype, self._fill_shape(shape)))
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/multitables/stage.py", line 29, in __init__
self._shm_buf = shared_mem.SharedBuffer(map_id=None, size_nbytes=self.size_nbytes)
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/multitables/shared_mem.py", line 124, in __init__
self._fd = _shm_open(map_id, os.O_CREAT | os.O_EXCL | os.O_RDWR, mode=0o600)
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/multitables/shared_mem.py", line 64, in _shm_open
return _posixshmlib.shm_open(name, access, mode)
File "/home/zhuzengliang/anaconda3/envs/pytorch1/lib/python3.8/site-packages/multitables/shared_mem.py", line 46, in _handle_errno
raise RuntimeError("Unknown error type: {} when handling execution of {} with args {}".format(erno, func, args))
RuntimeError: Unknown error type: 132 when handling execution of <_FuncPtr object at 0x7f832a6a3280> with args (b'/nXmxo38xgBw=', 194, 384)
Metadata
Metadata
Assignees
Labels
No labels