Hi @dstamoulis, thanks for your code!
I have changed the TPU setup to GPU (using tf.estimator.Estimator and tf.estimator.RunConfig), and training on a single GPU works.
However, when I pass "MirroredStrategy" to tf.estimator.RunConfig for multi-GPU training, it does not work.
The error is:
I0514 20:11:40.999713 139768726693632 tf_logging.py:115] Error reported to Coordinator:
Traceback (most recent call last):
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/python/training/coordinator.py", line 297, in stop_on_exception
yield
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/contrib/distribute/python/mirrored_strategy.py", line 783, in run
self.main_result = self.main_fn(*self.main_args, **self.main_kwargs)
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 1168, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "/data/project/tensorflow/FACE/SinglePath_NAS/single-path-nas-master_multi_gpus/nas-search/search_main.py", line 361, in nas_model_fn
train_op = ema.apply(ema_vars)
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/python/training/moving_averages.py", line 431, in apply
self._averages[var], var, decay, zero_debias=zero_debias))
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/python/training/moving_averages.py", line 84, in assign_moving_average
with ops.colocate_with(variable):
File "/usr/local/miniconda3/lib/python3.6/contextlib.py", line 81, in __enter__
return next(self.gen)
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 4092, in _colocate_with_for_gradient
with self.colocate_with(op, ignore_existing):
File "/usr/local/miniconda3/lib/python3.6/contextlib.py", line 81, in __enter__
return next(self.gen)
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 4144, in colocate_with
op = internal_convert_to_tensor_or_indexed_slices(op, as_ref=True).op
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1305, in internal_convert_to_tensor_or_indexed_slices
value, dtype=dtype, name=name, as_ref=as_ref)
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1144, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/contrib/distribute/python/values.py", line 447, in _tensor_conversion_mirrored
assert not as_ref
AssertionError
Any help would be appreciated, thank you!
Hi @dstamoulis, thanks for your code!
I have changed the TPU setup to GPU (using tf.estimator.Estimator and tf.estimator.RunConfig), and training on a single GPU works.
However, when I pass "MirroredStrategy" to tf.estimator.RunConfig for multi-GPU training, it does not work.
The error is:
I0514 20:11:40.999713 139768726693632 tf_logging.py:115] Error reported to Coordinator:
Traceback (most recent call last):
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/python/training/coordinator.py", line 297, in stop_on_exception
yield
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/contrib/distribute/python/mirrored_strategy.py", line 783, in run
self.main_result = self.main_fn(*self.main_args, **self.main_kwargs)
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 1168, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "/data/project/tensorflow/FACE/SinglePath_NAS/single-path-nas-master_multi_gpus/nas-search/search_main.py", line 361, in nas_model_fn
train_op = ema.apply(ema_vars)
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/python/training/moving_averages.py", line 431, in apply
self._averages[var], var, decay, zero_debias=zero_debias))
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/python/training/moving_averages.py", line 84, in assign_moving_average
with ops.colocate_with(variable):
File "/usr/local/miniconda3/lib/python3.6/contextlib.py", line 81, in __enter__
return next(self.gen)
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 4092, in _colocate_with_for_gradient
with self.colocate_with(op, ignore_existing):
File "/usr/local/miniconda3/lib/python3.6/contextlib.py", line 81, in __enter__
return next(self.gen)
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 4144, in colocate_with
op = internal_convert_to_tensor_or_indexed_slices(op, as_ref=True).op
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1305, in internal_convert_to_tensor_or_indexed_slices
value, dtype=dtype, name=name, as_ref=as_ref)
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1144, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/usr/local/miniconda3/lib/python3.6/site-packages/tensorflow/contrib/distribute/python/values.py", line 447, in _tensor_conversion_mirrored
assert not as_ref
AssertionError
Any help would be appreciated, thank you!