-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_distributed_engines.py
More file actions
68 lines (56 loc) · 2.37 KB
/
run_distributed_engines.py
File metadata and controls
68 lines (56 loc) · 2.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
Wrapper to call torch.distributed.launch to run multi-gpu trainings.
Supports two engines: train and extract_features.
Supports SLURM as an option. Set config.SLURM.USE_SLURM=true to use slurm.
"""
import os
import sys
from typing import List, Any

from hydra.experimental import initialize_config_module, compose
from vissl.utils.distributed_launcher import (
    launch_distributed,
    launch_distributed_on_slurm,
)
from vissl.utils.hydra_config import is_hydra_available, convert_to_attrdict
from vissl.utils.slurm import is_submitit_available
def _experiment_name_from_overrides(overrides: List[Any]) -> str:
    """
    Derive the experiment name from the first override containing ``config=``.

    The name is the last path component of the text that follows ``config=``,
    so ``config=test/integration_test/quick_simclr`` and ``config=quick_simclr``
    both yield ``quick_simclr``.

    Raises:
        ValueError: if no override contains ``config=`` or the name is empty.
    """
    for override in overrides:
        _, marker, config_path = override.partition("config=")
        if not marker:
            continue
        # Trailing slashes are dropped so ``config=a/b/`` still resolves to ``b``.
        exp_name = config_path.rstrip("/").rsplit("/", 1)[-1]
        if exp_name:
            return exp_name
        # Only the first matching override is considered.
        break
    raise ValueError('Experiment name is not defined')


def hydra_main(overrides: List[Any]):
    """
    Compose the hydra config from ``overrides`` and launch the requested engine.

    The experiment name derived from the ``config=`` override is stored in
    ``config['EXP_NAME']`` and appended as a sub-directory of
    ``config['CHECKPOINT']['DIR']``. When ``config.SLURM.USE_SLURM`` is set the
    job goes through ``launch_distributed_on_slurm``; otherwise
    ``launch_distributed`` is called with the default hook generator.

    Args:
        overrides: hydra override strings, e.g.
            ``["config=test/integration_test/quick_simclr"]``.

    Raises:
        ValueError: if the experiment name cannot be derived from ``overrides``.
    """
    print(f"####### overrides: {overrides}")

    # Validate before importing vissl.hooks and composing the config, so that a
    # missing ``config=`` override fails immediately.
    exp_name = _experiment_name_from_overrides(overrides)

    ######################################################################################
    # DO NOT MOVE THIS IMPORT TO TOP LEVEL: submitit processes will not be initialized
    # correctly (MKL_THREADING_LAYER will be set to INTEL instead of GNU)
    ######################################################################################
    from vissl.hooks import default_hook_generator

    ######################################################################################
    with initialize_config_module(config_module="vissl.config"):
        cfg = compose("defaults", overrides=overrides)
    args, config = convert_to_attrdict(cfg)

    config['EXP_NAME'] = exp_name
    # Join as path components: plain concatenation glued the experiment name onto
    # the last directory component whenever DIR had no trailing separator.
    config['CHECKPOINT']['DIR'] = os.path.join(config['CHECKPOINT']['DIR'], exp_name)

    if config.SLURM.USE_SLURM:
        assert (
            is_submitit_available()
        ), "Please 'pip install submitit' to schedule jobs on SLURM"
        launch_distributed_on_slurm(engine_name=args.engine_name, cfg=config)
    else:
        launch_distributed(
            cfg=config,
            node_id=args.node_id,
            engine_name=args.engine_name,
            hook_generator=default_hook_generator,
        )
if __name__ == "__main__":
    # Example usage:
    #   `python tools/run_distributed_engines.py config=test/integration_test/quick_simclr`
    assert is_hydra_available(), "Make sure to install hydra"
    # Forward every command-line argument as a hydra override and always
    # append the verbose-logging override at the end.
    cli_overrides = [*sys.argv[1:], "hydra.verbose=true"]
    hydra_main(overrides=cli_overrides)