We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 0095fc0 commit efc99ca — Copy full SHA for efc99ca
src/twinkle/sampler/vllm_sampler/vllm_sampler.py
@@ -116,11 +116,12 @@ def __init__(
116
# Auto-detect tensor_parallel_size from CUDA_VISIBLE_DEVICES
117
if 'tensor_parallel_size' not in engine_kwargs:
118
tp_size = 1
119
- visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES', '')
+ visible_devices = os.environ.get(Platform.visible_device_env(), '')
120
if visible_devices:
121
num_gpus = len([d for d in visible_devices.split(',') if d.strip()])
122
if num_gpus > 0:
123
tp_size = num_gpus
124
+ logger.info(f'vLLM TP size: {tp_size}')
125
engine_kwargs['tensor_parallel_size'] = tp_size
126
127
# Set unique seed per engine based on rank for diverse sampling across DP workers
0 commit comments