We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
2 parents 80fa6de + af9ee69 · commit 43ed0a7 · Copy full SHA for 43ed0a7
src/twinkle/sampler/vllm_sampler/vllm_engine.py
@@ -47,12 +47,12 @@ def __init__(
47
model_id: str,
48
*,
49
tensor_parallel_size: int = 1,
50
- gpu_memory_utilization: float = 0.9,
+ gpu_memory_utilization: float = 0.7,
51
max_model_len: Optional[int] = None,
52
max_num_seqs: int = 256,
53
enable_lora: bool = True,
54
- max_loras: int = 64,
55
- max_lora_rank: int = 64,
+ max_loras: int = 5,
+ max_lora_rank: int = 32,
56
enable_sleep_mode: bool = False,
57
enable_prefix_caching: bool = False,
58
enforce_eager: bool = False,
0 commit comments