Merged
2 changes: 1 addition & 1 deletion Makefile
@@ -1,4 +1,4 @@
-.PHONY: help test lint format clean install ci-test all
+.PHONY: help test test-cov lint format format-check type-check clean install ci-test ci-local ci-matrix all pre-commit

# Default target
help:
2 changes: 1 addition & 1 deletion README.md
@@ -132,7 +132,7 @@ pipx install --pip-args="--pre" "vllm-cli[vllm]"

```bash
# Interactive mode - menu-driven interface
-vllm-cl
+vllm-cli
# Serve a model
vllm-cli serve --model openai/gpt-oss-20b

4 changes: 2 additions & 2 deletions docs/profiles.md
@@ -5,7 +5,7 @@ Seven carefully designed profiles cover most common use cases and hardware confi
## General Purpose Profiles

### `standard` - Minimal configuration with smart defaults
-Uses vLLM's defaults configuration. Perfect for most models and hardware setups.
+Uses vLLM's default configuration. Perfect for most models and hardware setups.

**Use Case:** Starting point for any model, general inference tasks
**Configuration:** No additional arguments - uses vLLM defaults
@@ -182,7 +182,7 @@ Common environment variables used in profiles:

| Variable | Purpose | Values |
|----------|---------|---------|
-| `VLLM_ATTENTION_BACKEND` | Attention computation backend | `FLASH_ATTN`, `XFORMERS`, `TRITON` |
+| `VLLM_ATTENTION_BACKEND` | Attention computation backend | `FLASH_ATTN`, `XFORMERS`, `TRITON_ATTN_VLLM_V1` |
| `VLLM_USE_TRITON_FLASH_ATTN` | Enable Triton flash attention | `0`, `1` |
| `VLLM_ENABLE_FUSED_MOE_ACTIVATION_CHUNKING` | MoE activation chunking | `0`, `1` |
| `VLLM_USE_FLASHINFER_MXFP4_BF16_MOE` | BF16 precision for MoE | `0`, `1` |
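As a quick illustration (a sketch, not part of this diff): a profile variable from the table takes effect simply by being present in the server process's environment before vLLM starts; `FLASH_ATTN` below is just one of the listed values.

```python
import os

# Select the attention backend before vLLM is imported or launched.
# FLASH_ATTN is one of the documented values; TRITON_ATTN_VLLM_V1 is the
# updated name for the old TRITON backend in this table.
os.environ["VLLM_ATTENTION_BACKEND"] = "FLASH_ATTN"
print(os.environ["VLLM_ATTENTION_BACKEND"])
```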
15 changes: 13 additions & 2 deletions scripts/test_ci_locally.sh
@@ -51,8 +51,10 @@ fi
run_test "Unit Tests" "pytest tests/ -v --tb=short"

# 4. Check Test Coverage (optional but informative)
-if command -v pytest-cov &> /dev/null; then
+if python -c "import pytest_cov" 2>/dev/null; then
    run_test "Test Coverage" "pytest tests/ --cov=src/vllm_cli --cov-report=term-missing --cov-fail-under=50"
+else
+    echo -e "${YELLOW}⚠️ Skipping coverage (pytest-cov not installed)${NC}\n"
fi

# 5. Linting with flake8
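A note on the hunk above (a sketch, not part of the PR): `command -v` only finds executables, and pytest-cov ships as a pytest plugin with no binary of its own, which is why probing importability is the reliable check. The helper name below is illustrative:

```python
import importlib.util

def plugin_installed(module_name: str) -> bool:
    """Return True if `module_name` is importable. pytest plugins like
    pytest-cov install no standalone executable, so `command -v` cannot
    find them; checking importability is the dependable probe."""
    return importlib.util.find_spec(module_name) is not None

# Note the module is `pytest_cov` (underscore), not `pytest-cov`.
print(plugin_installed("pytest_cov"))
```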
@@ -90,7 +92,16 @@ run_test "CLI Help Test" "python -m vllm_cli --help > /dev/null"

# 12. Validate pyproject.toml
if [ -f "pyproject.toml" ]; then
-run_test "Validate pyproject.toml" "python -c 'import toml; toml.load(\"pyproject.toml\"); print(\"pyproject.toml is valid\")'"
+run_test "Validate pyproject.toml" "python -c '
+try:
+    import tomllib
+    with open(\"pyproject.toml\", \"rb\") as f:
+        tomllib.load(f)
+except ImportError:
+    import toml
+    toml.load(\"pyproject.toml\")
+print(\"pyproject.toml is valid\")
+'"
fi

# Summary
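The inline fallback in the script above can also be kept as a small standalone helper — a sketch assuming the same availability rules (tomllib is in the stdlib from Python 3.11; `toml` is the third-party package); the function name is illustrative:

```python
def load_pyproject(path: str) -> dict:
    """Parse a TOML file, preferring the stdlib tomllib (Python 3.11+)
    and falling back to the third-party `toml` package on older
    interpreters. Raises on invalid TOML, mirroring the CI check."""
    try:
        import tomllib  # stdlib since 3.11; requires a binary file object
        with open(path, "rb") as f:
            return tomllib.load(f)
    except ImportError:
        import toml  # third-party fallback for Python < 3.11
        return toml.load(path)
```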