diff --git a/Dockerfile b/Dockerfile
index dcc4173..59d6bea 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -100,16 +100,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     ninja-build \
     curl \
     ca-certificates \
+    wget \
     # Core libs for Python packages
     libssl3 \
+    libssl-dev \
     libffi8 \
     libcurl4 \
     libcurl4-openssl-dev \
     libopenblas0 \
-    # GPU acceleration support
-    libvulkan1 \
-    vulkan-tools \
-    mesa-vulkan-drivers \
+    # GPU acceleration support (CUDA only)
     ocl-icd-libopencl1 \
     libnuma1 \
     pciutils \
diff --git a/README.md b/README.md
index cb1ecef..b8db9e3 100644
--- a/README.md
+++ b/README.md
@@ -7,8 +7,6 @@ It is designed for **power users running models on a single machine or small ser
 - **CPU-only** inference (OpenBLAS)
 - **NVIDIA CUDA GPUs** (via the NVIDIA Container Toolkit)
 
-There is **no built-in support for Vulkan/ROCm/Metal backends** and **no Smart Auto feature** – configuration is explicit and predictable.
-
 ### Key capabilities
 
 - **HuggingFace search (GGUF + safetensors)**: Search the Hub, inspect metadata, and plan downloads by quantization or safetensors bundle.
diff --git a/backend/gpu_detector.py b/backend/gpu_detector.py
index 9591feb..b1fa038 100644
--- a/backend/gpu_detector.py
+++ b/backend/gpu_detector.py
@@ -1,10 +1,9 @@
 """
 GPU Detection and Capability Discovery
 
-This module provides comprehensive GPU detection across multiple vendors:
+This module detects the supported acceleration backends:
 - NVIDIA GPUs (CUDA support)
-- AMD GPUs (ROCm and Vulkan support)
-- GPU acceleration backends (CUDA, Vulkan, Metal, OpenBLAS)
+- CPU acceleration (OpenBLAS)
 """
 
 import subprocess
@@ -61,20 +60,6 @@ def _cpu_only_response() -> Dict:
 # ============================================================================
 
 
-def _check_vulkan_drivers() -> bool:
-    """Check if Vulkan drivers are installed"""
-    try:
-        result = subprocess.run(
-            ["vulkaninfo", "--summary"], capture_output=True, text=True, timeout=5
-        )
-        return result.returncode == 0
-    except (subprocess.CalledProcessError, FileNotFoundError):
-        # Check if vulkan libraries exist
-        return os.path.exists("/usr/share/vulkan") or os.path.exists(
-            "/usr/lib/x86_64-linux-gnu/libvulkan.so"
-        )
-
-
 def _check_openblas() -> bool:
     """Check if OpenBLAS is available"""
     try:
@@ -92,20 +77,6 @@ def _check_openblas() -> bool:
         ) or os.path.exists("/usr/local/lib/libopenblas.so")
 
 
-def _check_metal() -> bool:
-    """Check if Metal is available (macOS only)"""
-    try:
-        if os.uname().sysname == "Darwin":
-            return os.path.exists(
-                "/System/Library/Extensions/GeForceMTLDriver.bundle"
-            ) or os.path.exists(
-                "/Library/Apple/System/Library/CoreServices/GPUWrangler.app"
-            )
-    except:
-        pass
-    return False
-
-
 def _resolve_nvidia_smi() -> Optional[str]:
     """Resolve the nvidia-smi binary path across environments."""
     candidates = []
@@ -549,16 +520,6 @@ async def detect_build_capabilities() -> Dict[str, Dict[str, any]]:
                 "recommended": False,
                 "reason": _gpu_disable_reason or "GPU detection disabled",
             },
-            "vulkan": {
-                "available": False,
-                "recommended": False,
-                "reason": "CPU-only mode",
-            },
-            "metal": {
-                "available": False,
-                "recommended": False,
-                "reason": "CPU-only mode",
-            },
             "openblas": {
                 "available": openblas_available,
                 "recommended": openblas_available,
@@ -578,63 +539,21 @@ async def detect_build_capabilities() -> Dict[str, Dict[str, any]]:
     if gpu_info.get("device_count", 0) > 0:
         cuda_available = vendor == "nvidia"
 
-    # Check other backends
-    metal_available = _check_metal()
     openblas_available = _check_openblas()
 
-    # Vulkan is only available if:
-    # 1. An AMD GPU is detected AND Vulkan drivers are installed, OR
-    # 2. A GPU device directory exists (indicating GPU passthrough in a container)
-    vulkan_available = False
-    if vendor == "amd":
-        # For AMD GPUs, check if Vulkan drivers are available
-        vulkan_available = _check_vulkan_drivers()
-    elif vendor is None:
-        # No specific GPU detected, but check if we have GPU access in a container
-        if os.path.exists("/dev/dri"):
-            vulkan_available = _check_vulkan_drivers()
-
-    # Build capabilities response
     capabilities = {
         "cuda": {
             "available": cuda_available,
-            "recommended": cuda_available
-            and not vulkan_available
-            and not openblas_available,
+            "recommended": cuda_available,  # CUDA takes priority whenever an NVIDIA GPU is present
             "reason": (
                 f"{gpu_info.get('device_count', 0)} NVIDIA GPU(s) detected"
                 if cuda_available
                 else "No NVIDIA GPU detected"
             ),
         },
-        "vulkan": {
-            "available": vulkan_available,
-            "recommended": (vulkan_available and not cuda_available)
-            or (gpu_info.get("vendor") == "amd"),
-            "reason": (
-                "Vulkan drivers available"
-                if vulkan_available
-                else (
-                    "Available for AMD GPUs in containers"
-                    if gpu_info.get("vendor") == "amd"
-                    else "Vulkan drivers not detected"
-                )
-            ),
-        },
-        "metal": {
-            "available": metal_available,
-            "recommended": metal_available
-            and not cuda_available
-            and not vulkan_available,
-            "reason": (
-                "Metal available (macOS)" if metal_available else "Not running on macOS"
-            ),
-        },
         "openblas": {
             "available": openblas_available,
-            "recommended": openblas_available
-            and not cuda_available
-            and not vulkan_available,
+            "recommended": openblas_available and not cuda_available,
             "reason": (
                 "OpenBLAS library available"
                 if openblas_available
@@ -643,22 +562,11 @@ async def detect_build_capabilities() -> Dict[str, Dict[str, any]]:
         },
     }
 
-    # Special handling for AMD GPUs
     if gpu_info.get("vendor") == "amd":
-        capabilities["cuda"]["reason"] = "AMD GPU detected - use Vulkan instead"
-        capabilities["cuda"]["available"] = False  # Explicitly disable CUDA for AMD
-        capabilities["vulkan"]["recommended"] = True
-        capabilities["vulkan"][
-            "reason"
-        ] = f"AMD GPU detected ({gpu_info.get('device_count', 0)} device(s)) - Vulkan recommended"
-
-    # If no GPU available, recommend OpenBLAS for CPU acceleration
-    if (
-        not cuda_available
-        and not vulkan_available
-        and not metal_available
-        and openblas_available
-    ):
+        capabilities["cuda"]["reason"] = "AMD GPU detected - CUDA not supported"
+        capabilities["cuda"]["available"] = False
+
+    if not cuda_available and openblas_available:
         capabilities["openblas"]["recommended"] = True
 
     return capabilities
@@ -669,11 +577,6 @@ async def detect_build_capabilities() -> Dict[str, Dict[str, any]]:
 # ============================================================================
 
 
-async def check_vulkan() -> bool:
-    """Legacy function for Vulkan check (for backward compatibility)"""
-    return _check_vulkan_drivers()
-
-
 async def detect_gpu_capabilities() -> Dict[str, bool]:
     """Legacy function for GPU capabilities (for backward compatibility)"""
     try:
diff --git a/backend/llama_manager.py b/backend/llama_manager.py
index 55f5d2e..4659f8e 100644
--- a/backend/llama_manager.py
+++ b/backend/llama_manager.py
@@ -23,8 +23,6 @@ class BuildConfig:
 
     # GPU backends
     enable_cuda: bool = False
-    enable_vulkan: bool = False
-    enable_metal: bool = False
     enable_openblas: bool = False
     enable_flash_attention: bool = False  # Enables -DGGML_CUDA_FA_ALL_QUANTS=ON
 
@@ -84,6 +82,12 @@ class LlamaManager:
         "ik_llama.cpp": IK_LLAMA_CPP_REPO,
     }
 
+    # Build options: llama.cpp vs ik_llama.cpp
+    # - Both use the same GGML_* / LLAMA_* CMake options (GGML_CUDA, GGML_NATIVE, LLAMA_BUILD_*, etc.).
+    # - ik_llama.cpp is a fork with IQK quantization and optimizations; IQK is built-in (no extra CMake flag).
+    # - ik_llama.cpp puts the server binary under examples/, so LLAMA_BUILD_EXAMPLES must be ON for
+    #   the server to be built. We enforce build_examples=True when repository_source == "ik_llama.cpp".
+
     def __init__(self):
         # Use absolute path so clone/build work regardless of process cwd (e.g. --app-dir backend)
         if os.path.exists("/app/data"):
@@ -492,8 +496,6 @@ def _extract_asset_features(self, asset_name: str) -> List[str]:
 
         feature_map = {
             "cuda": "CUDA",
-            "vulkan": "Vulkan",
-            "metal": "Metal",
             "opencl": "OpenCL",
             "hip": "HIP/ROCm",
             "rocm": "HIP/ROCm",
@@ -1436,8 +1438,6 @@ def set_flag(flag: str, value: bool):
                     logger.info(
                         f"CUDA configuration: compiler={nvcc_path}, toolkit={validated_cuda_root}"
                     )
-            set_flag("GGML_VULKAN", build_config.enable_vulkan)
-            set_flag("GGML_METAL", build_config.enable_metal)
             set_flag("GGML_BLAS", build_config.enable_openblas)
             if build_config.enable_openblas:
                 cmake_args.append("-DGGML_BLAS_VENDOR=OpenBLAS")
@@ -1461,6 +1461,8 @@ def set_flag(flag: str, value: bool):
             set_flag("LLAMA_BUILD_EXAMPLES", build_config.build_examples)
             set_flag("LLAMA_BUILD_SERVER", build_config.build_server)
             set_flag("LLAMA_TOOLS_INSTALL", build_config.install_tools)
+            # HTTPS support (required for model URLs, etc.)
+            set_flag("LLAMA_OPENSSL", True)
 
             # Advanced GGML options
             set_flag("GGML_BACKEND_DL", build_config.enable_backend_dl)
diff --git a/backend/routes/llama_versions.py b/backend/routes/llama_versions.py
index deb69ac..de6ab8f 100644
--- a/backend/routes/llama_versions.py
+++ b/backend/routes/llama_versions.py
@@ -97,14 +97,28 @@ async def list_llama_versions():
 
 
 def _default_build_settings() -> dict:
-    """Default build-settings payload for engines when nothing is saved yet."""
+    """Default build-settings payload for engines when nothing is saved yet.
+    Covers all BuildConfig fields so backend and frontend stay in sync.
+    """
     return {
+        "build_type": "Release",
         "cuda": False,
+        "openblas": False,
         "flash_attention": False,
-        "native": True,
+        "build_common": True,
+        "build_tests": True,
+        "build_tools": True,
+        "build_examples": True,
+        "build_server": True,
+        "install_tools": True,
         "backend_dl": False,
         "cpu_all_variants": False,
+        "lto": False,
+        "native": True,
+        "custom_cmake_args": "",
         "cuda_architectures": "",
+        "cflags": "",
+        "cxxflags": "",
     }
 
 
@@ -120,25 +134,56 @@ def _bool(v):
             return v.strip().lower() in ("1", "true", "yes", "on")
         return bool(v)
 
+    def _str(v, default=""):
+        return str(v).strip() if v is not None else default
+
+    build_type = _str(settings.get("build_type"), base["build_type"])
+    if build_type not in ("Debug", "Release", "RelWithDebInfo", "MinSizeRel"):
+        build_type = base["build_type"]
+
     return {
+        "build_type": build_type,
         "cuda": _bool(settings.get("cuda", base["cuda"])),
+        "openblas": _bool(settings.get("openblas", base["openblas"])),
         "flash_attention": _bool(settings.get("flash_attention", base["flash_attention"])),
-        "native": _bool(settings.get("native", base["native"])),
+        "build_common": _bool(settings.get("build_common", base["build_common"])),
+        "build_tests": _bool(settings.get("build_tests", base["build_tests"])),
+        "build_tools": _bool(settings.get("build_tools", base["build_tools"])),
+        "build_examples": _bool(settings.get("build_examples", base["build_examples"])),
+        "build_server": _bool(settings.get("build_server", base["build_server"])),
+        "install_tools": _bool(settings.get("install_tools", base["install_tools"])),
         "backend_dl": _bool(settings.get("backend_dl", base["backend_dl"])),
         "cpu_all_variants": _bool(settings.get("cpu_all_variants", base["cpu_all_variants"])),
-        "cuda_architectures": str(settings.get("cuda_architectures") or ""),
+        "lto": _bool(settings.get("lto", base["lto"])),
+        "native": _bool(settings.get("native", base["native"])),
+        "custom_cmake_args": _str(settings.get("custom_cmake_args"), base["custom_cmake_args"]),
+        "cuda_architectures": _str(settings.get("cuda_architectures"), base["cuda_architectures"]),
+        "cflags": _str(settings.get("cflags"), base["cflags"]),
+        "cxxflags": _str(settings.get("cxxflags"), base["cxxflags"]),
     }
 
 
 def _build_config_from_settings(settings: Optional[dict]) -> BuildConfig:
     normalized = _coerce_build_settings(settings)
     return BuildConfig(
+        build_type=normalized["build_type"],
         enable_cuda=normalized["cuda"],
+        enable_openblas=normalized["openblas"],
         enable_flash_attention=normalized["flash_attention"],
-        enable_native=normalized["native"],
+        build_common=normalized["build_common"],
+        build_tests=normalized["build_tests"],
+        build_tools=normalized["build_tools"],
+        build_examples=normalized["build_examples"],
+        build_server=normalized["build_server"],
+        install_tools=normalized["install_tools"],
         enable_backend_dl=normalized["backend_dl"],
         enable_cpu_all_variants=normalized["cpu_all_variants"],
+        enable_lto=normalized["lto"],
+        enable_native=normalized["native"],
+        custom_cmake_args=normalized["custom_cmake_args"],
         cuda_architectures=normalized["cuda_architectures"],
+        cflags=normalized["cflags"],
+        cxxflags=normalized["cxxflags"],
     )
 
 
@@ -187,6 +232,14 @@ def _fetch_latest_release(repository_source: str) -> Optional[dict]:
     return None
 
 
+def _apply_engine_specific_build_defaults(engine: str, settings: dict) -> dict:
+    """Return a copy of ``settings`` with per-engine overrides; "ik_llama" always gets build_examples=True."""
+    overrides = {}
+    if engine == "ik_llama":  # ik_llama.cpp's server lives in examples/, so LLAMA_BUILD_EXAMPLES must be ON
+        overrides["build_examples"] = True
+    return {**settings, **overrides}
+
+
 @router.get("/build-settings")
 async def get_build_settings(engine: str = "llama_cpp"):
     """Get persisted build settings for an engine ('llama_cpp' or 'ik_llama')."""
@@ -197,7 +250,7 @@ async def get_build_settings(engine: str = "llama_cpp"):
     # Always return a full shape so the frontend can rely on defaults.
     base = _default_build_settings()
     base.update({k: v for k, v in settings.items() if k in base})
-    return base
+    return _apply_engine_specific_build_defaults(engine, base)
 
 
 @router.put("/build-settings")
@@ -211,10 +264,11 @@ async def update_build_settings(engine: str = "llama_cpp", settings: dict = Body
     # Only persist known build keys; ignore extras.
     allowed = _default_build_settings().keys()
     filtered = {k: v for k, v in settings.items() if k in allowed}
+    filtered = _apply_engine_specific_build_defaults(engine, filtered)
     stored = store.update_engine_build_settings(engine, filtered)
     base = _default_build_settings()
     base.update({k: v for k, v in stored.items() if k in base})
-    return base
+    return _apply_engine_specific_build_defaults(engine, base)
 
 
 @router.post("/update")
@@ -320,7 +374,7 @@ async def get_release_assets(tag_name: str):
 
 @router.get("/build-capabilities")
 async def get_build_capabilities_endpoint():
-    """Get build capabilities based on detected hardware"""
+    """Get build capabilities (CUDA, OpenBLAS)."""
     try:
         return await detect_build_capabilities()
     except Exception as e:
@@ -332,16 +386,6 @@ async def get_build_capabilities_endpoint():
                 "recommended": False,
                 "reason": f"Error: {str(e)}",
             },
-            "vulkan": {
-                "available": False,
-                "recommended": False,
-                "reason": f"Error: {str(e)}",
-            },
-            "metal": {
-                "available": False,
-                "recommended": False,
-                "reason": f"Error: {str(e)}",
-            },
             "openblas": {
                 "available": False,
                 "recommended": False,
@@ -459,14 +503,32 @@ def _bool(v):
                     return v.strip().lower() in ("1", "true", "yes", "on")
                 return bool(v)
 
-            # Frontend sends cuda, flash_attention, native, backend_dl, cpu_all_variants
+            def _str(v, default=""):
+                return str(v).strip() if v is not None else default
+
+            bt = _str(build_config_dict.get("build_type"), "Release")
+            if bt not in ("Debug", "Release", "RelWithDebInfo", "MinSizeRel"):
+                bt = "Release"
+
             mapped = {
+                "build_type": bt,
                 "enable_cuda": _bool(build_config_dict.get("cuda", False)),
+                "enable_openblas": _bool(build_config_dict.get("openblas", False)),
                 "enable_flash_attention": _bool(build_config_dict.get("flash_attention", False)),
-                "enable_native": _bool(build_config_dict.get("native", True)),
+                "build_common": _bool(build_config_dict.get("build_common", True)),
+                "build_tests": _bool(build_config_dict.get("build_tests", True)),
+                "build_tools": _bool(build_config_dict.get("build_tools", True)),
+                "build_examples": _bool(build_config_dict.get("build_examples", True)),
+                "build_server": _bool(build_config_dict.get("build_server", True)),
+                "install_tools": _bool(build_config_dict.get("install_tools", True)),
                 "enable_backend_dl": _bool(build_config_dict.get("backend_dl", False)),
                 "enable_cpu_all_variants": _bool(build_config_dict.get("cpu_all_variants", False)),
-                "cuda_architectures": str(build_config_dict.get("cuda_architectures") or ""),
+                "enable_lto": _bool(build_config_dict.get("lto", False)),
+                "enable_native": _bool(build_config_dict.get("native", True)),
+                "custom_cmake_args": _str(build_config_dict.get("custom_cmake_args")),
+                "cuda_architectures": _str(build_config_dict.get("cuda_architectures")),
+                "cflags": _str(build_config_dict.get("cflags")),
+                "cxxflags": _str(build_config_dict.get("cxxflags")),
             }
             try:
                 build_config = BuildConfig(**mapped)
@@ -794,14 +856,55 @@ async def delete_version(version_id: str):
     if active and str(active.get("version")) == version_str:
         raise HTTPException(status_code=400, detail="Cannot delete active version")
     try:
-        binary_path = version_entry.get("binary_path")
-        if binary_path:
-            if not os.path.isabs(binary_path):
-                binary_path = os.path.join("/app", binary_path)
-            if os.path.exists(binary_path):
-                version_dir = os.path.dirname(os.path.dirname(binary_path))
-                if os.path.exists(version_dir):
-                    _robust_rmtree(version_dir)
+        binary_path = _resolve_binary_path(version_entry.get("binary_path") or "")
+        if binary_path and os.path.exists(binary_path):
+            # Safely resolve the on-disk version directory without ever deleting the
+            # entire llama-cpp root. Versions are stored as subdirectories of
+            # llama_manager.llama_dir (e.g. <llama_dir>/<version>/.../llama-server).
+            try:
+                llama_root = os.path.realpath(llama_manager.llama_dir)
+                binary_real = os.path.realpath(binary_path)
+            except Exception:
+                llama_root = llama_manager.llama_dir
+                binary_real = binary_path
+
+            version_dir = None
+
+            # If the binary lives under the llama root, treat the first path
+            # component under that root as the version directory.
+            try:
+                if os.path.commonpath([binary_real, llama_root]) == llama_root:
+                    rel = os.path.relpath(binary_real, llama_root)
+                    first_component = rel.split(os.sep)[0]
+                    if first_component and first_component not in (".", ""):
+                        candidate = os.path.join(llama_root, first_component)
+                        if os.path.isdir(candidate):
+                            version_dir = candidate
+            except Exception:
+                # Fall back to parent-directory logic below if commonpath/relpath fail
+                version_dir = None
+
+            # Fallback: use the binary's parent directory, but never delete the
+            # llama root itself.
+            if not version_dir:
+                candidate = os.path.dirname(binary_real)
+                if (
+                    candidate
+                    and os.path.isdir(candidate)
+                    and os.path.commonpath([candidate, llama_root]) == llama_root
+                    and os.path.abspath(candidate) != os.path.abspath(llama_root)
+                ):
+                    version_dir = candidate
+
+            if version_dir and os.path.exists(version_dir):
+                _robust_rmtree(version_dir)
+            else:
+                # As a last resort, remove just the binary to avoid leaving a
+                # completely broken entry on disk.
+                try:
+                    os.remove(binary_real)
+                except OSError:
+                    pass
         store.delete_engine_version(engine, version_str)
         logger.info(f"Deleted version: {version_str}")
         return {"message": f"Deleted version {version_str}"}
diff --git a/frontend/src/views/EnginesView.vue b/frontend/src/views/EnginesView.vue
index 363e291..aa578b7 100644
--- a/frontend/src/views/EnginesView.vue
+++ b/frontend/src/views/EnginesView.vue
@@ -363,7 +363,7 @@
     <!-- ── Build Settings Dialog ─────────────────────────── -->
     <Dialog v-model:visible="buildDialogVisible"
       :header="`Build settings — ${buildTarget === 'ik_llama' ? 'ik_llama.cpp' : 'llama.cpp'}`"
-      modal :style="{ width: '560px' }">
+      modal :style="{ width: '620px' }" class="build-settings-dialog">
       <div class="dialog-body">
         <div class="form-field">
           <label>Ref (tag / branch / commit)</label>
@@ -378,9 +378,48 @@
           <small>Appended to version name. Defaults to timestamp if empty.</small>
         </div>
         <div class="form-field">
-          <label>Build Options</label>
+          <label>Build type</label>
+          <Dropdown v-model="buildForm.buildConfig.build_type"
+            :options="buildTypeOptions"
+            optionLabel="label"
+            optionValue="value"
+            placeholder="Release"
+            style="width:100%" />
+        </div>
+        <div class="form-field">
+          <label class="build-options-section">GPU &amp; backends</label>
           <div class="toggle-grid">
-            <div v-for="opt in buildOptions" :key="opt.key" class="toggle-row">
+            <div v-for="opt in buildOptionsGpu" :key="opt.key" class="toggle-row">
+              <InputSwitch v-model="buildForm.buildConfig[opt.key]" />
+              <div>
+                <span class="opt-label">{{ opt.label }}</span>
+                <small class="opt-desc">{{ opt.desc }}</small>
+              </div>
+            </div>
+          </div>
+        </div>
+        <div class="form-field">
+          <label class="build-options-section">Build artifacts</label>
+          <div v-if="buildTarget === 'ik_llama'" class="build-note build-note--info">
+            For ik_llama.cpp, <strong>Examples</strong> is required (server binary lives in examples).
+          </div>
+          <div class="toggle-grid">
+            <div v-for="opt in buildOptionsArtifacts" :key="opt.key" class="toggle-row">
+              <InputSwitch
+                v-model="buildForm.buildConfig[opt.key]"
+                :disabled="buildTarget === 'ik_llama' && opt.key === 'build_examples'"
+              />
+              <div>
+                <span class="opt-label">{{ opt.label }}</span>
+                <small class="opt-desc">{{ opt.desc }}</small>
+              </div>
+            </div>
+          </div>
+        </div>
+        <div class="form-field">
+          <label class="build-options-section">GGML / CPU options</label>
+          <div class="toggle-grid">
+            <div v-for="opt in buildOptionsGGML" :key="opt.key" class="toggle-row">
               <InputSwitch v-model="buildForm.buildConfig[opt.key]" />
               <div>
                 <span class="opt-label">{{ opt.label }}</span>
@@ -394,6 +433,18 @@
           <InputText v-model="buildForm.buildConfig.cuda_architectures"
             placeholder="e.g. 86;89 (blank = auto)" style="width:100%" />
         </div>
+        <div class="form-field">
+          <label>Custom CMake args <span class="optional">(optional)</span></label>
+          <InputText v-model="buildForm.buildConfig.custom_cmake_args"
+            placeholder="e.g. -DFOO=ON -DBAR=OFF" style="width:100%" />
+        </div>
+        <div class="form-field">
+          <label>CFLAGS / CXXFLAGS <span class="optional">(optional)</span></label>
+          <div class="flags-row">
+            <InputText v-model="buildForm.buildConfig.cflags" placeholder="CFLAGS" style="flex:1" />
+            <InputText v-model="buildForm.buildConfig.cxxflags" placeholder="CXXFLAGS" style="flex:1" />
+          </div>
+        </div>
       </div>
       <template #footer>
         <Button label="Cancel" severity="secondary" outlined @click="buildDialogVisible = false" />
@@ -645,31 +696,58 @@ const savingBuildSettings = ref(false)
 const buildForm = ref({
   commitSha: '',
   versionSuffix: '',
-  buildConfig: {
-    cuda: false,
-    flash_attention: false,
-    native: true,
-    backend_dl: false,
-    cpu_all_variants: false,
-    cuda_architectures: '',
-  },
+  buildConfig: _defaultBuildConfig(),
 })
-const buildOptions = [
-  { key: 'cuda',             label: 'CUDA Support',             desc: 'GGML_CUDA=on' },
-  { key: 'flash_attention',  label: 'Flash Attention',          desc: 'GGML_CUDA_FA_ALL_QUANTS=on (requires CUDA)' },
-  { key: 'native',           label: 'Native CPU Optimizations', desc: 'GGML_NATIVE=on' },
-  { key: 'backend_dl',       label: 'Backend Dynamic Loading',  desc: 'GGML_BACKEND_DL=on' },
-  { key: 'cpu_all_variants', label: 'CPU All Variants',         desc: 'GGML_CPU_ALL_VARIANTS=on' },
+
+const buildTypeOptions = [
+  { label: 'Release', value: 'Release' },
+  { label: 'Debug', value: 'Debug' },
+  { label: 'RelWithDebInfo', value: 'RelWithDebInfo' },
+  { label: 'MinSizeRel', value: 'MinSizeRel' },
+]
+
+const buildOptionsGpu = [
+  { key: 'cuda', label: 'CUDA', desc: 'GGML_CUDA=on' },
+  { key: 'flash_attention', label: 'Flash Attention', desc: 'GGML_CUDA_FA_ALL_QUANTS=on (requires CUDA)' },
+  { key: 'openblas', label: 'OpenBLAS', desc: 'GGML_BLAS=on (CPU acceleration)' },
+]
+
+const buildOptionsArtifacts = [
+  { key: 'build_common', label: 'Common lib', desc: 'LLAMA_BUILD_COMMON=on' },
+  { key: 'build_tests', label: 'Tests', desc: 'LLAMA_BUILD_TESTS=on' },
+  { key: 'build_tools', label: 'Tools', desc: 'LLAMA_BUILD_TOOLS=on' },
+  { key: 'build_examples', label: 'Examples', desc: 'LLAMA_BUILD_EXAMPLES=on' },
+  { key: 'build_server', label: 'Server', desc: 'LLAMA_BUILD_SERVER=on (required for serving)' },
+  { key: 'install_tools', label: 'Install tools', desc: 'LLAMA_TOOLS_INSTALL=on' },
+]
+
+const buildOptionsGGML = [
+  { key: 'native', label: 'Native CPU', desc: 'GGML_NATIVE=on' },
+  { key: 'backend_dl', label: 'Backend DL', desc: 'GGML_BACKEND_DL=on' },
+  { key: 'cpu_all_variants', label: 'CPU all variants', desc: 'GGML_CPU_ALL_VARIANTS=on' },
+  { key: 'lto', label: 'LTO', desc: 'GGML_LTO=on (link-time optimization)' },
 ]
 
 function _defaultBuildConfig() {
   return {
+    build_type: 'Release',
     cuda: false,
+    openblas: false,
     flash_attention: false,
-    native: true,
+    build_common: true,
+    build_tests: true,
+    build_tools: true,
+    build_examples: true,
+    build_server: true,
+    install_tools: true,
     backend_dl: false,
     cpu_all_variants: false,
+    lto: false,
+    native: true,
+    custom_cmake_args: '',
     cuda_architectures: '',
+    cflags: '',
+    cxxflags: '',
   }
 }
 
@@ -714,6 +792,10 @@ async function openBuildDialog(engineKey) {
   } catch {
     // Ignore, fall back to defaults
   }
+  // ik_llama.cpp requires Build examples (server is in examples/)
+  if (engineKey === 'ik_llama') {
+    baseConfig.build_examples = true
+  }
   buildForm.value.commitSha = updateInfo?.latest_version || (engineKey === 'ik_llama' ? 'main' : 'master')
   buildForm.value.versionSuffix = ''
   buildForm.value.buildConfig = baseConfig
@@ -726,8 +808,7 @@ async function doStartBuild() {
     const repoSource = buildTarget.value === 'ik_llama' ? 'ik_llama.cpp' : 'llama.cpp'
     const engineId = buildTarget.value === 'ik_llama' ? 'ik_llama' : 'llama_cpp'
     const config = { ...buildForm.value.buildConfig }
-    if (!config.cuda_architectures) delete config.cuda_architectures
-    // Persist settings before triggering a manual build
+    // Persist settings before triggering a manual build (full config)
     await saveEngineBuildSettings(engineId, config)
     await enginesStore.buildSource({
       commit_sha: buildForm.value.commitSha || (buildTarget.value === 'ik_llama' ? 'main' : 'master'),
@@ -748,7 +829,6 @@ async function doStartBuild() {
 async function saveBuildSettingsOnly() {
   const engineId = buildTarget.value === 'ik_llama' ? 'ik_llama' : 'llama_cpp'
   const config = { ...buildForm.value.buildConfig }
-  if (!config.cuda_architectures) delete config.cuda_architectures
   savingBuildSettings.value = true
   try {
     await saveEngineBuildSettings(engineId, config)
@@ -1323,4 +1403,26 @@ code {
 
 .opt-label { font-size: 0.875rem; font-weight: 500; display: block; }
 .opt-desc  { font-size: 0.75rem; color: var(--text-secondary); display: block; }
+
+.build-options-section {
+  font-size: 0.8rem;
+  font-weight: 600;
+  color: var(--text-secondary);
+  margin-bottom: 0.25rem;
+  display: block;
+}
+.flags-row { display: flex; gap: 0.5rem; }
+
+.build-note {
+  font-size: 0.8rem;
+  padding: 0.5rem 0.6rem;
+  border-radius: 6px;
+  margin-bottom: 0.5rem;
+}
+.build-note--info {
+  background: var(--surface-100);
+  color: var(--text-color);
+  border: 1px solid var(--surface-border);
+}
+.build-note strong { font-weight: 600; }
 </style>