-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathsetup.sh
More file actions
executable file
·214 lines (180 loc) · 6.75 KB
/
setup.sh
File metadata and controls
executable file
·214 lines (180 loc) · 6.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#!/bin/bash
# qsh installer: verifies prerequisites, then builds and configures qsh.
set -e
echo "--- 🐚 qsh Installation ---"

# 1. Check for Rust/Cargo
if ! command -v cargo &> /dev/null; then
  echo "Error: Rust/Cargo not found. Please install it first: https://rustup.rs"
  exit 1
fi

# 2. Check for Python 3.10+, cmake (for llamacpp)
PYTHON_CMD=""
for cmd in python3.14 python3.13 python3.12 python3.11 python3.10 python3; do
  if command -v "$cmd" &> /dev/null; then
    # Compare the version as a tuple inside Python itself. The previous
    # check piped "$VERSION >= 3.10" through `bc -l`, which compares
    # decimals — 3.9 > 3.10 numerically — so Python 3.9 wrongly passed.
    # This also drops the dependency on `bc` being installed.
    if "$cmd" -c 'import sys; sys.exit(0 if sys.version_info >= (3, 10) else 1)'; then
      PYTHON_CMD=$(command -v "$cmd")
      VERSION=$("$cmd" -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')
      echo "Found compatible Python: $PYTHON_CMD (version $VERSION)"
      break
    fi
  fi
done
if [ -z "$PYTHON_CMD" ]; then
  echo "Error: Python 3.10 or higher is required."
  exit 1
fi
# cmake is only a hard requirement for the LlamaCpp engine; warn here,
# enforce later once the user has picked an engine.
if ! command -v cmake &> /dev/null; then
  echo "Warning: 'cmake' not found. It will be required if you choose the LlamaCpp engine."
fi
# 3. Setup Wizard (Engine & Model)
printf '%s\n' "" "--- Setup Wizard ---" "Select your preferred inference engine:"
printf '%s\n' \
  "1) Python (Transformers) - Default, easiest to setup." \
  "2) Rust (Candle) - Fast, lightweight, but experimental." \
  "3) LlamaCpp (TurboQuant) - Highest performance on Apple Silicon (M2+), requires manual build of the turboquant_plus fork."
read -p "Engine [1-3, default: 1]: " ENGINE_CHOICE
: "${ENGINE_CHOICE:=1}"
# Anything other than 2 or 3 (including the default, 1) selects the Python engine.
case "$ENGINE_CHOICE" in
  2) ENGINE="rust" ;;
  3) ENGINE="llamacpp" ;;
  *) ENGINE="python" ;;
esac

printf '%s\n' "" "Select your preferred Qwen 3.5 model size:"
printf '%s\n' \
  "1) 0.8B - ~0.5GB disk, ~1GB RAM. Fast, for basic shell commands. Fits any M-series Mac." \
  "2) 9B - ~5.5GB disk, ~8GB RAM (16GB recommended). Balanced for most tasks." \
  "3) 27B - ~16GB disk, ~20GB RAM (32GB+ recommended). Expert reasoning, complex scripting."
read -p "Model [1-3, default: 1]: " MODEL_CHOICE
: "${MODEL_CHOICE:=1}"
case "$MODEL_CHOICE" in
  2) MODEL="Qwen/Qwen3.5-9B" ;;
  3) MODEL="Qwen/Qwen3.5-27B" ;;
  *) MODEL="Qwen/Qwen3.5-0.8B" ;;
esac

# Fail fast if cmake is required but missing
if [ "$ENGINE" == "llamacpp" ] && ! command -v cmake > /dev/null 2>&1; then
  echo "Error: 'cmake' is required to build the LlamaCpp engine. Please install it (e.g., 'brew install cmake') and run setup again."
  exit 1
fi

printf '%s\n' "" "Engine: $ENGINE | Model: $MODEL" "Starting installation..." ""

# 4. Determine Installation Directory
# Everything lives under ~/.local/share/qsh so the install stays self-contained.
INSTALL_ROOT="$HOME/.local/share/qsh"
BIN_DIR="$HOME/.local/bin"
mkdir -p "$INSTALL_ROOT/src" "$BIN_DIR"
# 5. Clone and Build
TEMP_DIR=$(mktemp -d)
# Remove the temp checkout on any exit path; with `set -e` an aborted run
# would otherwise leak it (the explicit rm at the end only runs on success).
trap 'rm -rf -- "$TEMP_DIR"' EXIT
echo "Cloning repository..."
git clone https://github.com/woodRock/qsh.git "$TEMP_DIR"
# `git clone <url> <dir>` checks the repo out directly INTO <dir>; the
# previous `cd "$TEMP_DIR/qsh"` pointed at a subdirectory that never
# exists, so the script always died here under `set -e`.
cd "$TEMP_DIR"
echo "Building Rust CLI (this may take a minute)..."
cargo build --release --quiet

# 6. Install Components
echo "Installing components to $INSTALL_ROOT..."
cp target/release/qsh "$INSTALL_ROOT/qsh-bin"
cp src/inference.py "$INSTALL_ROOT/src/inference.py"
# Create a wrapper script in ~/.local/bin/qsh that forwards all arguments
# to the real binary ("\$@" is escaped so it expands at run time, not now).
cat << EOF > "$BIN_DIR/qsh"
#!/bin/bash
exec "$INSTALL_ROOT/qsh-bin" "\$@"
EOF
chmod +x "$BIN_DIR/qsh"
# 7. Setup Python Virtual Environment
echo "Setting up Python environment (qenv)..."
cd "$INSTALL_ROOT"
"$PYTHON_CMD" -m venv qenv
# Activate the venv so the `pip` invocations below target it.
. qenv/bin/activate
echo "Installing Python dependencies (transformers, torch, peft, datasets, etc.)..."
pip install --upgrade pip --quiet
pip install --quiet git+https://github.com/huggingface/transformers.git qwen-vl-utils torch torchvision accelerate pillow peft datasets

# Write the initial config into the macOS per-app support directory.
CONFIG_DIR="$HOME/Library/Application Support/com.qwen.qsh"
CONFIG_FILE="$CONFIG_DIR/config.toml"
mkdir -p "$CONFIG_DIR"
cat << EOF > "$CONFIG_FILE"
default_engine = "$ENGINE"
default_model = "$MODEL"
safety_check = true
[llama_cpp]
server_url = "http://localhost:8080"
turbo_k = "q8_0"
turbo_v = "turbo4"
flash_attn = true
EOF
if [ "$ENGINE" == "llamacpp" ]; then
  echo ""
  echo "Setting up TurboQuant+ (llama.cpp fork)..."
  # 7.1 Fetch and build the llama.cpp fork on its TurboQuant KV-cache branch.
  TURBO_DIR="$INSTALL_ROOT/llama-cpp-turboquant"
  if [ ! -d "$TURBO_DIR" ]; then
    git clone https://github.com/TheTom/llama-cpp-turboquant.git "$TURBO_DIR"
  fi
  cd "$TURBO_DIR"
  git checkout feature/turboquant-kv-cache --quiet
  echo "Building llama-server with Metal support (this may take a few minutes)..."
  cmake -B build -DGGML_METAL=ON -DGGML_METAL_EMBED_LIBRARY=ON -DCMAKE_BUILD_TYPE=Release
  cmake --build build --config Release --target llama-server -j"$(sysctl -n hw.ncpu)"
  SERVER_BIN="$TURBO_DIR/build/bin/llama-server"

  # 7.2 Download GGUF Model (quantized Q4_K_M build)
  # ${MODEL##*/} strips the "Qwen/" org prefix, replacing the echo|sed pipe.
  MODEL_FILENAME="${MODEL##*/}-Q4_K_M.gguf"
  # Note: Unsloth uses a different repo naming convention
  REPO_NAME="unsloth/${MODEL##*/}-GGUF"
  MODEL_URL="https://huggingface.co/$REPO_NAME/resolve/main/$MODEL_FILENAME"
  MODEL_DEST="$INSTALL_ROOT/models/$MODEL_FILENAME"

  # 7.3 Download Vision Projector (mmproj)
  MMPROJ_FILENAME="mmproj-BF16.gguf"
  MMPROJ_URL="https://huggingface.co/$REPO_NAME/resolve/main/$MMPROJ_FILENAME"
  MMPROJ_DEST="$INSTALL_ROOT/models/$MMPROJ_FILENAME"
  mkdir -p "$INSTALL_ROOT/models"

  if [ ! -f "$MODEL_DEST" ]; then
    # Human-readable size hint; previously an inline $(case …) with no
    # default arm, which printed "~GB" for any unexpected MODEL_CHOICE.
    case $MODEL_CHOICE in
      2) APPROX_SIZE="5.5GB" ;;
      3) APPROX_SIZE="16GB" ;;
      *) APPROX_SIZE="0.5GB" ;;  # choice 1 and any fallback select the 0.8B model
    esac
    echo ""
    echo "📥 Downloading $MODEL_FILENAME (~$APPROX_SIZE)..."
    # -f/--fail: exit non-zero on HTTP errors (e.g. 404 for a mis-guessed
    # repo name) instead of saving the server's HTML error page as the
    # "model", which the [ ! -f ] cache check would then trust forever.
    if ! curl -fL "$MODEL_URL" -o "$MODEL_DEST"; then
      rm -f "$MODEL_DEST"
      echo "Error: failed to download model from $MODEL_URL"
      exit 1
    fi
  else
    echo ""
    echo "✅ Model $MODEL_FILENAME already exists at $MODEL_DEST"
  fi

  if [ ! -f "$MMPROJ_DEST" ]; then
    echo "📥 Downloading Vision Projector $MMPROJ_FILENAME..."
    if ! curl -fL "$MMPROJ_URL" -o "$MMPROJ_DEST"; then
      rm -f "$MMPROJ_DEST"
      echo "Error: failed to download vision projector from $MMPROJ_URL"
      exit 1
    fi
  else
    echo "✅ Vision Projector $MMPROJ_FILENAME already exists."
  fi

  # Update config with binary, model path and mmproj
  cat << EOF > "$CONFIG_FILE"
default_engine = "$ENGINE"
default_model = "$MODEL"
safety_check = true
[llama_cpp]
server_url = "http://localhost:8080"
server_binary = "$SERVER_BIN"
model_path = "$MODEL_DEST"
mmproj_path = "$MMPROJ_DEST"
turbo_k = "q8_0"
turbo_v = "turbo4"
flash_attn = true
EOF
  echo ""
  echo "✅ TurboQuant+ built successfully at $SERVER_BIN"
  echo "✅ Model downloaded and configured at $MODEL_DEST"
fi
# Cleanup
rm -rf "$TEMP_DIR"

printf '%s\n' \
  "----------------------------------------" \
  "✅ Installation successful!" \
  "" \
  "qsh is installed at $BIN_DIR/qsh"
# Warn when the wrapper's directory is not already on PATH.
case ":$PATH:" in
  *":$BIN_DIR:"*)
    ;;  # already reachable — nothing to do
  *)
    printf '%s\n' "⚠️ Warning: $BIN_DIR is not in your PATH."
    printf '%s\n' "Add this to your shell profile (e.g., .bashrc or .zshrc):"
    printf ' export PATH="$PATH:%s"\n' "$BIN_DIR"
    ;;
esac
printf '%s\n' \
  "" \
  "Try it out:" \
  " qsh 'list files by size'" \
  " ls *.jpg | qsh vision 'is there a cat?'"