Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 108 additions & 0 deletions demos/workloads/inference/nim-chat-server.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#!/bin/bash
# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# NIM Chat UI — single script to launch everything
# Usage: ./nim-chat-server.sh
# Then open: http://127.0.0.1:9090/chat.html
#
# Optional environment overrides (defaults in parentheses):
#   NAMESPACE  Kubernetes namespace passed to kubectl (nim-workload)
#   SERVICE    kubectl port-forward target (svc/llama-3-2-1b)
#   API_PORT   local port forwarded to the service's port 8000 (8000)
#   UI_PORT    local port serving the chat page and the /v1/ proxy (9090)

set -e

NAMESPACE="${NAMESPACE:-nim-workload}"
SERVICE="${SERVICE:-svc/llama-3-2-1b}"
# Ports follow the same override convention as NAMESPACE/SERVICE so a busy
# local port can be avoided without editing the script.
API_PORT="${API_PORT:-8000}"
UI_PORT="${UI_PORT:-9090}"

cleanup() {
echo "Shutting down..."
kill $PF_PID 2>/dev/null
kill $PY_PID 2>/dev/null
exit 0
}
trap cleanup EXIT INT TERM

# Kill anything already LISTENING on our ports.
# lsof is restricted to TCP listeners on purpose: a bare `-i :PORT` also
# matches clients that merely hold a connection to that port (for example a
# browser tab still attached to the UI), and those must not be killed.
for port in "$API_PORT" "$UI_PORT"; do
  # Never query lsof with an empty port.
  [[ -n "$port" ]] || continue
  pids=$(lsof -t -iTCP:"${port}" -sTCP:LISTEN 2>/dev/null || true)
  if [[ -n "$pids" ]]; then
    echo "Killing existing processes on port $port"
    # Best effort: a process may exit between the lsof call and the kill.
    echo "$pids" | xargs kill 2>/dev/null || true
    sleep 1
  fi
done

# Start port-forward to NIM service
echo "Starting port-forward to $SERVICE on :$API_PORT..."
kubectl port-forward -n "$NAMESPACE" "$SERVICE" "$API_PORT":8000 &
PF_PID=$!
# Give kubectl a moment to either bind the port or fail.
sleep 2
# `set -e` does not cover background jobs, so check explicitly that the
# port-forward is still alive before claiming the UI is ready.
if ! kill -0 "$PF_PID" 2>/dev/null; then
  echo "kubectl port-forward exited early; check NAMESPACE, SERVICE and cluster access." >&2
  exit 1
fi

# Start chat UI + API proxy on UI_PORT
# Settings reach Python through the environment and the program is a quoted
# heredoc, so no shell value is ever spliced into Python source (a script path
# containing a quote used to break the embedded program).
echo "Starting chat UI on :$UI_PORT..."
NIM_CHAT_API_PORT="$API_PORT" \
NIM_CHAT_UI_PORT="$UI_PORT" \
NIM_CHAT_HTML_PATH="$(dirname "$0")/nim-chat.html" \
python3 - <<'PYEOF' &
"""Serve the chat page and relay /v1/ requests to the port-forwarded API."""
import http.server
import os
import urllib.error
import urllib.request

API = 'http://127.0.0.1:' + os.environ['NIM_CHAT_API_PORT']
HTML_PATH = os.environ['NIM_CHAT_HTML_PATH']
UI_PORT = int(os.environ['NIM_CHAT_UI_PORT'])


class H(http.server.BaseHTTPRequestHandler):
    """Handler for the chat page (GET / or /chat.html) and the /v1/ proxy."""

    def do_GET(self):
        if self.path == '/' or self.path == '/chat.html':
            self._serve_page()
        elif self.path.startswith('/v1/'):
            self._proxy()
        else:
            self.send_error(404)

    def do_POST(self):
        if self.path.startswith('/v1/'):
            self._proxy()
        else:
            self.send_error(404)

    def _reply(self, status, content_type, body):
        """Send a complete response with an explicit Content-Length."""
        self.send_response(status)
        self.send_header('Content-Type', content_type)
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _serve_page(self):
        """Return the chat page, or 404 when the HTML file cannot be read."""
        try:
            with open(HTML_PATH, 'rb') as f:
                html = f.read()
        except OSError:
            # A blank 200 page hides the real problem; report it instead.
            self.send_error(404, 'nim-chat.html not found next to nim-chat-server.sh')
            return
        self._reply(200, 'text/html', html)

    def _proxy(self):
        """Forward the current request to the API and relay its answer."""
        length = int(self.headers.get('Content-Length') or 0)
        body = self.rfile.read(length) if length else None
        req = urllib.request.Request(
            API + self.path, data=body,
            headers={'Content-Type': self.headers.get('Content-Type', 'application/json')},
            method=self.command)
        try:
            with urllib.request.urlopen(req) as r:
                self._reply(r.status,
                            r.headers.get('Content-Type', 'application/json'),
                            r.read())
        except urllib.error.HTTPError as e:
            # The API answered with an error status: relay its status and body
            # unchanged rather than masking it as 502. HTTPError is a subclass
            # of URLError, so this clause must come first.
            self._reply(e.code,
                        e.headers.get('Content-Type', 'application/json'),
                        e.read())
        except urllib.error.URLError as e:
            # The API could not be reached at all (e.g. port-forward is down).
            self.send_error(502, str(e))

    def log_message(self, fmt, *args):
        # Per-request logging is intentionally silenced.
        pass


http.server.HTTPServer(('127.0.0.1', UI_PORT), H).serve_forever()
PYEOF
PY_PID=$!

# Print the ready banner (blank line, URL, stop hint, blank line), then block
# until the background jobs finish.
printf '\nReady! Open http://127.0.0.1:%s/chat.html\nPress Ctrl+C to stop.\n\n' "${UI_PORT}"

wait
239 changes: 239 additions & 0 deletions demos/workloads/inference/nim-chat.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
<!DOCTYPE html>
<!--
Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>NIM Chat</title>
<style>
/* Reset and page shell: the body is a full-height flex column (header, chat, input). */
* { box-sizing: border-box; margin: 0; padding: 0; }
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; background: #1a1a2e; color: #e0e0e0; height: 100vh; display: flex; flex-direction: column; }
/* Header bar with title and badge. */
header { padding: 16px 24px; background: #16213e; border-bottom: 1px solid #0f3460; display: flex; align-items: center; gap: 12px; }
header h1 { font-size: 18px; font-weight: 600; }
header span { font-size: 12px; color: #888; background: #0f3460; padding: 2px 8px; border-radius: 10px; }
/* Scrollable message list; takes the remaining vertical space. */
#chat { flex: 1; overflow-y: auto; padding: 24px; display: flex; flex-direction: column; gap: 16px; }
/* One message row: avatar plus bubble. User rows are mirrored with row-reverse. */
.msg { max-width: 720px; width: 100%; margin: 0 auto; display: flex; gap: 12px; }
.msg.user { flex-direction: row-reverse; }
.msg .avatar { width: 32px; height: 32px; border-radius: 50%; display: flex; align-items: center; justify-content: center; font-size: 14px; flex-shrink: 0; }
.msg.user .avatar { background: #533483; }
.msg.assistant .avatar { background: #76b900; }
.msg .bubble { padding: 12px 16px; border-radius: 12px; line-height: 1.6; word-break: break-word; }
.msg.user .bubble { background: #533483; border-bottom-right-radius: 4px; }
.msg.assistant .bubble { background: #16213e; border: 1px solid #0f3460; border-bottom-left-radius: 4px; }
/* Typography for the HTML that the script's md() renderer places inside a bubble. */
.bubble h1, .bubble h2, .bubble h3 { margin: 12px 0 6px; color: #fff; }
.bubble h1 { font-size: 1.3em; } .bubble h2 { font-size: 1.15em; } .bubble h3 { font-size: 1.05em; }
.bubble p { margin: 6px 0; }
.bubble ul, .bubble ol { margin: 6px 0 6px 20px; }
.bubble li { margin: 2px 0; }
.bubble code { background: #0d1b2a; padding: 2px 6px; border-radius: 4px; font-size: 0.9em; }
.bubble pre { background: #0d1b2a; padding: 12px; border-radius: 8px; overflow-x: auto; margin: 8px 0; }
.bubble pre code { background: none; padding: 0; }
.bubble strong { color: #fff; }
.bubble hr { border: none; border-top: 1px solid #0f3460; margin: 12px 0; }
/* Input bar at the bottom: textarea plus send button. */
#input-area { padding: 16px 24px; background: #16213e; border-top: 1px solid #0f3460; }
#input-row { max-width: 720px; margin: 0 auto; display: flex; gap: 8px; }
#input { flex: 1; padding: 12px 16px; border-radius: 12px; border: 1px solid #0f3460; background: #1a1a2e; color: #e0e0e0; font-size: 15px; outline: none; resize: none; font-family: inherit; }
#input:focus { border-color: #76b900; }
#send { padding: 12px 24px; border-radius: 12px; border: none; background: #76b900; color: white; font-size: 15px; cursor: pointer; font-weight: 600; }
#send:hover { background: #8fd400; }
#send:disabled { background: #333; cursor: not-allowed; }
/* Centered hint text shown in the chat area. */
.status { text-align: center; color: #666; font-size: 13px; padding: 8px; }
</style>
</head>
<body>
<!-- Title bar -->
<header>
<h1>Llama 3.2 1B Chat</h1>
<span>NVIDIA NIM on EKS</span>
</header>
<!-- Message list; the script appends .msg rows here and removes the .status hint on the first message -->
<div id="chat">
<div class="status">Send a message to start chatting</div>
</div>
<!-- Composer: the script wires Enter to sendMsg(); the button calls it via onclick -->
<div id="input-area">
<div id="input-row">
<textarea id="input" rows="1" placeholder="Type a message... (Shift+Enter for newline)" autofocus></textarea>
<button id="send" onclick="sendMsg()">Send</button>
</div>
</div>
<script>
// Relative endpoint: requests go to whichever origin served this page.
var API = '/v1/chat/completions';
// Model identifier sent in every request body.
var MODEL = 'meta/llama-3.2-1b-instruct';
// Conversation history ({role, content} objects) sent with each request.
var messages = [];
// True while a request is in flight; sendMsg() ignores calls meanwhile.
var sending = false;

// Cached DOM handles: message list, textarea, and send button.
var chatEl = document.getElementById('chat');
var inputEl = document.getElementById('input');
var btnEl = document.getElementById('send');

// Plain Enter submits the message; Shift+Enter keeps the textarea's default
// behaviour (inserting a newline).
inputEl.addEventListener('keydown', function(evt) {
  var isPlainEnter = evt.key === 'Enter' && !evt.shiftKey;
  if (!isPlainEnter) return;
  evt.preventDefault();
  sendMsg();
});

// Grow the textarea to fit its content, capped at 120px. The height is reset
// to 'auto' first so scrollHeight reflects the content, not the old height.
inputEl.addEventListener('input', function() {
  var boxStyle = inputEl.style;
  boxStyle.height = 'auto';
  boxStyle.height = Math.min(inputEl.scrollHeight, 120) + 'px';
});

/**
 * Escape the characters that are significant in HTML text and attribute
 * values: & < > " and '.
 *
 * @param {*} s Value to escape; non-strings are converted with String().
 * @returns {string} The escaped text.
 */
function escapeHtml(s) {
  var entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  // Single pass over the input, so text produced by one replacement is never
  // re-escaped by a later one.
  return String(s).replace(/[&<>"']/g, function(ch) { return entities[ch]; });
}

/**
 * Render a small Markdown subset to an HTML string.
 *
 * Supported constructs: fenced code blocks, #/##/### headings, --- rules,
 * bullet lists (- or *), numbered lists, blank lines and paragraphs. Inline
 * markup inside headings, list items and paragraphs goes through
 * inlineFormat().
 *
 * @param {string} src Markdown source.
 * @returns {string} HTML, one element per line, joined with newlines.
 */
function md(src) {
  // Lift fenced code blocks out first and leave a NUL-delimited placeholder,
  // so their content is never treated as Markdown.
  var codeBlocks = [];
  var text = src.replace(/```(\w*)\n([\s\S]*?)```/g, function(m, lang, code) {
    codeBlocks.push('<pre><code>' + escapeHtml(code) + '</code></pre>');
    return '\x00CB' + (codeBlocks.length - 1) + '\x00';
  });

  var lines = text.split('\n');
  var html = [];
  // Tag name of the list currently open ('ul' or 'ol'), or null when none is.
  var openTag = null;

  function closeList() {
    if (openTag) { html.push('</' + openTag + '>'); openTag = null; }
  }

  // Make sure a list of the given kind is open; a list of the other kind is
  // closed first so bullet and numbered items never share one element.
  function openList(tag, attrs) {
    if (openTag === tag) return;
    closeList();
    html.push('<' + tag + attrs + '>');
    openTag = tag;
  }

  for (var i = 0; i < lines.length; i++) {
    var line = lines[i];

    var cbMatch = line.match(/^\x00CB(\d+)\x00$/);
    if (cbMatch) {
      closeList();
      html.push(codeBlocks[parseInt(cbMatch[1], 10)]);
      continue;
    }

    var hMatch = line.match(/^(#{1,3})\s+(.+)$/);
    if (hMatch) {
      closeList();
      var level = hMatch[1].length;
      html.push('<h' + level + '>' + inlineFormat(hMatch[2]) + '</h' + level + '>');
      continue;
    }

    if (line.match(/^---+$/)) {
      closeList();
      html.push('<hr>');
      continue;
    }

    var liMatch = line.match(/^[-*]\s+(.+)$/);
    if (liMatch) {
      openList('ul', '');
      html.push('<li>' + inlineFormat(liMatch[1]) + '</li>');
      continue;
    }

    var olMatch = line.match(/^(\d+)\.\s+(.+)$/);
    if (olMatch) {
      // Numbered items belong in <ol>. When a list resumes at a number other
      // than 1 (for instance after an interrupting paragraph), keep the
      // author's numbering through the start attribute.
      var first = parseInt(olMatch[1], 10);
      openList('ol', first === 1 ? '' : ' start="' + first + '"');
      html.push('<li>' + inlineFormat(olMatch[2]) + '</li>');
      continue;
    }

    closeList();

    if (line.trim() === '') {
      html.push('<br>');
      continue;
    }

    html.push('<p>' + inlineFormat(line) + '</p>');
  }

  closeList();
  return html.join('\n');
}

/**
 * Apply inline Markdown to one line of text: `code`, ***bold italic***,
 * **bold** and *italic*. Everything else is HTML-escaped.
 *
 * @param {string} text Raw line content.
 * @returns {string} HTML fragment.
 */
function inlineFormat(text) {
  // Park inline code spans behind NUL-delimited placeholders so the emphasis
  // patterns below cannot touch their content.
  var stash = [];
  var out = text.replace(/`([^`]+)`/g, function(whole, body) {
    var slot = stash.push('<code>' + escapeHtml(body) + '</code>') - 1;
    return '\x00IC' + slot + '\x00';
  });

  // Escape first, then add emphasis tags; longest marker first so *** is not
  // consumed by the ** or * patterns.
  out = escapeHtml(out)
    .replace(/\*\*\*(.+?)\*\*\*/g, '<strong><em>$1</em></strong>')
    .replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>')
    .replace(/\*(.+?)\*/g, '<em>$1</em>');

  // Put the parked code spans back.
  return out.replace(/\x00IC(\d+)\x00/g, function(whole, n) {
    return stash[parseInt(n)];
  });
}

/**
 * Append a message row to the chat list and scroll to the bottom.
 *
 * @param {string} role 'user' or any other value (rendered as the NIM side);
 *                      also used as the row's CSS class.
 * @param {string} content Markdown to render; a falsy value leaves the bubble empty.
 * @returns {Element} The row's bubble element.
 */
function addMsg(role, content) {
  // The start hint disappears as soon as the first message is added.
  var hint = chatEl.querySelector('.status');
  if (hint) hint.remove();

  var label = 'NIM';
  if (role === 'user') label = 'You';

  var row = document.createElement('div');
  row.className = 'msg ' + role;
  row.innerHTML = '<div class="avatar">' + label + '</div><div class="bubble"></div>';
  chatEl.appendChild(row);
  chatEl.scrollTop = chatEl.scrollHeight;

  var bubbleEl = row.querySelector('.bubble');
  if (content) setContent(bubbleEl, content);
  return bubbleEl;
}

/**
 * Render Markdown text into a bubble, replacing whatever it held.
 *
 * @param {Element} bubble Target bubble element.
 * @param {string} text Markdown source; surrounding whitespace is trimmed.
 */
function setContent(bubble, text) {
  var rendered = md(text.trim());
  bubble.innerHTML = rendered;
}

/**
 * Send the textarea content as a user turn and render the model's reply.
 *
 * Does nothing when the input is blank or a request is already in flight.
 * On failure the user turn is removed from the history again, so a retry does
 * not resend it as an extra, unanswered user message.
 */
function sendMsg() {
  var text = inputEl.value.trim();
  if (!text || sending) return;

  sending = true;
  btnEl.disabled = true;
  inputEl.value = '';
  inputEl.style.height = 'auto';

  addMsg('user', text);
  var userTurn = { role: 'user', content: text };
  messages.push(userTurn);

  var bubble = addMsg('assistant', '');
  bubble.textContent = 'Thinking...';

  fetch(API, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model: MODEL, messages: messages, max_tokens: 512 }),
  })
    .then(function(res) {
      if (!res.ok) throw new Error('HTTP ' + res.status);
      return res.json();
    })
    .then(function(data) {
      // Validate the shape explicitly so a malformed reply yields a readable
      // error instead of a property-access TypeError.
      var choice = data && data.choices && data.choices[0];
      var content = choice && choice.message && choice.message.content;
      if (typeof content !== 'string') throw new Error('Unexpected response format');
      // Render before recording: if rendering throws, the history is not left
      // with an assistant turn the user never saw.
      setContent(bubble, content);
      messages.push({ role: 'assistant', content: content });
    })
    .catch(function(err) {
      // Roll back the unanswered user turn. The `sending` flag serialises
      // requests, so it is still the last entry when the request failed.
      if (messages[messages.length - 1] === userTurn) messages.pop();
      bubble.textContent = 'Error: ' + err.message + '. Make sure nim-chat-server.sh is running.';
    })
    .finally(function() {
      sending = false;
      btnEl.disabled = false;
      chatEl.scrollTop = chatEl.scrollHeight;
      inputEl.focus();
    });
}
</script>
</body>
</html>
Loading
Loading