diff --git a/.github/workflows/zig.yml b/.github/workflows/zig.yml
new file mode 100644
index 0000000..c209572
--- /dev/null
+++ b/.github/workflows/zig.yml
@@ -0,0 +1,39 @@
+name: Build & Tests
+
+on:
+  push:
+    branches: [ main, zig-port ]
+  pull_request:
+    branches: [ main, zig-port ]
+
+jobs:
+  c-build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Build C version
+        run: cd c && make clean && make
+
+      - name: Test C executable runs
+        run: cd c && echo -e "hi\npython\nexit" | timeout 5 ./chat || true
+
+  zig-build-and-test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: goto-bus-stop/setup-zig@v2
+        with:
+          version: 0.15.2
+
+      - name: Build Zig chatbot
+        run: cd zig && mkdir -p zig-out/bin && zig build-exe src/main.zig -femit-bin=zig-out/bin/chat
+
+      - name: Run Zig tests
+        run: cd zig && zig test src/chatbot.zig
+
+      - name: Test Zig executable runs
+        run: cd zig && echo -e "hi\npython\nexit" | timeout 5 ./zig-out/bin/chat || true
diff --git a/.gitignore b/.gitignore
index c039178..3866087 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,7 @@ chat
 *.i*86
 *.x86_64
 *.hex
+
+# Zig build artifacts
+.zig-cache/
+zig-out/
diff --git a/BENCHMARK.md b/BENCHMARK.md
new file mode 100644
index 0000000..3858c3a
--- /dev/null
+++ b/BENCHMARK.md
@@ -0,0 +1,48 @@
+# Chatbot Build Benchmark Results
+
+Benchmark comparing C and Zig implementations of the chatbot.
+
+**System:** macOS arm64
+**Date:** 2026-01-12
+
+## Results
+
+| Metric | C | Zig | Difference |
+|--------|---|-----|-----------|
+| Build Time | 93ms | 710ms | Zig is 7.63x slower |
+| Executable Size | 33K | 1.3M | Zig is 39x larger |
+
+## Analysis
+
+### Build Time
+- **C (93ms)**: Fast compilation using GCC with minimal optimization
+- **Zig (710ms)**: Longer compilation time due to Zig's more comprehensive compiler
+
+The C version compiles significantly faster due to:
+- Simpler compilation pipeline
+- No build system overhead (direct gcc command)
+- Minimal type checking and analysis
+
+### Executable Size
+- **C (33K)**: Small, minimal runtime
+- **Zig (1.3M)**: Larger due to Zig's standard library and runtime
+
+The Zig executable is larger because:
+- Zig stdlib is embedded in the binary
+- More comprehensive runtime features
+- GeneralPurposeAllocator adds overhead
+
+## Notes
+
+- Both versions are unoptimized builds
+- C build uses `-std=c11 -Wall -Wextra -pedantic`
+- Zig build uses default optimization level
+- Times are from cold builds (no cache)
+
+## Running the Benchmark
+
+```bash
+./benchmark.sh
+```
+
+This will clean, rebuild both versions, and compare build times and sizes.
diff --git a/benchmark.sh b/benchmark.sh
new file mode 100755
index 0000000..be1bfce
--- /dev/null
+++ b/benchmark.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+set -e
+
+# Run from the repository root (the directory holding this script) so the
+# relative c/ and zig/ paths work no matter where the script is invoked from.
+cd "$(dirname "$0")"
+
+# Print how many times faster the quicker build was.
+# $1 = name of the faster version, $2 = its time in ms, $3 = the slower time in ms.
+# A 0 ms measurement cannot be used as a divisor, so no ratio is printed then.
+report_faster() {
+    if [ "$2" -gt 0 ]; then
+        echo "$1 version is $(echo "scale=2; $3 / $2" | bc)x faster"
+    else
+        echo "$1 version is faster (finished in under 1ms, ratio not computed)"
+    fi
+}
+
+echo "=========================================="
+echo "Chatbot Build Benchmark"
+echo "=========================================="
+echo ""
+
+# C Version Benchmark
+echo "C Version Benchmark"
+echo "------------------------------------------"
+
+cd c
+echo "Cleaning..."
+make clean > /dev/null 2>&1
+
+echo "Building C version..."
+C_START=$(date +%s%N)
+make > /dev/null 2>&1
+C_END=$(date +%s%N)
+C_TIME=$(( (C_END - C_START) / 1000000 )) # Convert to milliseconds
+
+C_SIZE=$(ls -lh chat | awk '{print $5}')
+
+echo "Build time: ${C_TIME}ms"
+echo "Executable size: ${C_SIZE}"
+
+cd ..
+echo ""
+
+# Zig Version Benchmark
+echo "Zig Version Benchmark"
+echo "------------------------------------------"
+
+cd zig
+echo "Cleaning..."
+rm -rf zig-out .zig-cache > /dev/null 2>&1
+mkdir -p zig-out/bin
+
+echo "Building Zig version..."
+ZIG_START=$(date +%s%N)
+zig build-exe src/main.zig -femit-bin=zig-out/bin/chat > /dev/null 2>&1
+ZIG_END=$(date +%s%N)
+ZIG_TIME=$(( (ZIG_END - ZIG_START) / 1000000 )) # Convert to milliseconds
+
+ZIG_SIZE=$(ls -lh zig-out/bin/chat | awk '{print $5}')
+
+echo "Build time: ${ZIG_TIME}ms"
+echo "Executable size: ${ZIG_SIZE}"
+
+cd ..
+echo ""
+
+# Comparison
+echo "Comparison"
+echo "------------------------------------------"
+if [ "$C_TIME" -lt "$ZIG_TIME" ]; then
+    report_faster "C" "$C_TIME" "$ZIG_TIME"
+else
+    report_faster "Zig" "$ZIG_TIME" "$C_TIME"
+fi
+
+echo ""
+echo "Summary:"
+echo " C: ${C_TIME}ms, ${C_SIZE}"
+echo " Zig: ${ZIG_TIME}ms, ${ZIG_SIZE}"
diff --git a/Makefile b/c/Makefile
similarity index 81%
rename from Makefile
rename to c/Makefile
index 3ced085..d2e284a 100644
--- a/Makefile
+++ b/c/Makefile
@@ -2,4 +2,4 @@ all:
 	gcc -std=c11 -Wall -Wextra -pedantic src/chatbot.c -o chat
 
 clean:
-	rm -rf *o chat
+	rm -rf *.o chat
diff --git a/src/chatbot.c b/c/src/chatbot.c
similarity index 100%
rename from src/chatbot.c
rename to c/src/chatbot.c
diff --git a/src/chatbot.h b/c/src/chatbot.h
similarity index 100%
rename from src/chatbot.h
rename to c/src/chatbot.h
diff --git a/example.c b/example.c
deleted file mode 100644
index e69de29..0000000
diff --git a/readme.md b/readme.md
index a011564..edae023 100644
--- a/readme.md
+++ b/readme.md
@@ -1,46 +1,82 @@
 # Chatbot
 
-[![Build Status](https://travis-ci.org/vinitkumar/chatbot.svg?branch=master)](https://travis-ci.org/vinitkumar/chatbot)
+A simple keyword-based chatbot with implementations in both C and Zig.
 
-Chatbot is a simple bot written in C. The base algorithm is that our conversation is based on keywords. The bot seaches for this keyword and present a suitable response on basis of that keyword.
+## Benchmarks
 
-## Installation
+### Build Time & Size
 
-Installation is very easy via `MakeFile` :
+See [BENCHMARK.md](BENCHMARK.md) for detailed build metrics.
 
-Just run:
+| Metric | C | Zig | Difference |
+|--------|---|-----|-----------|
+| Build Time | 93ms | 710ms | Zig is 7.63x slower |
+| Executable Size | 33K | 1.3M | Zig is 39x larger |
 
+Run build benchmark:
+```bash
+./benchmark.sh
 ```
-chatbot ➤ make
-gcc src/chatbot.c -o chat
+
+### Runtime Performance
+
+Both versions produce identical output. Runtime measurements with test inputs:
+
+| Metric | C | Zig | Difference |
+|--------|---|-----|-----------|
+| Execution Time | 286ms | 526ms | C is 1.83x faster |
+
+Run runtime benchmark:
+```bash
+./run_benchmarks.sh
 ```
 
-## Usage
+## Building
+
+### C Version
 
-After running `make all` you get a executable named as `chat`. Now run `./chat` and start chatting with the bot.
+```bash
+cd c
+make
+./chat
+```
+
+See `c/Makefile` for more details.
 
-## Demo
+### Zig Version
 
+```bash
+cd zig
+mkdir -p zig-out/bin
+zig build-exe src/main.zig -femit-bin=zig-out/bin/chat
+./zig-out/bin/chat
 ```
-chatbot ➤ ./chat
-$ Chatbot v1.0.0!
 
-$ (user) hi
+Or use the Zig build system:
 
-$ (chatbot) hello
+```bash
+cd zig
+zig build run
+```
 
-$ (user) python
+See `zig/README.md` for more details.
 
-$ (chatbot) Yo, I love Python
+## Testing
 
-$ (user) What
+### C Version
 
-$ (chatbot) It is clear, ain't it?
+No automated tests (original implementation).
 
-$ (user) exit
+### Zig Version
+
+```bash
+cd zig
+zig test src/chatbot.zig
 ```
 
-## Issues and Pull request.
+Or:
 
-Feel free to create an issue if you notice a bug. Pull request are really
-welcome with good fixes.
+```bash
+cd zig
+zig build test
+```
diff --git a/run_benchmarks.sh b/run_benchmarks.sh
new file mode 100755
index 0000000..5c8e049
--- /dev/null
+++ b/run_benchmarks.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+set -e
+
+# Run from the repository root (the directory holding this script) so the
+# relative c/, zig/ and test_inputs.txt paths work from any directory.
+cd "$(dirname "$0")"
+
+# Keep program output in private temporary files rather than fixed /tmp names,
+# and remove them however the script exits.
+C_OUT=$(mktemp "${TMPDIR:-/tmp}/chatbot_c_output.XXXXXX")
+ZIG_OUT=$(mktemp "${TMPDIR:-/tmp}/chatbot_zig_output.XXXXXX")
+trap 'rm -f "$C_OUT" "$ZIG_OUT"' EXIT
+
+# Print how many times faster the quicker run was.
+# $1 = name of the faster version, $2 = its time in ms, $3 = the slower time in ms.
+# A 0 ms measurement cannot be used as a divisor, so no ratio is printed then.
+report_faster() {
+    if [ "$2" -gt 0 ]; then
+        echo "$1 version is $(echo "scale=2; $3 / $2" | bc)x faster"
+    else
+        echo "$1 version is faster (finished in under 1ms, ratio not computed)"
+    fi
+}
+
+echo "=========================================="
+echo "Chatbot Runtime Benchmark & Testing"
+echo "=========================================="
+echo ""
+
+# Build both versions
+echo "Building both versions..."
+echo ""
+
+cd c
+make clean > /dev/null 2>&1
+make > /dev/null 2>&1
+cd ..
+
+cd zig
+rm -rf zig-out .zig-cache > /dev/null 2>&1
+mkdir -p zig-out/bin
+zig build-exe src/main.zig -femit-bin=zig-out/bin/chat > /dev/null 2>&1
+cd ..
+
+echo "Build complete!"
+echo ""
+
+# Test C version
+echo "=========================================="
+echo "C Version Testing & Benchmarking"
+echo "=========================================="
+echo ""
+echo "Running with test inputs..."
+
+C_START=$(date +%s%N)
+c/./chat < test_inputs.txt > "$C_OUT" 2>&1
+C_END=$(date +%s%N)
+C_TIME=$(( (C_END - C_START) / 1000000 )) # milliseconds
+
+echo "Output:"
+cat "$C_OUT"
+echo ""
+echo "Execution time: ${C_TIME}ms"
+echo ""
+
+# Test Zig version
+echo "=========================================="
+echo "Zig Version Testing & Benchmarking"
+echo "=========================================="
+echo ""
+echo "Running with test inputs..."
+
+ZIG_START=$(date +%s%N)
+zig/zig-out/bin/./chat < test_inputs.txt > "$ZIG_OUT" 2>&1
+ZIG_END=$(date +%s%N)
+ZIG_TIME=$(( (ZIG_END - ZIG_START) / 1000000 )) # milliseconds
+
+echo "Output:"
+cat "$ZIG_OUT"
+echo ""
+echo "Execution time: ${ZIG_TIME}ms"
+echo ""
+
+# Compare outputs
+echo "=========================================="
+echo "Output Comparison"
+echo "=========================================="
+echo ""
+
+if diff -q "$C_OUT" "$ZIG_OUT" > /dev/null 2>&1; then
+    echo "✓ Outputs are identical"
+else
+    echo "✗ Outputs differ"
+    echo ""
+    echo "C output:"
+    cat "$C_OUT"
+    echo ""
+    echo "Zig output:"
+    cat "$ZIG_OUT"
+fi
+
+echo ""
+
+# Runtime comparison
+echo "=========================================="
+echo "Runtime Comparison"
+echo "=========================================="
+echo ""
+
+if [ "$C_TIME" -lt "$ZIG_TIME" ]; then
+    report_faster "C" "$C_TIME" "$ZIG_TIME"
+else
+    report_faster "Zig" "$ZIG_TIME" "$C_TIME"
+fi
+
+echo ""
+echo "Summary:"
+echo " C: ${C_TIME}ms"
+echo " Zig: ${ZIG_TIME}ms"
+echo ""
diff --git a/test.c b/test.c
deleted file mode 100644
index e69de29..0000000
diff --git a/test_inputs.txt b/test_inputs.txt
new file mode 100644
index 0000000..32c832f
--- /dev/null
+++ b/test_inputs.txt
@@ -0,0 +1,11 @@
+hi
+hey
+python
+light
+what
+unknown_keyword
+hear
+hello
+python
+light
+exit
diff --git a/zig-port.md b/zig-port.md
new file mode 100644
index 0000000..cbe460d
--- /dev/null
+++ b/zig-port.md
@@ -0,0 +1,8 @@
+ I want this to be an exercise in learning zig.
+
+ Write the same code in zig and use the zig toolchain to compile and build
+
+ zig needs to be the latest stable release. Also, add tests
+
+
+ do all of this in a new branch
diff --git a/zig/README.md b/zig/README.md
new file mode 100644
index 0000000..1208a21
--- /dev/null
+++ b/zig/README.md
@@ -0,0 +1,57 @@
+# Chatbot - Zig Edition
+
+This is a Zig port of the chatbot C implementation. The chatbot uses keyword matching to provide responses based on user input.
+
+## Requirements
+
+- Zig 0.15.2 or later (stable release)
+
+## Building
+
+Build the executable:
+
+```bash
+mkdir -p zig-out/bin
+zig build-exe src/main.zig -femit-bin=zig-out/bin/chat
+```
+
+Or use the build system:
+
+```bash
+zig build
+```
+
+This will create the executable in `zig-out/bin/chat`.
+
+## Running
+
+```bash
+./zig-out/bin/chat
+```
+
+Or via the build system:
+
+```bash
+zig build run
+```
+
+## Testing
+
+Run all tests:
+
+```bash
+zig test src/chatbot.zig
+```
+
+Or via the build system:
+
+```bash
+zig build test
+```
+
+## Architecture
+
+- **chatbot.zig**: Hash table implementation for keyword-response mapping
+- **main.zig**: Interactive chatbot CLI
+
+The hash table uses chaining for collision resolution and supports inserting, updating, and retrieving key-value pairs.
diff --git a/zig/build.zig b/zig/build.zig
new file mode 100644
index 0000000..51157f2
--- /dev/null
+++ b/zig/build.zig
@@ -0,0 +1,38 @@
+const std = @import("std");
+
+/// Declares the build graph: the `chat` executable plus `run` and `test` steps.
+pub fn build(b: *std.Build) void {
+    const target = b.standardTargetOptions(.{});
+    const optimize = b.standardOptimizeOption(.{});
+
+    const exe = b.addExecutable(.{
+        .name = "chat",
+        .root_module = b.createModule(.{
+            .root_source_file = b.path("src/main.zig"),
+            .target = target,
+            .optimize = optimize,
+        }),
+    });
+
+    b.installArtifact(exe);
+
+    const run_cmd = b.addRunArtifact(exe);
+    if (b.args) |args| {
+        run_cmd.addArgs(args);
+    }
+
+    const run_step = b.step("run", "Run the chatbot");
+    run_step.dependOn(&run_cmd.step);
+
+    const tests = b.addTest(.{
+        .root_module = b.createModule(.{
+            .root_source_file = b.path("src/chatbot.zig"),
+            .target = target,
+            .optimize = optimize,
+        }),
+    });
+
+    const run_test = b.addRunArtifact(tests);
+    const test_step = b.step("test", "Run tests");
+    test_step.dependOn(&run_test.step);
+}
diff --git a/zig/src/chatbot.zig b/zig/src/chatbot.zig
new file mode 100644
index 0000000..8197282
--- /dev/null
+++ b/zig/src/chatbot.zig
@@ -0,0 +1,222 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+
+pub const LineLength = 80;
+pub const SeparatorChars = " .,\"\n";
+
+/// One key/value pair in a bucket's singly linked chain.
+/// `key` and `value` are copies owned by the table that holds the entry.
+pub const Entry = struct {
+    key: []const u8,
+    value: []const u8,
+    next: ?*Entry,
+};
+
+/// Fixed-size hash table with chained buckets, mapping keywords to responses.
+/// Each chain is kept sorted by key so lookups can stop early.
+pub const HashTable = struct {
+    size: usize,
+    table: []?*Entry,
+    allocator: Allocator,
+
+    /// Allocate a table with `size` empty buckets.
+    /// Returns `error.InvalidSize` when `size` is 0, or an allocation error.
+    /// Release the result with `destroy`.
+    pub fn create(allocator: Allocator, size: usize) !*HashTable {
+        if (size < 1) return error.InvalidSize;
+
+        const ht = try allocator.create(HashTable);
+        // Do not leak the struct if the bucket array cannot be allocated.
+        errdefer allocator.destroy(ht);
+
+        const buckets = try allocator.alloc(?*Entry, size);
+        @memset(buckets, null);
+
+        ht.* = .{ .size = size, .table = buckets, .allocator = allocator };
+        return ht;
+    }
+
+    /// Map `key` to a bucket index by folding its bytes into a wrapping u64.
+    fn hash(self: *const HashTable, key: []const u8) usize {
+        var hashval: u64 = 0;
+        for (key) |char| {
+            hashval = (hashval << 8) +% @as(u64, char);
+        }
+        // The remainder is below `self.size`, so it always fits in usize.
+        return @intCast(hashval % self.size);
+    }
+
+    /// Insert `key` with `value`, or replace the value if `key` already exists.
+    /// Both slices are copied. The table is unchanged if an allocation fails.
+    pub fn set(self: *HashTable, key: []const u8, value: []const u8) !void {
+        const bin = self.hash(key);
+        var next = self.table[bin];
+        var last: ?*Entry = null;
+
+        // Walk the sorted chain to the first entry whose key is not below `key`.
+        while (next) |current| {
+            const cmp = std.mem.order(u8, current.key, key);
+            if (cmp == .gt) {
+                break;
+            }
+
+            if (cmp == .eq) {
+                // Copy the new value before freeing the old one. If the copy
+                // failed after the free, the entry would keep a dangling slice
+                // that `destroy` frees a second time.
+                const replacement = try self.allocator.dupe(u8, value);
+                self.allocator.free(current.value);
+                current.value = replacement;
+                return;
+            }
+
+            last = current;
+            next = current.next;
+        }
+
+        // Key doesn't exist: build a new entry, releasing earlier allocations
+        // if a later one fails.
+        const newpair = try self.allocator.create(Entry);
+        errdefer self.allocator.destroy(newpair);
+        const key_copy = try self.allocator.dupe(u8, key);
+        errdefer self.allocator.free(key_copy);
+        const value_copy = try self.allocator.dupe(u8, value);
+
+        newpair.* = .{ .key = key_copy, .value = value_copy, .next = next };
+
+        // Link the entry after `last`, or at the head of the bucket.
+        if (last) |l| {
+            l.next = newpair;
+        } else {
+            self.table[bin] = newpair;
+        }
+    }
+
+    /// Return the value stored for `key`, or null when it is absent.
+    /// The slice is owned by the table: `destroy`, or a later `set` on the
+    /// same key, frees it.
+    pub fn get(self: *const HashTable, key: []const u8) ?[]const u8 {
+        const bin = self.hash(key);
+        var pair = self.table[bin];
+
+        while (pair) |current| {
+            const cmp = std.mem.order(u8, current.key, key);
+            if (cmp == .gt) {
+                // Chains are sorted, so the key cannot appear further on.
+                break;
+            }
+
+            if (cmp == .eq) {
+                return current.value;
+            }
+
+            pair = current.next;
+        }
+
+        return null;
+    }
+
+    /// Free every entry, the bucket array and the table itself.
+    /// The pointer must not be used afterwards.
+    pub fn destroy(self: *HashTable) void {
+        for (self.table) |entry_opt| {
+            var entry = entry_opt;
+            while (entry) |current| {
+                // Read the link before the entry is freed.
+                const next = current.next;
+                self.allocator.free(current.key);
+                self.allocator.free(current.value);
+                self.allocator.destroy(current);
+                entry = next;
+            }
+        }
+        self.allocator.free(self.table);
+        self.allocator.destroy(self);
+    }
+};
+
+test "create_hashtable" {
+    const ht = try HashTable.create(std.testing.allocator, 100);
+    defer ht.destroy();
+
+    try std.testing.expectEqual(@as(usize, 100), ht.size);
+}
+
+test "create_rejects_zero_size" {
+    try std.testing.expectError(error.InvalidSize, HashTable.create(std.testing.allocator, 0));
+}
+
+test "set_and_get" {
+    const ht = try HashTable.create(std.testing.allocator, 100);
+    defer ht.destroy();
+
+    try ht.set("key", "value");
+    const result = ht.get("key");
+
+    try std.testing.expect(result != null);
+    try std.testing.expectEqualStrings("value", result.?);
+}
+
+test "get_nonexistent_key" {
+    const ht = try HashTable.create(std.testing.allocator, 100);
+    defer ht.destroy();
+
+    try std.testing.expect(ht.get("nonexistent") == null);
+}
+
+test "update_existing_key" {
+    const ht = try HashTable.create(std.testing.allocator, 100);
+    defer ht.destroy();
+
+    try ht.set("key", "value1");
+    try ht.set("key", "value2");
+
+    const result = ht.get("key");
+    try std.testing.expect(result != null);
+    try std.testing.expectEqualStrings("value2", result.?);
+}
+
+test "multiple_entries" {
+    const ht = try HashTable.create(std.testing.allocator, 100);
+    defer ht.destroy();
+
+    try ht.set("hi", "hello");
+    try ht.set("hey", "hello");
+    try ht.set("python", "Yo, I love Python");
+
+    try std.testing.expect(ht.get("hi") != null);
+    try std.testing.expectEqualStrings("hello", ht.get("hi").?);
+
+    try std.testing.expect(ht.get("python") != null);
+    try std.testing.expectEqualStrings("Yo, I love Python", ht.get("python").?);
+
+    try std.testing.expect(ht.get("missing") == null);
+}
+
+test "colliding_keys_share_a_bucket" {
+    // A single bucket forces every key into the same sorted chain.
+    const ht = try HashTable.create(std.testing.allocator, 1);
+    defer ht.destroy();
+
+    try ht.set("b", "2");
+    try ht.set("a", "1");
+    try ht.set("c", "3");
+
+    try std.testing.expectEqualStrings("1", ht.get("a").?);
+    try std.testing.expectEqualStrings("2", ht.get("b").?);
+    try std.testing.expectEqualStrings("3", ht.get("c").?);
+    try std.testing.expect(ht.get("bb") == null);
+}
+
+/// Runs every allocating path once; driven by `checkAllAllocationFailures`.
+fn insertAndUpdate(allocator: Allocator) !void {
+    const ht = try HashTable.create(allocator, 4);
+    defer ht.destroy();
+
+    try ht.set("key", "value1");
+    try ht.set("key", "value2");
+}
+
+test "allocation_failures_do_not_leak_or_double_free" {
+    try std.testing.checkAllAllocationFailures(std.testing.allocator, insertAndUpdate, .{});
+}
diff --git a/zig/src/main.zig b/zig/src/main.zig
new file mode 100644
index 0000000..292fcac
--- /dev/null
+++ b/zig/src/main.zig
@@ -0,0 +1,84 @@
+const std = @import("std");
+const chatbot = @import("chatbot.zig");
+
+/// Interactive loop: read a line, answer each word in it, and stop on "exit",
+/// an empty line, or end of input.
+pub fn main() !void {
+    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+    defer _ = gpa.deinit();
+    const allocator = gpa.allocator();
+
+    // Zig 0.15 I/O: explicit buffer management
+    var stdout_buf: [4096]u8 = undefined;
+    var stdout = std.fs.File.stdout().writer(&stdout_buf);
+
+    var stdin_buf: [4096]u8 = undefined;
+    var stdin = std.fs.File.stdin().reader(&stdin_buf);
+
+    try stdout.interface.print("$ Chatbot v1.0.0!\n", .{});
+    try stdout.interface.flush();
+
+    // Create hash table
+    const ht = try chatbot.HashTable.create(allocator, 65536);
+    defer ht.destroy();
+
+    // Populate responses.
+    // NOTE(review): input words are lower-cased before lookup, so the "What"
+    // key can never match. It is kept as-is because run_benchmarks.sh compares
+    // this program's output with the C version - confirm the intended behaviour.
+    try ht.set("hi", "hello");
+    try ht.set("hey", "hello");
+    try ht.set("hear", "What you heard is right");
+    try ht.set("python", "Yo, I love Python");
+    try ht.set("light", "I like light");
+    try ht.set("What", "It is clear, ain't it?");
+
+    while (true) {
+        try stdout.interface.print("\n$ (user) ", .{});
+        try stdout.interface.flush();
+
+        // Read one line together with its newline: the inclusive variant
+        // always consumes the delimiter, so the next read starts on the
+        // following line.
+        // NOTE(review): whether takeDelimiterExclusive also consumes the
+        // delimiter appears to differ between 0.15.x releases - confirm
+        // against the pinned toolchain before switching back to it.
+        const line = stdin.interface.takeDelimiterInclusive('\n') catch |err| switch (err) {
+            // Input ended without a final newline: use the unterminated tail
+            // if there is one, otherwise stop.
+            error.EndOfStream => stdin.interface.takeDelimiterExclusive('\n') catch |tail_err| switch (tail_err) {
+                error.EndOfStream => break,
+                else => |e| return e,
+            },
+            error.StreamTooLong => {
+                // The line does not fit in stdin_buf and nothing was consumed,
+                // so drop it explicitly; a bare `continue` would fail on the
+                // same bytes forever.
+                _ = stdin.interface.discardDelimiterInclusive('\n') catch |discard_err| switch (discard_err) {
+                    error.EndOfStream => break,
+                    else => |e| return e,
+                };
+                continue;
+            },
+            else => |e| return e,
+        };
+
+        const trimmed = std.mem.trim(u8, line, " \t\r\n");
+        if (trimmed.len == 0) break;
+
+        var word_iter = std.mem.tokenizeAny(u8, trimmed, chatbot.SeparatorChars);
+
+        while (word_iter.next()) |word| {
+            const lower_word = try std.ascii.allocLowerString(allocator, word);
+            defer allocator.free(lower_word);
+
+            if (std.mem.eql(u8, lower_word, "exit")) {
+                return;
+            }
+
+            const response = ht.get(lower_word) orelse "Sorry, I don't know what to say about that";
+            try stdout.interface.print("\n$ (chatbot) {s}\n", .{response});
+            try stdout.interface.flush();
+        }
+    }
+}