diff --git a/.github/workflows/zig.yml b/.github/workflows/zig.yml index c209572..4aebb9f 100644 --- a/.github/workflows/zig.yml +++ b/.github/workflows/zig.yml @@ -7,19 +7,55 @@ on: branches: [ main, zig-port ] jobs: - c-build: + c-build-gcc: + name: C Build (GCC) runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Build C version - run: cd c && make clean && make + - name: Check GCC version + run: gcc --version - - name: Test C executable runs - run: cd c && echo -e "hi\npython\nexit" | timeout 5 ./chat || true + - name: Build with GCC (C17 and C23) + run: cd c && make clean && make gcc-builds + + - name: Test GCC C17 executable + run: cd c && echo -e "hi\npython\nexit" | timeout 5 ./chat-gcc-c17 || true + + - name: Test GCC C23 executable + run: cd c && echo -e "hi\npython\nexit" | timeout 5 ./chat-gcc-c23 || true + + - name: Show binary sizes + run: ls -lh c/chat-gcc-* + + c-build-clang: + name: C Build (Clang) + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install Clang + run: sudo apt-get update && sudo apt-get install -y clang + + - name: Check Clang version + run: clang --version + + - name: Build with Clang (C17 and C23) + run: cd c && make clean && make clang-builds + + - name: Test Clang C17 executable + run: cd c && echo -e "hi\npython\nexit" | timeout 5 ./chat-clang-c17 || true + + - name: Test Clang C23 executable + run: cd c && echo -e "hi\npython\nexit" | timeout 5 ./chat-clang-c23 || true + + - name: Show binary sizes + run: ls -lh c/chat-clang-* zig-build-and-test: + name: Zig Build & Test runs-on: ubuntu-latest steps: @@ -30,10 +66,13 @@ jobs: version: 0.15.2 - name: Build Zig chatbot - run: cd zig && mkdir -p zig-out/bin && zig build-exe src/main.zig -femit-bin=zig-out/bin/chat + run: cd zig && zig build - name: Run Zig tests - run: cd zig && zig test src/chatbot.zig + run: cd zig && zig build test - name: Test Zig executable runs run: cd zig && echo -e "hi\npython\nexit" | timeout 5 ./zig-out/bin/chat || true + + - name: Show binary size + run: ls -lh zig/zig-out/bin/chat diff --git a/.gitignore b/.gitignore index 3866087..d7b94df 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,7 @@ chat # Zig build artifacts .zig-cache/ zig-out/ +c/chat-clang-c17 +c/chat-clang-c23 +c/chat-gcc-c17 +c/chat-gcc-c23 diff --git a/BENCHMARK.md b/BENCHMARK.md index 3858c3a..4a6ddd6 100644 --- a/BENCHMARK.md +++ b/BENCHMARK.md @@ -1,48 +1,94 @@ -# Chatbot Build Benchmark Results +# Chatbot Build & Runtime Benchmark Results -Benchmark comparing C and Zig implementations of the chatbot. +Benchmark comparing C (GCC and Clang, C17 and C23 standards) and Zig implementations. **System:** macOS arm64 **Date:** 2026-01-12 -## Results +## Compiler Versions (Local) + + +| Compiler | Version | +| ----------- | -------------------------------------------- | +| GCC (macOS) | Apple Clang 17.0.0 (gcc is aliased to clang) | +| Clang | Homebrew Clang 21.1.8 | +| Zig | 0.15.2 | + + +## Runtime Results + + +| Version | Execution Time | Relative | +| --------- | -------------- | --------------- | +| GCC C17 | 228ms | 2.23x | +| GCC C23 | 102ms | 1.00x (fastest) | +| Clang C17 | 107ms | 1.04x | +| Clang C23 | 106ms | 1.03x | +| Zig | 628ms | 6.15x | + + +## Binary Sizes + + +| Version | Size | +| --------- | ---- | +| GCC C17 | 33K | +| GCC C23 | 33K | +| Clang C17 | 33K | +| Clang C23 | 33K | +| Zig | 1.3M | -| Metric | C | Zig | Difference | -|--------|---|-----|-----------| -| Build Time | 93ms | 710ms | Zig is 7.63x slower | -| Executable Size | 33K | 1.3M | Zig is 39x larger | ## Analysis -### Build Time -- **C (93ms)**: Fast compilation using GCC with minimal optimization -- **Zig (710ms)**: Longer compilation time due to Zig's more comprehensive compiler +### C Compilers + +- **GCC C23** was fastest in this run (times vary between runs) +- **Clang** produces consistently fast binaries across C17/C23 +- On macOS, `gcc` is actually Apple Clang; real GCC is tested on CI (Ubuntu) + +### Zig + +- Slower due to: + - GeneralPurposeAllocator overhead vs C's stack allocation + - Zig 0.15 buffered I/O system overhead + - Additional runtime safety checks +- Much larger binary (embeds stdlib, no libc dependency) + +### Binary Size -The C version compiles significantly faster due to: -- Simpler compilation pipeline -- No build system overhead (direct gcc command) -- Minimal type checking and analysis +- All C versions: 33K (links against system libc) + - Zig: 1.3M (self-contained, no external dependencies) -### Executable Size -- **C (33K)**: Small, minimal runtime -- **Zig (1.3M)**: Larger due to Zig's standard library and runtime +## Build Configuration + + +| Compiler | Flags | +| --------- | ---------------------------------------- | +| GCC C17 | `gcc -std=c17 -Wall -Wextra -pedantic` | +| GCC C23 | `gcc -std=c23 -Wall -Wextra -pedantic` | +| Clang C17 | `clang -std=c17 -Wall -Wextra -pedantic` | +| Clang C23 | `clang -std=c23 -Wall -Wextra -pedantic` | +| Zig | `zig build` (debug mode) | -The Zig executable is larger because: -- Zig stdlib is embedded in the binary -- More comprehensive runtime features -- GeneralPurposeAllocator adds overhead ## Notes -- Both versions are unoptimized builds -- C build uses `-std=c11 -Wall -Wextra -pedantic` -- Zig build uses default optimization level -- Times are from cold builds (no cache) +- All versions produce identical output +- C23 falls back to `-std=c2x` on older compilers +- Times vary between runs; relative performance is more meaningful +- CI tests both GCC and Clang on Ubuntu -## Running the Benchmark +## Running the Benchmarks ```bash +# Full runtime benchmark (all compilers) +./run_benchmarks.sh + +# Build-only benchmark ./benchmark.sh + +# Show detected compilers +cd c && make info ``` -This will clean, rebuild both versions, and compare build times and sizes. diff --git a/c/Makefile b/c/Makefile index d2e284a..8c2e37a 100644 --- a/c/Makefile +++ b/c/Makefile @@ -1,5 +1,56 @@ -all: - gcc -std=c11 -Wall -Wextra -pedantic src/chatbot.c -o chat +# Detect real GCC vs Apple's gcc alias +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),Darwin) + # macOS: gcc is actually clang, use explicit paths if available + GCC := $(shell which gcc-14 gcc-13 gcc-12 2>/dev/null | head -1) + ifeq ($(GCC),) + GCC := gcc + GCC_NOTE := "(Apple Clang)" + endif +else + GCC := gcc +endif + +CLANG := clang +CFLAGS_COMMON = -Wall -Wextra -pedantic +SRC = src/chatbot.c + +# Default targets +all: gcc-builds clang-builds + +# GCC builds +gcc-builds: chat-gcc-c17 chat-gcc-c23 + +chat-gcc-c17: + $(GCC) -std=c17 $(CFLAGS_COMMON) $(SRC) -o chat-gcc-c17 + +chat-gcc-c23: + $(GCC) -std=c23 $(CFLAGS_COMMON) $(SRC) -o chat-gcc-c23 2>/dev/null || \ + $(GCC) -std=c2x $(CFLAGS_COMMON) $(SRC) -o chat-gcc-c23 + +# Clang builds +clang-builds: chat-clang-c17 chat-clang-c23 + +chat-clang-c17: + $(CLANG) -std=c17 $(CFLAGS_COMMON) $(SRC) -o chat-clang-c17 + +chat-clang-c23: + $(CLANG) -std=c23 $(CFLAGS_COMMON) $(SRC) -o chat-clang-c23 2>/dev/null || \ + $(CLANG) -std=c2x $(CFLAGS_COMMON) $(SRC) -o chat-clang-c23 + +# Legacy targets for backwards compatibility +chat: chat-gcc-c17 + cp chat-gcc-c17 chat + +chat-c17: chat-gcc-c17 + cp chat-gcc-c17 chat-c17 + +chat-c23: chat-gcc-c23 + cp chat-gcc-c23 chat-c23 clean: - rm -rf *.o chat + rm -rf *.o chat chat-c17 chat-c23 chat-gcc-* chat-clang-* + +info: + @echo "GCC: $(GCC) $(GCC_NOTE)" + @echo "Clang: $(CLANG)" diff --git a/c/src/chatbot.c b/c/src/chatbot.c index 6e44c9e..eb5ab5b 100644 --- a/c/src/chatbot.c +++ b/c/src/chatbot.c @@ -1,8 +1,12 @@ +// Enable POSIX/GNU extensions (strdup, strncasecmp, etc.) +// _GNU_SOURCE is needed because -std=c17 disables extensions by default +#define _GNU_SOURCE + #include #include +#include #include #include -#include #include "chatbot.h" // hash table implementation from here diff --git a/run_benchmarks.sh b/run_benchmarks.sh index 5c8e049..aaa64ee 100755 --- a/run_benchmarks.sh +++ b/run_benchmarks.sh @@ -7,8 +7,15 @@ echo "Chatbot Runtime Benchmark & Testing" echo "==========================================" echo "" -# Build both versions -echo "Building both versions..." +# Show compiler versions +echo "Compiler Versions:" +echo " GCC: $(gcc --version | head -1)" +echo " Clang: $(clang --version | head -1)" +echo " Zig: $(zig version)" +echo "" + +# Build all versions +echo "Building all versions..." echo "" cd c @@ -19,46 +26,58 @@ cd .. cd zig rm -rf zig-out .zig-cache > /dev/null 2>&1 mkdir -p zig-out/bin -zig build-exe src/main.zig -femit-bin=zig-out/bin/chat > /dev/null 2>&1 +zig build > /dev/null 2>&1 cd .. echo "Build complete!" echo "" -# Test C version -echo "==========================================" -echo "C Version Testing & Benchmarking" -echo "==========================================" -echo "" -echo "Running with test inputs..." - -C_START=$(date +%s%N) -c/./chat < test_inputs.txt > /tmp/c_output.txt 2>&1 -C_END=$(date +%s%N) -C_TIME=$(( (C_END - C_START) / 1000000 )) # milliseconds - -echo "Output:" -cat /tmp/c_output.txt -echo "" -echo "Execution time: ${C_TIME}ms" -echo "" - -# Test Zig version +# Run benchmarks echo "==========================================" -echo "Zig Version Testing & Benchmarking" +echo "Running Benchmarks..." echo "==========================================" echo "" -echo "Running with test inputs..." +# GCC C17 +echo -n "GCC C17: " +GCC_C17_START=$(date +%s%N) +c/chat-gcc-c17 < test_inputs.txt > /tmp/gcc_c17_output.txt 2>&1 +GCC_C17_END=$(date +%s%N) +GCC_C17_TIME=$(( (GCC_C17_END - GCC_C17_START) / 1000000 )) +echo "${GCC_C17_TIME}ms" + +# GCC C23 +echo -n "GCC C23: " +GCC_C23_START=$(date +%s%N) +c/chat-gcc-c23 < test_inputs.txt > /tmp/gcc_c23_output.txt 2>&1 +GCC_C23_END=$(date +%s%N) +GCC_C23_TIME=$(( (GCC_C23_END - GCC_C23_START) / 1000000 )) +echo "${GCC_C23_TIME}ms" + +# Clang C17 +echo -n "Clang C17: " +CLANG_C17_START=$(date +%s%N) +c/chat-clang-c17 < test_inputs.txt > /tmp/clang_c17_output.txt 2>&1 +CLANG_C17_END=$(date +%s%N) +CLANG_C17_TIME=$(( (CLANG_C17_END - CLANG_C17_START) / 1000000 )) +echo "${CLANG_C17_TIME}ms" + +# Clang C23 +echo -n "Clang C23: " +CLANG_C23_START=$(date +%s%N) +c/chat-clang-c23 < test_inputs.txt > /tmp/clang_c23_output.txt 2>&1 +CLANG_C23_END=$(date +%s%N) +CLANG_C23_TIME=$(( (CLANG_C23_END - CLANG_C23_START) / 1000000 )) +echo "${CLANG_C23_TIME}ms" + +# Zig +echo -n "Zig: " ZIG_START=$(date +%s%N) -zig/zig-out/bin/./chat < test_inputs.txt > /tmp/zig_output.txt 2>&1 +zig/zig-out/bin/chat < test_inputs.txt > /tmp/zig_output.txt 2>&1 ZIG_END=$(date +%s%N) -ZIG_TIME=$(( (ZIG_END - ZIG_START) / 1000000 )) # milliseconds +ZIG_TIME=$(( (ZIG_END - ZIG_START) / 1000000 )) +echo "${ZIG_TIME}ms" -echo "Output:" -cat /tmp/zig_output.txt -echo "" -echo "Execution time: ${ZIG_TIME}ms" echo "" # Compare outputs @@ -67,36 +86,61 @@ echo "Output Comparison" echo "==========================================" echo "" -if diff -q /tmp/c_output.txt /tmp/zig_output.txt > /dev/null 2>&1; then - echo "✓ Outputs are identical" -else - echo "✗ Outputs differ" - echo "" - echo "C output:" - cat /tmp/c_output.txt - echo "" - echo "Zig output:" - cat /tmp/zig_output.txt -fi +REFERENCE="/tmp/gcc_c17_output.txt" + +for file in /tmp/gcc_c23_output.txt /tmp/clang_c17_output.txt /tmp/clang_c23_output.txt /tmp/zig_output.txt; do + name=$(basename "$file" _output.txt | tr '_' ' ') + if diff -q "$REFERENCE" "$file" > /dev/null 2>&1; then + echo "✓ $name matches GCC C17" + else + echo "✗ $name differs from GCC C17" + fi +done echo "" -# Runtime comparison +# Find fastest echo "==========================================" echo "Runtime Comparison" echo "==========================================" echo "" -if [ $C_TIME -lt $ZIG_TIME ]; then - RATIO=$(echo "scale=2; $ZIG_TIME / $C_TIME" | bc) - echo "C version is ${RATIO}x faster" -else - RATIO=$(echo "scale=2; $C_TIME / $ZIG_TIME" | bc) - echo "Zig version is ${RATIO}x faster" +MIN_TIME=$GCC_C17_TIME +FASTEST="GCC C17" + +if [ $GCC_C23_TIME -lt $MIN_TIME ]; then MIN_TIME=$GCC_C23_TIME; FASTEST="GCC C23"; fi +if [ $CLANG_C17_TIME -lt $MIN_TIME ]; then MIN_TIME=$CLANG_C17_TIME; FASTEST="Clang C17"; fi +if [ $CLANG_C23_TIME -lt $MIN_TIME ]; then MIN_TIME=$CLANG_C23_TIME; FASTEST="Clang C23"; fi +if [ $ZIG_TIME -lt $MIN_TIME ]; then MIN_TIME=$ZIG_TIME; FASTEST="Zig"; fi + +echo "Fastest: $FASTEST (${MIN_TIME}ms)" +echo "" + +# Calculate ratios +if [ $MIN_TIME -gt 0 ]; then + GCC_C17_RATIO=$(echo "scale=2; $GCC_C17_TIME / $MIN_TIME" | bc) + GCC_C23_RATIO=$(echo "scale=2; $GCC_C23_TIME / $MIN_TIME" | bc) + CLANG_C17_RATIO=$(echo "scale=2; $CLANG_C17_TIME / $MIN_TIME" | bc) + CLANG_C23_RATIO=$(echo "scale=2; $CLANG_C23_TIME / $MIN_TIME" | bc) + ZIG_RATIO=$(echo "scale=2; $ZIG_TIME / $MIN_TIME" | bc) + + echo "Results (time / relative):" + printf " %-12s %6sms %5sx\n" "GCC C17:" "$GCC_C17_TIME" "$GCC_C17_RATIO" + printf " %-12s %6sms %5sx\n" "GCC C23:" "$GCC_C23_TIME" "$GCC_C23_RATIO" + printf " %-12s %6sms %5sx\n" "Clang C17:" "$CLANG_C17_TIME" "$CLANG_C17_RATIO" + printf " %-12s %6sms %5sx\n" "Clang C23:" "$CLANG_C23_TIME" "$CLANG_C23_RATIO" + printf " %-12s %6sms %5sx\n" "Zig:" "$ZIG_TIME" "$ZIG_RATIO" + echo "" fi +# Binary sizes +echo "==========================================" +echo "Binary Sizes" +echo "==========================================" echo "" -echo "Summary:" -echo " C: ${C_TIME}ms" -echo " Zig: ${ZIG_TIME}ms" +printf " %-12s %s\n" "GCC C17:" "$(ls -lh c/chat-gcc-c17 | awk '{print $5}')" +printf " %-12s %s\n" "GCC C23:" "$(ls -lh c/chat-gcc-c23 | awk '{print $5}')" +printf " %-12s %s\n" "Clang C17:" "$(ls -lh c/chat-clang-c17 | awk '{print $5}')" +printf " %-12s %s\n" "Clang C23:" "$(ls -lh c/chat-clang-c23 | awk '{print $5}')" +printf " %-12s %s\n" "Zig:" "$(ls -lh zig/zig-out/bin/chat | awk '{print $5}')" echo "" diff --git a/zig/src/main.zig b/zig/src/main.zig index 292fcac..f8d67df 100644 --- a/zig/src/main.zig +++ b/zig/src/main.zig @@ -33,9 +33,9 @@ pub fn main() !void { try stdout.interface.flush(); // Read line using Zig 0.15 delimiter API - const line = stdin.interface.takeDelimiterExclusive('\n') catch |err| { + // Use takeDelimiter which returns null on EOF with empty remaining + const line = stdin.interface.takeDelimiter('\n') catch |err| { switch (err) { - error.EndOfStream => break, error.StreamTooLong => { // Line too long, skip it continue; @@ -44,8 +44,10 @@ pub fn main() !void { } }; - const trimmed = std.mem.trim(u8, line, " \t\r\n"); - if (trimmed.len == 0) break; + if (line == null) break; + + const trimmed = std.mem.trim(u8, line.?, " \t\r\n"); + if (trimmed.len == 0) continue; // Empty line, keep going var word_iter = std.mem.tokenizeAny(u8, trimmed, chatbot.SeparatorChars);