Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 105 additions & 32 deletions benches/cli/bench_cli.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@
# CLI Calculator Benchmarks — Correctness & Performance
#
# Compares decimo against bc and python3 on every expression:
# 1. Correctness — first 15 significant digits must agree
# 1. Correctness — all significant digits must agree at full precision
# (minus 1 guard digit for last-digit rounding differences)
# 2. Performance — average wall-clock latency over $ITERATIONS runs
#
# bc is the golden reference (mismatches fail the script).
# python3/mpmath is informational (mismatches are shown but do not fail).
#
# Usage:
# bash benches/cli/bench_cli.sh
# ITERATIONS=20 bash benches/cli/bench_cli.sh
Expand All @@ -32,15 +36,28 @@ if ! [[ -x "$BINARY" ]]; then
exit 1
fi

HAS_BC=false; command -v bc &>/dev/null && HAS_BC=true
if ! command -v bc &>/dev/null; then
echo "Error: bc is required (golden reference) but not found."
exit 1
fi
HAS_PY=false; command -v python3 &>/dev/null && HAS_PY=true

# ── Counters ───────────────────────────────────────────────────────────────

COMPARISONS=0
MATCHES=0
MISMATCHES=0
ERRORS=0
# Decimo errors — any decimo failure is fatal.
DECIMO_ERRORS=0

# bc is the golden reference — mismatches here fail the script.
BC_COMPARISONS=0
BC_MATCHES=0
BC_MISMATCHES=0
BC_ERRORS=0

# python3 is informational — mismatches are reported but do not fail.
PY_COMPARISONS=0
PY_MATCHES=0
PY_MISMATCHES=0
PY_ERRORS=0

# ── Helpers ────────────────────────────────────────────────────────────────

Expand All @@ -67,9 +84,9 @@ elapsed_ms() {
}

# Extract a canonical comparison key from a numeric string:
# adjusted base-10 exponent + first 15 significant digits.
# adjusted base-10 exponent + ALL significant digits.
# This ensures values that differ only by exponent (e.g. 1E+10 vs 1E+11)
# are correctly detected as a MISMATCH.
# are correctly detected as a MISMATCH, and full-precision agreement is verified.
sig_digits() {
local s="${1#-}" # strip sign; check_match handles sign separately
local explicit_exp=0
Expand Down Expand Up @@ -112,10 +129,14 @@ sig_digits() {
adjusted_exp=$(( explicit_exp - first_nonzero - 1 ))
fi

echo "${adjusted_exp}:$(echo "$digits" | cut -c1-15)"
echo "${adjusted_exp}:${digits}"
}

# Compare two results by leading significant digits.
# Compare two results over all the significant digits they share.
# Both keys are "adjusted_exp:digits". The exponents must match exactly.
# The digit strings are compared up to the length of the shorter one,
# minus 1 guard digit (the very last digit often differs between tools
# due to rounding vs. truncation, which is standard in multiple-precision
# arithmetic). At least one digit is always compared.
check_match() {
local a="$1" b="$2"
local sign_a="" sign_b=""
Expand All @@ -125,7 +146,26 @@ check_match() {
local sa sb
sa=$(sig_digits "$a")
sb=$(sig_digits "$b")
if [[ "$sa" == "$sb" ]]; then echo "MATCH"; else echo "MISMATCH"; fi

# Split into exponent and digit parts
local exp_a="${sa%%:*}" digits_a="${sa#*:}"
local exp_b="${sb%%:*}" digits_b="${sb#*:}"

# Exponents must match exactly
if [[ "$exp_a" != "$exp_b" ]]; then echo "MISMATCH"; return 0; fi

# Compare digits up to (shorter length - 1) to allow last-digit rounding
local len_a=${#digits_a} len_b=${#digits_b}
local min_len=$len_a
if (( len_b < min_len )); then min_len=$len_b; fi
local cmp_len=$(( min_len - 1 ))
if (( cmp_len < 1 )); then cmp_len=1; fi

if [[ "${digits_a:0:$cmp_len}" == "${digits_b:0:$cmp_len}" ]]; then
echo "MATCH"
else
echo "MISMATCH"
fi
return 0
}

Expand All @@ -134,17 +174,30 @@ preview() {
if (( ${#1} > PREVIEW )); then echo "${1:0:$PREVIEW}..."; else echo "$1"; fi
}

# Record a comparison result.
# Record a comparison result for a specific tool.
record() {
local tag="$1"
if [[ "$tag" == "ERROR" ]]; then
ERRORS=$((ERRORS + 1))
local tool="$1" tag="$2"
if [[ "$tool" == "bc" ]]; then
if [[ "$tag" == "ERROR" ]]; then
BC_ERRORS=$((BC_ERRORS + 1))
else
BC_COMPARISONS=$((BC_COMPARISONS + 1))
if [[ "$tag" == "MATCH" ]]; then
BC_MATCHES=$((BC_MATCHES + 1))
else
BC_MISMATCHES=$((BC_MISMATCHES + 1))
fi
fi
else
COMPARISONS=$((COMPARISONS + 1))
if [[ "$tag" == "MATCH" ]]; then
MATCHES=$((MATCHES + 1))
if [[ "$tag" == "ERROR" ]]; then
PY_ERRORS=$((PY_ERRORS + 1))
else
MISMATCHES=$((MISMATCHES + 1))
PY_COMPARISONS=$((PY_COMPARISONS + 1))
if [[ "$tag" == "MATCH" ]]; then
PY_MATCHES=$((PY_MATCHES + 1))
else
PY_MISMATCHES=$((PY_MISMATCHES + 1))
fi
fi
fi
return 0
Expand All @@ -171,13 +224,13 @@ bench_compare() {
d_ms=$(elapsed_ms "$BINARY" "$d_expr" -P "$prec")
printf " %-10s %-38s %8s ms\n" "decimo:" "$(preview "$d_result")" "$d_ms"
if [[ "$d_result" == "ERROR" ]]; then
record "ERROR"
DECIMO_ERRORS=$((DECIMO_ERRORS + 1))
echo ""
return
fi

# ── bc ──
if [[ -n "$bc_expr" ]] && $HAS_BC; then
if [[ -n "$bc_expr" ]]; then
local full_bc="scale=$prec; $bc_expr"
local b_result b_ms tag
# tr -d '\\\n' removes bc's backslash line-continuations
Expand All @@ -189,7 +242,7 @@ bench_compare() {
tag=$(check_match "$d_result" "$b_result")
fi
printf " %-10s %-38s %8s ms %s\n" "bc:" "$(preview "$b_result")" "$b_ms" "$tag"
record "$tag"
record bc "$tag"
fi

# ── python3 ──
Expand All @@ -204,7 +257,7 @@ bench_compare() {
tag=$(check_match "$d_result" "$p_result")
fi
printf " %-10s %-38s %8s ms %s\n" "python3:" "$(preview "$p_result")" "$p_ms" "$tag"
record "$tag"
record py "$tag"
fi

echo ""
Expand Down Expand Up @@ -276,10 +329,12 @@ bench_compare "exp(1)" 50 \
"e(1)" \
"${PY_MP};print(mp.exp(1))"

bench_compare "sin(1)" 50 \
"sin(1)" \
"s(1)" \
"${PY_MP};print(mp.sin(1))"
# NOTE: for sin(x) with x near pi, mpmath diverges from decimo and
# WolframAlpha at around the 21st digit.
# See docs/internal_notes.md. Kept here as a reference comparison.
bench_compare "sin(3.1415926535897932384626433833)" 50 \
"sin(3.1415926535897932384626433833)" \
"s(3.1415926535897932384626433833)" \
"${PY_MP};print(mp.sin(mp.mpf('3.1415926535897932384626433833')))"

bench_compare "cos(0)" 50 \
"cos(0)" \
Expand Down Expand Up @@ -359,14 +414,32 @@ echo ""
# ── Summary ────────────────────────────────────────────────────────────────

echo "============================================================"
printf " Summary: %d comparisons — %d MATCH, %d MISMATCH" \
"$COMPARISONS" "$MATCHES" "$MISMATCHES"
if (( ERRORS > 0 )); then
printf ", %d ERROR (tool missing or failed)" "$ERRORS"
if (( DECIMO_ERRORS > 0 )); then
printf " decimo: %d ERROR(s)\n" "$DECIMO_ERRORS"
fi
printf " bc (golden): %d comparisons — %d MATCH, %d MISMATCH" \
"$BC_COMPARISONS" "$BC_MATCHES" "$BC_MISMATCHES"
if (( BC_ERRORS > 0 )); then
printf ", %d ERROR" "$BC_ERRORS"
fi
echo ""
printf " python3 (ref): %d comparisons — %d MATCH, %d MISMATCH" \
"$PY_COMPARISONS" "$PY_MATCHES" "$PY_MISMATCHES"
if (( PY_ERRORS > 0 )); then
printf ", %d ERROR" "$PY_ERRORS"
fi
echo ""
echo "============================================================"

if (( MISMATCHES > 0 )); then
if (( DECIMO_ERRORS > 0 )); then
echo "FAIL: decimo evaluation errors detected."
exit 1
fi
if (( BC_MISMATCHES > 0 )); then
echo "FAIL: bc (golden reference) mismatches detected."
exit 1
fi
if (( BC_ERRORS > 0 )); then
echo "FAIL: bc (golden reference) errors detected."
exit 1
fi
1 change: 1 addition & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ This is a list of changes for the Decimo package (formerly DeciMojo).
1. Add **file mode**: use `--file` / `-F` flag to evaluate expressions from a file, one per line (e.g. `decimo -F expressions.dm -P 50`). Comments (`#`), inline comments, and blank lines are skipped. All CLI flags (precision, formatting, rounding) apply to every expression.
1. Add **shell completion** documentation for Bash, Zsh, and Fish (`decimo --completions bash|zsh|fish`).
1. Add **CLI performance benchmarks** (`benches/cli/bench_cli.sh`) comparing correctness and timing against `bc` and `python3` across 47 comparisons — all results match to 15 significant digits; `decimo` is 3–4× faster than `python3 -c`.
1. Add **interactive REPL**: launch with `decimo` (no arguments, TTY attached). Features coloured `decimo>` prompt on stderr, per-line error recovery with caret diagnostics, comment/blank-line skipping, and graceful exit via `exit`, `quit`, or Ctrl-D. All CLI flags (`-P`, `--scientific`, etc.) apply to the REPL session.

### 🦋 Changed in v0.10.0

Expand Down
Loading
Loading