Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 90 additions & 1 deletion sqlite-vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,95 @@ typedef size_t usize;
#define countof(x) (sizeof(x) / sizeof((x)[0]))
#define min(a, b) (((a) <= (b)) ? (a) : (b))

// Locale-independent strtod implementation for parsing JSON floats
// Fixes issue #241: strtod is locale-dependent and breaks with non-C locales
//
// This custom parser always uses '.' as decimal separator regardless of locale.
// Simpler and more portable than strtod_l, with no thread-safety issues.
static double strtod_c(const char *str, char **endptr) {
const char *p = str;
double result = 0.0;
int sign = 1;
int has_digits = 0;

// Skip leading whitespace
while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') {
p++;
}

// Handle optional sign
if (*p == '-') {
sign = -1;
p++;
} else if (*p == '+') {
p++;
}

// Parse integer part
while (*p >= '0' && *p <= '9') {
result = result * 10.0 + (*p - '0');
p++;
has_digits = 1;
}

// Parse fractional part
if (*p == '.') {
double fraction = 0.0;
double divisor = 1.0;
p++;

while (*p >= '0' && *p <= '9') {
fraction = fraction * 10.0 + (*p - '0');
divisor *= 10.0;
p++;
has_digits = 1;
}

result += fraction / divisor;
}

// Parse exponent
if ((*p == 'e' || *p == 'E') && has_digits) {
int exp_sign = 1;
int exponent = 0;
p++;

if (*p == '-') {
exp_sign = -1;
p++;
} else if (*p == '+') {
p++;
}

while (*p >= '0' && *p <= '9') {
exponent = exponent * 10 + (*p - '0');
p++;
}

// Apply exponent using pow() for accuracy
if (exponent > 0) {
double exp_mult = pow(10.0, (double)exponent);
if (exp_sign == 1) {
result *= exp_mult;
} else {
result /= exp_mult;
}
}
}

// Set end pointer
if (endptr) {
*endptr = (char *)(has_digits ? p : str);
}

// Check for overflow/underflow
if (result == HUGE_VAL || result == -HUGE_VAL) {
errno = ERANGE;
}

return sign * result;
}

enum VectorElementType {
// clang-format off
SQLITE_VEC_ELEMENT_TYPE_FLOAT32 = 223 + 0,
Expand Down Expand Up @@ -751,7 +840,7 @@ static int fvec_from_value(sqlite3_value *value, f32 **vector,
char *endptr;

errno = 0;
double result = strtod(ptr, &endptr);
double result = strtod_c(ptr, &endptr);
if ((errno != 0 && result == 0) // some interval error?
|| (errno == ERANGE &&
(result == HUGE_VAL || result == -HUGE_VAL)) // too big / smalls
Expand Down
48 changes: 48 additions & 0 deletions tests/test-loadable.py
Original file line number Diff line number Diff line change
Expand Up @@ -951,6 +951,54 @@ def test_vec0_inserts():
db.execute("insert into txt_pk(txt_id, aaa) values ('b', '[2,2,2,2]')")


def test_vec0_locale_independent():
    """Check that JSON float parsing ignores the process locale (issue #241).

    Switches LC_NUMERIC to a locale whose decimal separator is a comma and
    verifies that dot-separated JSON floats still round-trip through a vec0
    table. The test is skipped when no such locale is installed, because in
    that case locale independence would not actually be exercised.
    """
    import locale
    import unittest

    db = connect(EXT_PATH)
    db.execute("create virtual table v using vec0(embedding float[3])")

    # Baseline: parse and verify under whatever locale the process started with.
    db.execute("insert into v(rowid, embedding) values (1, '[0.1, 0.2, 0.3]')")
    result = db.execute("select embedding from v where rowid = 1").fetchone()
    expected = _f32([0.1, 0.2, 0.3])
    assert result[0] == expected, f"Expected {expected}, got {result[0]}"

    # Only locales that use ',' as the decimal separator are useful here; a
    # dot-decimal locale such as C.UTF-8 would let the test pass vacuously.
    comma_locales = ("fr_FR.UTF-8", "de_DE.UTF-8", "it_IT.UTF-8")
    original_locale = locale.setlocale(locale.LC_NUMERIC)

    try:
        for candidate in comma_locales:
            try:
                locale.setlocale(locale.LC_NUMERIC, candidate)
            except locale.Error:
                continue
            # Confirm the locale really changes the decimal separator.
            if locale.localeconv()["decimal_point"] == ",":
                break
        else:
            # pytest reports unittest.SkipTest as a skipped test.
            raise unittest.SkipTest(
                "no comma-decimal locale installed; "
                "cannot exercise locale-independent parsing"
            )

        # Before the fix, these inserts failed in French/German/etc. locales.
        db.execute("insert into v(rowid, embedding) values (2, '[0.4, 0.5, 0.6]')")
        result = db.execute("select embedding from v where rowid = 2").fetchone()
        expected = _f32([0.4, 0.5, 0.6])
        assert result[0] == expected, f"Expected {expected}, got {result[0]}"

        # Also verify with different decimal values.
        db.execute("insert into v(rowid, embedding) values (3, '[1.23, 4.56, 7.89]')")
        result = db.execute("select embedding from v where rowid = 3").fetchone()
        expected = _f32([1.23, 4.56, 7.89])
        assert result[0] == expected, f"Expected {expected}, got {result[0]}"
    finally:
        # Always restore the locale so later tests are unaffected.
        locale.setlocale(locale.LC_NUMERIC, original_locale)


def test_vec0_insert_errors2():
db = connect(EXT_PATH)
db.execute("create virtual table t1 using vec0(aaa float[4], chunk_size=8)")
Expand Down