diff --git a/sqlite-vec.c b/sqlite-vec.c
index 3cc802f0..435b89e7 100644
--- a/sqlite-vec.c
+++ b/sqlite-vec.c
@@ -112,6 +112,116 @@ typedef size_t usize;
 #define countof(x) (sizeof(x) / sizeof((x)[0]))
 #define min(a, b) (((a) <= (b)) ? (a) : (b))
 
+// Locale-independent replacement for strtod(), used to parse JSON floats.
+// Fixes issue #241: strtod() honors LC_NUMERIC, so in locales whose decimal
+// separator is ',' it stops at the '.' of a JSON number.
+//
+// Accepts optional leading whitespace, an optional sign, decimal digits with
+// an optional '.' fraction, and an optional exponent ([eE][+-]digits). Hex
+// floats, "inf" and "nan" are not recognized.
+//
+// If endptr is non-NULL, *endptr is set just past the parsed number, or to
+// str when no digits were found. On overflow errno is set to ERANGE and
+// +/-HUGE_VAL is returned. The result is not guaranteed to be correctly
+// rounded.
+static double strtod_c(const char *str, char **endptr) {
+  const char *p = str;
+  double result = 0.0;
+  int sign = 1;
+  int has_digits = 0;
+
+  // Skip leading whitespace
+  while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') {
+    p++;
+  }
+
+  // Handle optional sign
+  if (*p == '-') {
+    sign = -1;
+    p++;
+  } else if (*p == '+') {
+    p++;
+  }
+
+  // Parse integer part
+  while (*p >= '0' && *p <= '9') {
+    result = result * 10.0 + (*p - '0');
+    p++;
+    has_digits = 1;
+  }
+
+  // Parse fractional part
+  if (*p == '.') {
+    double fraction = 0.0;
+    double divisor = 1.0;
+    p++;
+
+    while (*p >= '0' && *p <= '9') {
+      // Stop accumulating once further digits cannot matter; otherwise a very
+      // long fraction overflows both terms and inf / inf yields NaN.
+      if (divisor < 1e300) {
+        fraction = fraction * 10.0 + (*p - '0');
+        divisor *= 10.0;
+      }
+      p++;
+      has_digits = 1;
+    }
+
+    result += fraction / divisor;
+  }
+
+  // Parse exponent. 'e' only belongs to the number when at least one digit
+  // follows it (after an optional sign); otherwise it is left unconsumed.
+  if ((*p == 'e' || *p == 'E') && has_digits) {
+    const char *q = p + 1;
+    int exp_sign = 1;
+    int exponent = 0;
+
+    if (*q == '-') {
+      exp_sign = -1;
+      q++;
+    } else if (*q == '+') {
+      q++;
+    }
+
+    if (*q >= '0' && *q <= '9') {
+      while (*q >= '0' && *q <= '9') {
+        // Saturate: a value this large already overflows/underflows a double,
+        // and unbounded accumulation would overflow the int (UB).
+        if (exponent < 10000) {
+          exponent = exponent * 10 + (*q - '0');
+        }
+        q++;
+      }
+      p = q;
+
+      // Zero stays zero for any exponent; skipping it avoids 0 * inf == NaN.
+      if (exponent > 0 && result != 0.0) {
+        double exp_mult = pow(10.0, (double)exponent);
+        if (exp_sign == 1) {
+          result *= exp_mult;
+        } else {
+          result /= exp_mult;
+        }
+      }
+    }
+  }
+
+  // Set end pointer
+  if (endptr) {
+    *endptr = (char *)(has_digits ? p : str);
+  }
+
+  // Report overflow. The negated comparison also catches NaN (e.g. inf / inf
+  // from an overflowed mantissa combined with a huge negative exponent).
+  if (!(result < HUGE_VAL)) {
+    result = HUGE_VAL;
+    errno = ERANGE;
+  }
+
+  return sign * result;
+}
+
 enum VectorElementType {
   // clang-format off
   SQLITE_VEC_ELEMENT_TYPE_FLOAT32 = 223 + 0,
@@ -751,7 +861,7 @@ static int fvec_from_value(sqlite3_value *value, f32 **vector,
       char *endptr;
 
       errno = 0;
-      double result = strtod(ptr, &endptr);
+      double result = strtod_c(ptr, &endptr);
       if ((errno != 0 && result == 0) // some interval error?
           || (errno == ERANGE &&
               (result == HUGE_VAL || result == -HUGE_VAL)) // too big / smalls
diff --git a/tests/test-loadable.py b/tests/test-loadable.py
index a8058c9e..eaffae12 100644
--- a/tests/test-loadable.py
+++ b/tests/test-loadable.py
@@ -951,6 +951,57 @@ def test_vec0_inserts():
     db.execute("insert into txt_pk(txt_id, aaa) values ('b', '[2,2,2,2]')")
 
 
+def test_vec0_locale_independent():
+    """Test that JSON float parsing is locale-independent (issue #241)"""
+    import locale
+    import unittest
+
+    db = connect(EXT_PATH)
+    db.execute("create virtual table v using vec0(embedding float[3])")
+
+    # Test with C locale first (baseline)
+    db.execute("insert into v(rowid, embedding) values (1, '[0.1, 0.2, 0.3]')")
+
+    # Only locales whose decimal separator is ',' can expose the bug, so
+    # C.UTF-8 (which uses '.') is deliberately not a candidate.
+    test_locales = ['fr_FR.UTF-8', 'de_DE.UTF-8', 'it_IT.UTF-8']
+    locale_set = False
+    original_locale = locale.setlocale(locale.LC_NUMERIC)
+
+    try:
+        for test_locale in test_locales:
+            try:
+                locale.setlocale(locale.LC_NUMERIC, test_locale)
+            except locale.Error:
+                continue
+            if locale.localeconv()['decimal_point'] == ',':
+                locale_set = True
+                break
+
+        if not locale_set:
+            # Passing here would claim coverage the test did not provide;
+            # pytest reports unittest.SkipTest as a skipped test.
+            raise unittest.SkipTest("no comma-decimal locale available")
+
+        # Even with non-C locale, JSON parsing should work (using dot as decimal separator)
+        # Before the fix, this would fail in French/German/etc locales
+        db.execute("insert into v(rowid, embedding) values (2, '[0.4, 0.5, 0.6]')")
+
+        # Verify the data was inserted correctly
+        result = db.execute("select embedding from v where rowid = 2").fetchone()
+        expected = _f32([0.4, 0.5, 0.6])
+        assert result[0] == expected, f"Expected {expected}, got {result[0]}"
+
+        # Also verify with different decimal values
+        db.execute("insert into v(rowid, embedding) values (3, '[1.23, 4.56, 7.89]')")
+        result = db.execute("select embedding from v where rowid = 3").fetchone()
+        expected = _f32([1.23, 4.56, 7.89])
+        assert result[0] == expected, f"Expected {expected}, got {result[0]}"
+    finally:
+        # Restore original locale
+        locale.setlocale(locale.LC_NUMERIC, original_locale)
+
+
 def test_vec0_insert_errors2():
     db = connect(EXT_PATH)
     db.execute("create virtual table t1 using vec0(aaa float[4], chunk_size=8)")