From 2ce13b422b3fb0a79c17f610736295638ede9974 Mon Sep 17 00:00:00 2001 From: "Midhun K.V." Date: Thu, 20 Nov 2025 08:20:17 +0200 Subject: [PATCH 01/12] Add docker-demo with sheetreader extension test --- docker-demo/Dockerfile | 25 ++++++++++++ docker-demo/README.md | 72 ++++++++++++++++++++++++++++++++++ docker-demo/demo.sql | 46 ++++++++++++++++++++++ docker-demo/docker-compose.yml | 13 ++++++ 4 files changed, 156 insertions(+) create mode 100644 docker-demo/Dockerfile create mode 100644 docker-demo/README.md create mode 100644 docker-demo/demo.sql create mode 100644 docker-demo/docker-compose.yml diff --git a/docker-demo/Dockerfile b/docker-demo/Dockerfile new file mode 100644 index 0000000..fa841c4 --- /dev/null +++ b/docker-demo/Dockerfile @@ -0,0 +1,25 @@ +# Use Ubuntu as base image +FROM ubuntu:22.04 + +# Install dependencies +RUN apt-get update && apt-get install -y \ + wget \ + unzip \ + && rm -rf /var/lib/apt/lists/* + +# Download and install DuckDB +RUN wget https://github.com/duckdb/duckdb/releases/download/v1.1.3/duckdb_cli-linux-amd64.zip \ + && unzip duckdb_cli-linux-amd64.zip \ + && mv duckdb /usr/local/bin/ \ + && chmod +x /usr/local/bin/duckdb \ + && rm duckdb_cli-linux-amd64.zip + +# Create working directory +WORKDIR /workspace + +# Copy the Excel file and demo script +COPY test.xlsx /workspace/ +COPY demo.sql /workspace/ + +# Set the entrypoint to bash for interactive use +CMD ["/bin/bash"] diff --git a/docker-demo/README.md b/docker-demo/README.md new file mode 100644 index 0000000..6123eb4 --- /dev/null +++ b/docker-demo/README.md @@ -0,0 +1,72 @@ +# SheetReader DuckDB Docker Demo + +Demo of the **sheetreader-duckdb** extension running in a Docker container to safely query Excel files with SQL. 
+ +## Prerequisites + +- Docker Desktop must be running + +## Quick Start (Interactive Mode) + +### Step 1: Navigate to the demo directory +```bash +cd c:\Users\mithu\OneDrive\Desktop\SA_Harry\SheetReader\docker-demo +``` + +### Step 2: Build and start DuckDB (first time only) +```bash +docker-compose build +``` + +### Step 3: Start DuckDB +```bash +docker-compose run --rm duckdb +``` + +### Step 4: Inside DuckDB, run SQL commands +```sql +-- Install and load the extension +INSTALL sheetreader FROM community; +LOAD sheetreader; + +-- Query your Excel file +SELECT * FROM sheetreader('test.xlsx'); + +-- Get statistics +SELECT + MIN(Numeric0) as min_value, + MAX(Numeric0) as max_value, + AVG(Numeric0) as avg_value, + SUM(Numeric0) as sum_value +FROM sheetreader('test.xlsx'); + +-- Create a table +CREATE TABLE excel_data AS FROM sheetreader('test.xlsx'); + +-- Filter data +SELECT * FROM excel_data WHERE Numeric0 > 50; +``` + +### Step 5: Exit when done +``` +.exit +``` + +--- + +## Run the Full Demo Script (Automated) + +To run all queries automatically: + +```bash +docker-compose run --rm duckdb -init demo.sql +``` + +--- + +## Files + +- **Dockerfile** - Container setup with DuckDB +- **docker-compose.yml** - Docker configuration (includes volume mounts) +- **demo.sql** - Demo SQL script +- **test.xlsx** - Sample Excel file with 5 random numbers diff --git a/docker-demo/demo.sql b/docker-demo/demo.sql new file mode 100644 index 0000000..199545e --- /dev/null +++ b/docker-demo/demo.sql @@ -0,0 +1,46 @@ +-- SheetReader DuckDB Extension Demo +-- This script demonstrates how to use the sheetreader extension to query Excel files + +-- Step 1: Install the sheetreader extension from community extensions +INSTALL sheetreader FROM community; + +-- Step 2: Load the extension +LOAD sheetreader; + +-- Step 3: Query the Excel file directly +.print '=== Reading test.xlsx with sheetreader ===' +SELECT * FROM sheetreader('test.xlsx'); + +-- Step 4: Get row count +.print '' +.print 
'=== Row count ===' +SELECT COUNT(*) as total_rows FROM sheetreader('test.xlsx'); + +-- Step 5: Calculate statistics on the data +.print '' +.print '=== Statistics ===' +SELECT + MIN(Numeric0) as min_value, + MAX(Numeric0) as max_value, + AVG(Numeric0) as avg_value, + SUM(Numeric0) as sum_value +FROM sheetreader('test.xlsx'); + +-- Step 6: Create a table from the Excel data +.print '' +.print '=== Creating table from Excel data ===' +CREATE TABLE excel_data AS +FROM sheetreader('test.xlsx'); + +-- Step 7: Query the created table +.print '' +.print '=== Querying the created table ===' +SELECT * FROM excel_data; + +-- Step 8: Filter data (example: values greater than 50) +.print '' +.print '=== Filtering values > 50 ===' +SELECT * FROM excel_data WHERE Numeric0 > 50; + +.print '' +.print '=== Demo completed successfully! ===' diff --git a/docker-demo/docker-compose.yml b/docker-demo/docker-compose.yml new file mode 100644 index 0000000..d3b5124 --- /dev/null +++ b/docker-demo/docker-compose.yml @@ -0,0 +1,13 @@ +version: '3.8' + +services: + duckdb: + build: . + container_name: sheetreader-demo + volumes: + - ./test.xlsx:/workspace/test.xlsx + - ./demo.sql:/workspace/demo.sql + working_dir: /workspace + stdin_open: true + tty: true + entrypoint: [ "duckdb" ] From bf8d472c5885d10b6c5213c19596cadbe76585a7 Mon Sep 17 00:00:00 2001 From: "Midhun K.V." 
Date: Thu, 20 Nov 2025 09:25:36 +0200 Subject: [PATCH 02/12] Update extension API for DuckDB v1.4.0 compatibility - Migrated to ExtensionLoader API (LoadInternal, DUCKDB_CPP_EXTENSION_ENTRY) - Updated docker-demo to use DuckDB v1.4.2 - Added todo.md for Ubuntu verification Addresses GitHub issue #63 --- docker-demo/todo.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 docker-demo/todo.md diff --git a/docker-demo/todo.md b/docker-demo/todo.md new file mode 100644 index 0000000..4b7473e --- /dev/null +++ b/docker-demo/todo.md @@ -0,0 +1,36 @@ +# Ubuntu Verification Tasks + +## Context +The `sheetreader-duckdb` extension has been updated for DuckDB v1.4.0 compatibility. +The code changes are applied in `src/` (migrated to `ExtensionLoader` API). +Verification is required on an Ubuntu machine. + +## Tasks + +1. **Install Dependencies** + Ensure the following packages are installed: + ```bash + sudo apt-get update && sudo apt-get install -y git cmake build-essential ninja-build libssl-dev python3-dev + ``` + +2. **Build Extension** + Build the extension from source using `ninja`: + ```bash + # In the sheetreader-duckdb directory + GEN=ninja make + ``` + +3. **Verify Functionality** + Run a test query using the built DuckDB binary (which has the extension pre-loaded): + + ```bash + # Create a simple test script + echo "SELECT * FROM sheetreader('docker-demo/test.xlsx');" > test_verification.sql + + # Run it with the built binary + ./build/release/duckdb < test_verification.sql + ``` + +4. **Confirm Success** + - Verify that the output shows the contents of the Excel file (Numeric0 column with values). + - Confirm that the extension builds and runs correctly with DuckDB v1.4.0+ API. From 3cf267669ec8ce039e5eaa895eef808789592b84 Mon Sep 17 00:00:00 2001 From: "Midhun K.V." Date: Thu, 20 Nov 2025 07:34:23 +0200 Subject: [PATCH 03/12] Implemented necessary changes. 
Test pending --- docker-demo/Dockerfile | 2 +- docker-demo/docker-compose.yml | 13 ---------- src/include/sheetreader_extension.hpp | 2 +- src/sheetreader_extension.cpp | 22 +++++----------- todo.md | 36 +++++++++++++++++++++++++++ 5 files changed, 44 insertions(+), 31 deletions(-) delete mode 100644 docker-demo/docker-compose.yml create mode 100644 todo.md diff --git a/docker-demo/Dockerfile b/docker-demo/Dockerfile index fa841c4..db68a0b 100644 --- a/docker-demo/Dockerfile +++ b/docker-demo/Dockerfile @@ -8,7 +8,7 @@ RUN apt-get update && apt-get install -y \ && rm -rf /var/lib/apt/lists/* # Download and install DuckDB -RUN wget https://github.com/duckdb/duckdb/releases/download/v1.1.3/duckdb_cli-linux-amd64.zip \ +RUN wget https://github.com/duckdb/duckdb/releases/download/v1.4.2/duckdb_cli-linux-amd64.zip \ && unzip duckdb_cli-linux-amd64.zip \ && mv duckdb /usr/local/bin/ \ && chmod +x /usr/local/bin/duckdb \ diff --git a/docker-demo/docker-compose.yml b/docker-demo/docker-compose.yml deleted file mode 100644 index d3b5124..0000000 --- a/docker-demo/docker-compose.yml +++ /dev/null @@ -1,13 +0,0 @@ -version: '3.8' - -services: - duckdb: - build: . 
- container_name: sheetreader-demo - volumes: - - ./test.xlsx:/workspace/test.xlsx - - ./demo.sql:/workspace/demo.sql - working_dir: /workspace - stdin_open: true - tty: true - entrypoint: [ "duckdb" ] diff --git a/src/include/sheetreader_extension.hpp b/src/include/sheetreader_extension.hpp index 45cda24..ff7d9ba 100644 --- a/src/include/sheetreader_extension.hpp +++ b/src/include/sheetreader_extension.hpp @@ -14,7 +14,7 @@ namespace duckdb { class SheetreaderExtension : public Extension { public: - void Load(DuckDB &db) override; + void Load(ExtensionLoader &loader) override; std::string Name() override; }; diff --git a/src/sheetreader_extension.cpp b/src/sheetreader_extension.cpp index d2a55bf..0bc503f 100644 --- a/src/sheetreader_extension.cpp +++ b/src/sheetreader_extension.cpp @@ -25,7 +25,6 @@ #include "duckdb/common/exception.hpp" #include "duckdb/common/string_util.hpp" #include "duckdb/function/table_function.hpp" -#include "duckdb/main/extension_util.hpp" #include "sheetreader_extension.hpp" #include @@ -940,7 +939,7 @@ inline unique_ptr SheetreaderBindFun(ClientContext &context, Table return std::move(bind_data); } -static void LoadInternal(DatabaseInstance &instance) { +static void LoadInternal(ExtensionLoader &loader) { // Register a table function TableFunction sheetreader_table_function("sheetreader", {LogicalType::VARCHAR}, SheetreaderCopyTableFun, SheetreaderBindFun, SRGlobalTableFunctionState::Init, @@ -959,11 +958,11 @@ static void LoadInternal(DatabaseInstance &instance) { sheetreader_table_function.named_parameters["force_types"] = LogicalType::BOOLEAN; sheetreader_table_function.named_parameters["coerce_to_string"] = LogicalType::BOOLEAN; - ExtensionUtil::RegisterFunction(instance, sheetreader_table_function); + loader.RegisterFunction(sheetreader_table_function); } -void SheetreaderExtension::Load(DuckDB &db) { - LoadInternal(*db.instance); +void SheetreaderExtension::Load(ExtensionLoader &loader) { + LoadInternal(loader); } std::string 
SheetreaderExtension::Name() { return "sheetreader"; @@ -973,16 +972,7 @@ std::string SheetreaderExtension::Name() { extern "C" { -DUCKDB_EXTENSION_API void sheetreader_init(duckdb::DatabaseInstance &db) { - duckdb::DuckDB db_wrapper(db); - db_wrapper.LoadExtension(); -} - -DUCKDB_EXTENSION_API const char *sheetreader_version() { - return duckdb::DuckDB::LibraryVersion(); +DUCKDB_CPP_EXTENSION_ENTRY(sheetreader, loader) { + duckdb::LoadInternal(loader); } } - -#ifndef DUCKDB_EXTENSION_MAIN -#error DUCKDB_EXTENSION_MAIN not defined -#endif diff --git a/todo.md b/todo.md new file mode 100644 index 0000000..4b7473e --- /dev/null +++ b/todo.md @@ -0,0 +1,36 @@ +# Ubuntu Verification Tasks + +## Context +The `sheetreader-duckdb` extension has been updated for DuckDB v1.4.0 compatibility. +The code changes are applied in `src/` (migrated to `ExtensionLoader` API). +Verification is required on an Ubuntu machine. + +## Tasks + +1. **Install Dependencies** + Ensure the following packages are installed: + ```bash + sudo apt-get update && sudo apt-get install -y git cmake build-essential ninja-build libssl-dev python3-dev + ``` + +2. **Build Extension** + Build the extension from source using `ninja`: + ```bash + # In the sheetreader-duckdb directory + GEN=ninja make + ``` + +3. **Verify Functionality** + Run a test query using the built DuckDB binary (which has the extension pre-loaded): + + ```bash + # Create a simple test script + echo "SELECT * FROM sheetreader('docker-demo/test.xlsx');" > test_verification.sql + + # Run it with the built binary + ./build/release/duckdb < test_verification.sql + ``` + +4. **Confirm Success** + - Verify that the output shows the contents of the Excel file (Numeric0 column with values). + - Confirm that the extension builds and runs correctly with DuckDB v1.4.0+ API. 
From 16f075755da2a7228edeca6a0740eeb68206655d Mon Sep 17 00:00:00 2001 From: midhun Date: Thu, 20 Nov 2025 09:47:02 +0100 Subject: [PATCH 04/12] Fix DuckDB v1.4.0 extension API compatibility - Updated Load method signature from ExtensionLoader to DuckDB &db - Changed to use ExtensionUtil::RegisterFunction instead of loader.RegisterFunction - Updated extern C entry point to use new API - Added build dependencies to Dockerfile (git, cmake, ninja, ccache) - Created docker-compose.yml for easier development workflow - Added test_verification.sql for testing - Updated README with verification instructions --- docker-demo/Dockerfile | 7 ++ docker-demo/README.md | 105 ++++++++++++++++---------- docker-demo/docker-compose.yml | 17 +++++ docker-demo/test_verification.sql | 1 + docker-demo/todo.md | 36 --------- src/include/sheetreader_extension.hpp | 2 +- src/sheetreader_extension.cpp | 20 +++-- todo.md | 36 --------- 8 files changed, 105 insertions(+), 119 deletions(-) create mode 100644 docker-demo/docker-compose.yml create mode 100644 docker-demo/test_verification.sql delete mode 100644 docker-demo/todo.md delete mode 100644 todo.md diff --git a/docker-demo/Dockerfile b/docker-demo/Dockerfile index db68a0b..919c728 100644 --- a/docker-demo/Dockerfile +++ b/docker-demo/Dockerfile @@ -5,6 +5,13 @@ FROM ubuntu:22.04 RUN apt-get update && apt-get install -y \ wget \ unzip \ + git \ + cmake \ + build-essential \ + ninja-build \ + libssl-dev \ + python3-dev \ + ccache \ && rm -rf /var/lib/apt/lists/* # Download and install DuckDB diff --git a/docker-demo/README.md b/docker-demo/README.md index 6123eb4..33c741a 100644 --- a/docker-demo/README.md +++ b/docker-demo/README.md @@ -1,72 +1,97 @@ # SheetReader DuckDB Docker Demo -Demo of the **sheetreader-duckdb** extension running in a Docker container to safely query Excel files with SQL. +Demo of the **sheetreader-duckdb** extension with DuckDB v1.4.0+ compatibility. 
## Prerequisites -- Docker Desktop must be running +- Docker and Docker Compose installed and running -## Quick Start (Interactive Mode) +## DuckDB v1.4.0 Extension Verification -### Step 1: Navigate to the demo directory +This setup allows you to verify that the sheetreader extension works correctly with DuckDB v1.4.0+. + +### Build and Test the Extension + +**Step 1: Navigate to the demo directory** ```bash -cd c:\Users\mithu\OneDrive\Desktop\SA_Harry\SheetReader\docker-demo +cd docker-demo ``` -### Step 2: Build and start DuckDB (first time only) +**Step 2: Build the Docker image** ```bash -docker-compose build +docker compose build ``` -### Step 3: Start DuckDB +**Step 3: Build the extension from source** ```bash -docker-compose run --rm duckdb +docker compose run --rm sheetreader-dev bash -c "GEN=ninja make" ``` -### Step 4: Inside DuckDB, run SQL commands -```sql --- Install and load the extension -INSTALL sheetreader FROM community; -LOAD sheetreader; - --- Query your Excel file -SELECT * FROM sheetreader('test.xlsx'); - --- Get statistics -SELECT - MIN(Numeric0) as min_value, - MAX(Numeric0) as max_value, - AVG(Numeric0) as avg_value, - SUM(Numeric0) as sum_value -FROM sheetreader('test.xlsx'); - --- Create a table -CREATE TABLE excel_data AS FROM sheetreader('test.xlsx'); - --- Filter data -SELECT * FROM excel_data WHERE Numeric0 > 50; +This will: +- Build DuckDB v1.4.0+ from source +- Compile the sheetreader extension with the new API +- Create a DuckDB binary with the extension pre-loaded + +**Step 4: Run the verification test** +```bash +docker compose run --rm sheetreader-dev bash -c "./build/release/duckdb < docker-demo/test_verification.sql" ``` -### Step 5: Exit when done +**Expected output:** ``` -.exit +┌──────────┐ +│ Numeric0 │ +│ double │ +├──────────┤ +│ 92.0 │ +│ 48.0 │ +│ 99.0 │ +│ 35.0 │ +│ 97.0 │ +└──────────┘ ``` +If you see this output, the extension is working correctly with DuckDB v1.4.0+! 
✅ + +--- + -## Run the Full Demo Script (Automated) +## Interactive Development + +For interactive development and testing: -To run all queries automatically: +**Start an interactive shell:** +```bash +docker compose run --rm sheetreader-dev bash +``` +**Inside the container, you can:** ```bash -docker-compose run --rm duckdb -init demo.sql +# Build the extension +GEN=ninja make + + +# Start DuckDB interactively +./build/release/duckdb +``` + +**Inside DuckDB, try queries:** +```sql +-- Query the Excel file +SELECT * FROM sheetreader('docker-demo/test.xlsx'); + +``` + +**Exit:** +``` +.exit # Exit DuckDB +exit # Exit container ``` --- ## Files -- **Dockerfile** - Container setup with DuckDB -- **docker-compose.yml** - Docker configuration (includes volume mounts) -- **demo.sql** - Demo SQL script -- **test.xlsx** - Sample Excel file with 5 random numbers +- **Dockerfile** - Ubuntu 22.04 with build dependencies (git, cmake, ninja, etc.) +- **docker-compose.yml** - Docker Compose setup with volume mounts and ccache +- **test.xlsx** - Sample Excel file with test data +- **test_verification.sql** - Verification query for testing diff --git a/docker-demo/docker-compose.yml b/docker-demo/docker-compose.yml new file mode 100644 index 0000000..28b88cf --- /dev/null +++ b/docker-demo/docker-compose.yml @@ -0,0 +1,17 @@ +version: '3.8' + +services: + sheetreader-dev: + build: + context: . 
+ dockerfile: Dockerfile + volumes: + - ../:/workspace/sheetreader-duckdb + - ccache_vol:/root/.ccache + environment: + - PATH=/usr/lib/ccache:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + working_dir: /workspace/sheetreader-duckdb + command: /bin/bash + +volumes: + ccache_vol: diff --git a/docker-demo/test_verification.sql b/docker-demo/test_verification.sql new file mode 100644 index 0000000..cc280a9 --- /dev/null +++ b/docker-demo/test_verification.sql @@ -0,0 +1 @@ +SELECT * FROM sheetreader('docker-demo/test.xlsx'); diff --git a/docker-demo/todo.md b/docker-demo/todo.md deleted file mode 100644 index 4b7473e..0000000 --- a/docker-demo/todo.md +++ /dev/null @@ -1,36 +0,0 @@ -# Ubuntu Verification Tasks - -## Context -The `sheetreader-duckdb` extension has been updated for DuckDB v1.4.0 compatibility. -The code changes are applied in `src/` (migrated to `ExtensionLoader` API). -Verification is required on an Ubuntu machine. - -## Tasks - -1. **Install Dependencies** - Ensure the following packages are installed: - ```bash - sudo apt-get update && sudo apt-get install -y git cmake build-essential ninja-build libssl-dev python3-dev - ``` - -2. **Build Extension** - Build the extension from source using `ninja`: - ```bash - # In the sheetreader-duckdb directory - GEN=ninja make - ``` - -3. **Verify Functionality** - Run a test query using the built DuckDB binary (which has the extension pre-loaded): - - ```bash - # Create a simple test script - echo "SELECT * FROM sheetreader('docker-demo/test.xlsx');" > test_verification.sql - - # Run it with the built binary - ./build/release/duckdb < test_verification.sql - ``` - -4. **Confirm Success** - - Verify that the output shows the contents of the Excel file (Numeric0 column with values). - - Confirm that the extension builds and runs correctly with DuckDB v1.4.0+ API. 
diff --git a/src/include/sheetreader_extension.hpp b/src/include/sheetreader_extension.hpp index ff7d9ba..45cda24 100644 --- a/src/include/sheetreader_extension.hpp +++ b/src/include/sheetreader_extension.hpp @@ -14,7 +14,7 @@ namespace duckdb { class SheetreaderExtension : public Extension { public: - void Load(ExtensionLoader &loader) override; + void Load(DuckDB &db) override; std::string Name() override; }; diff --git a/src/sheetreader_extension.cpp b/src/sheetreader_extension.cpp index 0bc503f..05c40d0 100644 --- a/src/sheetreader_extension.cpp +++ b/src/sheetreader_extension.cpp @@ -28,6 +28,8 @@ #include "sheetreader_extension.hpp" #include +#include "duckdb/main/extension_util.hpp" +#include "duckdb/main/database.hpp" namespace duckdb { @@ -939,7 +941,7 @@ inline unique_ptr SheetreaderBindFun(ClientContext &context, Table return std::move(bind_data); } -static void LoadInternal(ExtensionLoader &loader) { +static void LoadInternal(DatabaseInstance &db) { // Register a table function TableFunction sheetreader_table_function("sheetreader", {LogicalType::VARCHAR}, SheetreaderCopyTableFun, SheetreaderBindFun, SRGlobalTableFunctionState::Init, @@ -958,11 +960,11 @@ static void LoadInternal(ExtensionLoader &loader) { sheetreader_table_function.named_parameters["force_types"] = LogicalType::BOOLEAN; sheetreader_table_function.named_parameters["coerce_to_string"] = LogicalType::BOOLEAN; - loader.RegisterFunction(sheetreader_table_function); + ExtensionUtil::RegisterFunction(db, sheetreader_table_function); } -void SheetreaderExtension::Load(ExtensionLoader &loader) { - LoadInternal(loader); +void SheetreaderExtension::Load(DuckDB &db) { + LoadInternal(*db.instance); } std::string SheetreaderExtension::Name() { return "sheetreader"; @@ -972,7 +974,13 @@ std::string SheetreaderExtension::Name() { extern "C" { -DUCKDB_CPP_EXTENSION_ENTRY(sheetreader, loader) { - duckdb::LoadInternal(loader); +DUCKDB_EXTENSION_API void sheetreader_init(duckdb::DatabaseInstance &db) { + 
duckdb::DuckDB db_wrapper(db); + db_wrapper.LoadExtension(); +} + +DUCKDB_EXTENSION_API const char *sheetreader_version() { + return duckdb::DuckDB::LibraryVersion(); } } + diff --git a/todo.md b/todo.md deleted file mode 100644 index 4b7473e..0000000 --- a/todo.md +++ /dev/null @@ -1,36 +0,0 @@ -# Ubuntu Verification Tasks - -## Context -The `sheetreader-duckdb` extension has been updated for DuckDB v1.4.0 compatibility. -The code changes are applied in `src/` (migrated to `ExtensionLoader` API). -Verification is required on an Ubuntu machine. - -## Tasks - -1. **Install Dependencies** - Ensure the following packages are installed: - ```bash - sudo apt-get update && sudo apt-get install -y git cmake build-essential ninja-build libssl-dev python3-dev - ``` - -2. **Build Extension** - Build the extension from source using `ninja`: - ```bash - # In the sheetreader-duckdb directory - GEN=ninja make - ``` - -3. **Verify Functionality** - Run a test query using the built DuckDB binary (which has the extension pre-loaded): - - ```bash - # Create a simple test script - echo "SELECT * FROM sheetreader('docker-demo/test.xlsx');" > test_verification.sql - - # Run it with the built binary - ./build/release/duckdb < test_verification.sql - ``` - -4. **Confirm Success** - - Verify that the output shows the contents of the Excel file (Numeric0 column with values). - - Confirm that the extension builds and runs correctly with DuckDB v1.4.0+ API. 
From a1e074ccd84b35d7655cf8db3ac070e93b8a250a Mon Sep 17 00:00:00 2001 From: midhun Date: Fri, 21 Nov 2025 13:54:12 +0100 Subject: [PATCH 05/12] Add DUCKDB_EXTENSION_MAIN validation check for template compliance --- src/sheetreader_extension.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/sheetreader_extension.cpp b/src/sheetreader_extension.cpp index 05c40d0..8bce1d5 100644 --- a/src/sheetreader_extension.cpp +++ b/src/sheetreader_extension.cpp @@ -984,3 +984,6 @@ DUCKDB_EXTENSION_API const char *sheetreader_version() { } } +#ifndef DUCKDB_EXTENSION_MAIN +#error DUCKDB_EXTENSION_MAIN not defined +#endif From 11a8316eb5474209ba91ad289f6c6bb8e593495d Mon Sep 17 00:00:00 2001 From: midhun Date: Fri, 21 Nov 2025 14:05:26 +0100 Subject: [PATCH 06/12] Update CI workflow to build with DuckDB v1.4.0 - Changed from v1.1.0 to v1.4.0 to match extension API compatibility - Updated extension-ci-tools reference to @v1.4.0 - This ensures CI tests the correct API version --- .github/workflows/MainDistributionPipeline.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index f62ae90..2227e61 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -24,11 +24,11 @@ jobs: duckdb-stable-build: - name: Build extension binaries (DuckDB v1.1.0) + name: Build extension binaries (DuckDB v1.4.0) # needs: test-secrets-accessable - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.1.0 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.0 with: - duckdb_version: v1.1.0 + duckdb_version: v1.4.0 extension_name: sheetreader exclude_archs: "windows_amd64_rtools" From 75c4c09ffc72c5fd762f5567830851d361510f31 Mon Sep 17 00:00:00 2001 From: midhun Date: Fri, 21 Nov 2025 14:07:39 +0100 Subject: [PATCH 07/12] Fix CI workflow: Add required 
ci_tools_version parameter --- .github/workflows/MainDistributionPipeline.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 2227e61..af12661 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -29,6 +29,7 @@ jobs: uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.0 with: duckdb_version: v1.4.0 + ci_tools_version: v1.4.0 extension_name: sheetreader exclude_archs: "windows_amd64_rtools" From 031a8a0b3530fdd8ac00c6c91130554c5eb8eb40 Mon Sep 17 00:00:00 2001 From: midhun Date: Fri, 21 Nov 2025 14:31:17 +0100 Subject: [PATCH 08/12] Add missing Version() method to Extension class - Added Version() method declaration in header - Added Version() method implementation - Fixes 'marked override but does not override' error - Completes DuckDB v1.4.0 Extension API migration --- src/include/sheetreader_extension.hpp | 1 + src/sheetreader_extension.cpp | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/src/include/sheetreader_extension.hpp b/src/include/sheetreader_extension.hpp index 45cda24..fd9c4ba 100644 --- a/src/include/sheetreader_extension.hpp +++ b/src/include/sheetreader_extension.hpp @@ -16,6 +16,7 @@ class SheetreaderExtension : public Extension { public: void Load(DuckDB &db) override; std::string Name() override; + std::string Version() const override; }; //! 
Contains all data that is determined during the bind function diff --git a/src/sheetreader_extension.cpp b/src/sheetreader_extension.cpp index 8bce1d5..76edd25 100644 --- a/src/sheetreader_extension.cpp +++ b/src/sheetreader_extension.cpp @@ -970,6 +970,14 @@ std::string SheetreaderExtension::Name() { return "sheetreader"; } +std::string SheetreaderExtension::Version() const { +#ifdef EXT_VERSION_SHEETREADER + return EXT_VERSION_SHEETREADER; +#else + return ""; +#endif +} + } // namespace duckdb extern "C" { From 4a518dd2357196409f54858f422a5a86becd12a3 Mon Sep 17 00:00:00 2001 From: midhun Date: Fri, 21 Nov 2025 14:43:40 +0100 Subject: [PATCH 09/12] Update DuckDB submodule to v1.4.0 - Changed from v1.1.0-migration to v1.4.0 - Required for Extension API compatibility --- duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb b/duckdb index fa5c2fe..b8a06e4 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit fa5c2fe15f3da5f32397b009196c0895fce60820 +Subproject commit b8a06e4a22672e254cd0baa68a3dbed2eb51c56e From 782e1ce08276ee3bae7f8d9ef7aaf52e977e0183 Mon Sep 17 00:00:00 2001 From: midhun Date: Fri, 21 Nov 2025 16:01:50 +0100 Subject: [PATCH 10/12] Fix RAM exhaustion during build and update DuckDB API compatibility --- docker-demo/README.md | 2 +- docker-demo/docker-compose.yml | 4 ++++ duckdb | 2 +- src/include/sheetreader_extension.hpp | 2 +- src/sheetreader_extension.cpp | 28 ++++++++++++++------------- 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/docker-demo/README.md b/docker-demo/README.md index 33c741a..314db48 100644 --- a/docker-demo/README.md +++ b/docker-demo/README.md @@ -24,7 +24,7 @@ docker compose build **Step 3: Build the extension from source** ```bash -docker compose run --rm sheetreader-dev bash -c "GEN=ninja make" +docker compose run --rm sheetreader-dev bash -c "GEN=ninja NINJA_BUILD_FLAGS='-j2' make" ``` This will: diff --git a/docker-demo/docker-compose.yml 
b/docker-demo/docker-compose.yml index 28b88cf..ec1db39 100644 --- a/docker-demo/docker-compose.yml +++ b/docker-demo/docker-compose.yml @@ -12,6 +12,10 @@ services: - PATH=/usr/lib/ccache:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin working_dir: /workspace/sheetreader-duckdb command: /bin/bash + deploy: + resources: + limits: + memory: 12G # Cap container memory at 12GB; sized for a 16GB host (leaves ~4GB for the host system) - adjust for your machine volumes: ccache_vol: diff --git a/duckdb b/duckdb index b8a06e4..68d7555 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit b8a06e4a22672e254cd0baa68a3dbed2eb51c56e +Subproject commit 68d7555f68bd25c1a251ccca2e6338949c33986a diff --git a/src/include/sheetreader_extension.hpp b/src/include/sheetreader_extension.hpp index fd9c4ba..4068570 100644 --- a/src/include/sheetreader_extension.hpp +++ b/src/include/sheetreader_extension.hpp @@ -14,7 +14,7 @@ namespace duckdb { class SheetreaderExtension : public Extension { public: - void Load(DuckDB &db) override; + void Load(ExtensionLoader &loader) override; std::string Name() override; std::string Version() const override; }; diff --git a/src/sheetreader_extension.cpp b/src/sheetreader_extension.cpp index 76edd25..8ec7bc4 100644 --- a/src/sheetreader_extension.cpp +++ b/src/sheetreader_extension.cpp @@ -1,7 +1,7 @@ #include "duckdb.h" #include "duckdb/common/assert.hpp" #include "duckdb/common/helper.hpp" -#include "duckdb/common/multi_file_reader.hpp" +#include "duckdb/common/multi_file/multi_file_reader.hpp" #include "duckdb/common/typedefs.hpp" #include "duckdb/common/types.hpp" #include "duckdb/common/types/data_chunk.hpp" @@ -28,7 +28,6 @@ #include "sheetreader_extension.hpp" #include -#include "duckdb/main/extension_util.hpp" #include "duckdb/main/database.hpp" namespace duckdb { @@ -639,14 +638,17 @@ inline unique_ptr SheetreaderBindFun(ClientContext &context, Table // Get the file name from the input parameters & verify it exists auto file_reader = MultiFileReader::Create(input.table_function); auto 
file_list = file_reader->CreateFileList(context, input.inputs[0]); - auto file_names = file_list->GetAllFiles(); + auto file_infos = file_list->GetAllFiles(); - if (file_names.empty()) { + if (file_infos.empty()) { throw BinderException("No files found in path"); - } else if (file_names.size() > 1) { + } else if (file_infos.size() > 1) { throw BinderException("Only one file can be read at a time"); } + // Extract the file path from OpenFileInfo + string file_name = file_infos[0].path; + //! User specified sheet name string sheet_name; //! User specified sheet index -- starts with 1 @@ -682,12 +684,12 @@ inline unique_ptr SheetreaderBindFun(ClientContext &context, Table try { if (!sheet_name.empty()) { - bind_data = make_uniq(file_names[0], sheet_name); + bind_data = make_uniq(file_name, sheet_name); } else if (sheet_index_set) { - bind_data = make_uniq(file_names[0], sheet_index); + bind_data = make_uniq(file_name, sheet_index); } else { // Default: sheet_index=1 - bind_data = make_uniq(file_names[0]); + bind_data = make_uniq(file_name); } } catch (std::exception &e) { throw BinderException(e.what()); @@ -941,7 +943,7 @@ inline unique_ptr SheetreaderBindFun(ClientContext &context, Table return std::move(bind_data); } -static void LoadInternal(DatabaseInstance &db) { +static void LoadInternal(ExtensionLoader &loader) { // Register a table function TableFunction sheetreader_table_function("sheetreader", {LogicalType::VARCHAR}, SheetreaderCopyTableFun, SheetreaderBindFun, SRGlobalTableFunctionState::Init, @@ -960,11 +962,11 @@ static void LoadInternal(DatabaseInstance &db) { sheetreader_table_function.named_parameters["force_types"] = LogicalType::BOOLEAN; sheetreader_table_function.named_parameters["coerce_to_string"] = LogicalType::BOOLEAN; - ExtensionUtil::RegisterFunction(db, sheetreader_table_function); + loader.RegisterFunction(sheetreader_table_function); } -void SheetreaderExtension::Load(DuckDB &db) { - LoadInternal(*db.instance); +void 
SheetreaderExtension::Load(ExtensionLoader &loader) { + LoadInternal(loader); } std::string SheetreaderExtension::Name() { return "sheetreader"; @@ -984,7 +986,7 @@ extern "C" { DUCKDB_EXTENSION_API void sheetreader_init(duckdb::DatabaseInstance &db) { duckdb::DuckDB db_wrapper(db); - db_wrapper.LoadExtension(); + db_wrapper.LoadStaticExtension(); } DUCKDB_EXTENSION_API const char *sheetreader_version() { From 125a8c7019d3e3ef4b06b1324ebb9f7e125e6fb2 Mon Sep 17 00:00:00 2001 From: midhun Date: Fri, 21 Nov 2025 16:08:29 +0100 Subject: [PATCH 11/12] Update CI pipeline to use DuckDB v1.4.2 - Update MainDistributionPipeline.yml to use v1.4.2 instead of v1.4.0 - This matches the version in the duckdb submodule and Dockerfile --- .github/workflows/MainDistributionPipeline.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index af12661..43148fa 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -24,12 +24,12 @@ jobs: duckdb-stable-build: - name: Build extension binaries (DuckDB v1.4.0) + name: Build extension binaries (DuckDB v1.4.2) # needs: test-secrets-accessable - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.0 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.2 with: - duckdb_version: v1.4.0 - ci_tools_version: v1.4.0 + duckdb_version: v1.4.2 + ci_tools_version: v1.4.2 extension_name: sheetreader exclude_archs: "windows_amd64_rtools" From 215263cb84b78cea209af36a8857369badb6170b Mon Sep 17 00:00:00 2001 From: midhun Date: Fri, 21 Nov 2025 17:20:01 +0100 Subject: [PATCH 12/12] Fix Wasm threading and macOS linker errors --- src/sheetreader_extension.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/sheetreader_extension.cpp b/src/sheetreader_extension.cpp index 8ec7bc4..9835262 100644 --- 
a/src/sheetreader_extension.cpp +++ b/src/sheetreader_extension.cpp @@ -34,6 +34,10 @@ namespace duckdb { //! Determine default number of threads inline idx_t DefaultThreads() { +#ifdef __EMSCRIPTEN__ + // WebAssembly doesn't support threading in MVP builds + return 1; +#else // Returns 0 if not able to detect idx_t sys_number_threads = std::thread::hardware_concurrency(); @@ -45,6 +49,7 @@ inline idx_t DefaultThreads() { } return appropriate_number_threads; +#endif } // ===================================== @@ -992,6 +997,11 @@ DUCKDB_EXTENSION_API void sheetreader_init(duckdb::DatabaseInstance &db) { DUCKDB_EXTENSION_API const char *sheetreader_version() { return duckdb::DuckDB::LibraryVersion(); } + +DUCKDB_EXTENSION_API void sheetreader_duckdb_cpp_init(duckdb::ExtensionLoader &loader) { + duckdb::SheetreaderExtension extension; + extension.Load(loader); +} } #ifndef DUCKDB_EXTENSION_MAIN