diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index f62ae90..43148fa 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -24,11 +24,12 @@ jobs: duckdb-stable-build: - name: Build extension binaries (DuckDB v1.1.0) + name: Build extension binaries (DuckDB v1.4.2) # needs: test-secrets-accessable - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.1.0 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.2 with: - duckdb_version: v1.1.0 + duckdb_version: v1.4.2 + ci_tools_version: v1.4.2 extension_name: sheetreader exclude_archs: "windows_amd64_rtools" diff --git a/docker-demo/Dockerfile b/docker-demo/Dockerfile new file mode 100644 index 0000000..919c728 --- /dev/null +++ b/docker-demo/Dockerfile @@ -0,0 +1,32 @@ +# Use Ubuntu as base image +FROM ubuntu:22.04 + +# Install dependencies +RUN apt-get update && apt-get install -y \ + wget \ + unzip \ + git \ + cmake \ + build-essential \ + ninja-build \ + libssl-dev \ + python3-dev \ + ccache \ + && rm -rf /var/lib/apt/lists/* + +# Download and install DuckDB +RUN wget https://github.com/duckdb/duckdb/releases/download/v1.4.2/duckdb_cli-linux-amd64.zip \ + && unzip duckdb_cli-linux-amd64.zip \ + && mv duckdb /usr/local/bin/ \ + && chmod +x /usr/local/bin/duckdb \ + && rm duckdb_cli-linux-amd64.zip + +# Create working directory +WORKDIR /workspace + +# Copy the Excel file and demo script +COPY test.xlsx /workspace/ +COPY demo.sql /workspace/ + +# Set the entrypoint to bash for interactive use +CMD ["/bin/bash"] diff --git a/docker-demo/README.md b/docker-demo/README.md new file mode 100644 index 0000000..314db48 --- /dev/null +++ b/docker-demo/README.md @@ -0,0 +1,97 @@ +# SheetReader DuckDB Docker Demo + +Demo of the **sheetreader-duckdb** extension with DuckDB v1.4.0+ compatibility. 
+ +## Prerequisites + +- Docker and Docker Compose installed and running + +## DuckDB v1.4.0+ Extension Verification + +This setup allows you to verify that the sheetreader extension works correctly with DuckDB v1.4.0+. + +### Build and Test the Extension + +**Step 1: Navigate to the demo directory** +```bash +cd docker-demo +``` + +**Step 2: Build the Docker image** +```bash +docker compose build +``` + +**Step 3: Build the extension from source** +```bash +docker compose run --rm sheetreader-dev bash -c "GEN=ninja NINJA_BUILD_FLAGS='-j2' make" +``` + +This will: +- Build DuckDB v1.4.0+ from source +- Compile the sheetreader extension with the new API +- Create a DuckDB binary with the extension pre-loaded + +**Step 4: Run the verification test** (paths inside the container are relative to the repository root, which is the container's working directory) +```bash +docker compose run --rm sheetreader-dev bash -c "./build/release/duckdb < docker-demo/test_verification.sql" +``` + +**Expected output:** +``` +┌──────────┐ +│ Numeric0 │ +│ double │ +├──────────┤ +│ 92.0 │ +│ 48.0 │ +│ 99.0 │ +│ 35.0 │ +│ 97.0 │ +└──────────┘ +``` + +If you see this output, the extension is working correctly with DuckDB v1.4.0+! ✅ + +--- + +## Interactive Development + +For interactive development and testing: + +**Start an interactive shell:** +```bash +docker compose run --rm sheetreader-dev bash +``` + +**Inside the container, you can:** +```bash +# Build the extension +GEN=ninja make + + +# Start DuckDB interactively +./build/release/duckdb +``` + +**Inside DuckDB, try queries:** +```sql +-- Query the Excel file +SELECT * FROM sheetreader('docker-demo/test.xlsx'); + +``` + +**Exit:** +``` +.exit # Exit DuckDB +exit # Exit container +``` + +--- + +## Files + +- **Dockerfile** - Ubuntu 22.04 with build dependencies (git, cmake, ninja, etc.) 
+- **docker-compose.yml** - Docker Compose setup with volume mounts and ccache +- **test.xlsx** - Sample Excel file with test data +- **test_verification.sql** - Verification query for testing diff --git a/docker-demo/demo.sql b/docker-demo/demo.sql new file mode 100644 index 0000000..199545e --- /dev/null +++ b/docker-demo/demo.sql @@ -0,0 +1,46 @@ +-- SheetReader DuckDB Extension Demo +-- This script demonstrates how to use the sheetreader extension to query Excel files + +-- Step 1: Install the sheetreader extension from community extensions +INSTALL sheetreader FROM community; + +-- Step 2: Load the extension +LOAD sheetreader; + +-- Step 3: Query the Excel file directly +.print '=== Reading test.xlsx with sheetreader ===' +SELECT * FROM sheetreader('test.xlsx'); + +-- Step 4: Get row count +.print '' +.print '=== Row count ===' +SELECT COUNT(*) as total_rows FROM sheetreader('test.xlsx'); + +-- Step 5: Calculate statistics on the data +.print '' +.print '=== Statistics ===' +SELECT + MIN(Numeric0) as min_value, + MAX(Numeric0) as max_value, + AVG(Numeric0) as avg_value, + SUM(Numeric0) as sum_value +FROM sheetreader('test.xlsx'); + +-- Step 6: Create a table from the Excel data +.print '' +.print '=== Creating table from Excel data ===' +CREATE TABLE excel_data AS +FROM sheetreader('test.xlsx'); + +-- Step 7: Query the created table +.print '' +.print '=== Querying the created table ===' +SELECT * FROM excel_data; + +-- Step 8: Filter data (example: values greater than 50) +.print '' +.print '=== Filtering values > 50 ===' +SELECT * FROM excel_data WHERE Numeric0 > 50; + +.print '' +.print '=== Demo completed successfully! ===' diff --git a/docker-demo/docker-compose.yml b/docker-demo/docker-compose.yml new file mode 100644 index 0000000..ec1db39 --- /dev/null +++ b/docker-demo/docker-compose.yml @@ -0,0 +1,21 @@ +version: '3.8' + +services: + sheetreader-dev: + build: + context: . 
+ dockerfile: Dockerfile + volumes: + - ../:/workspace/sheetreader-duckdb + - ccache_vol:/root/.ccache + environment: + - PATH=/usr/lib/ccache:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + working_dir: /workspace/sheetreader-duckdb + command: /bin/bash + deploy: + resources: + limits: + memory: 12G # Cap container memory at 12 GB (sized for a 16 GB host; adjust for your machine) + +volumes: + ccache_vol: diff --git a/docker-demo/test_verification.sql b/docker-demo/test_verification.sql new file mode 100644 index 0000000..cc280a9 --- /dev/null +++ b/docker-demo/test_verification.sql @@ -0,0 +1 @@ +SELECT * FROM sheetreader('docker-demo/test.xlsx'); diff --git a/duckdb b/duckdb index fa5c2fe..68d7555 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit fa5c2fe15f3da5f32397b009196c0895fce60820 +Subproject commit 68d7555f68bd25c1a251ccca2e6338949c33986a diff --git a/src/include/sheetreader_extension.hpp b/src/include/sheetreader_extension.hpp index 45cda24..4068570 100644 --- a/src/include/sheetreader_extension.hpp +++ b/src/include/sheetreader_extension.hpp @@ -14,8 +14,9 @@ namespace duckdb { class SheetreaderExtension : public Extension { public: - void Load(DuckDB &db) override; + void Load(ExtensionLoader &loader) override; std::string Name() override; + std::string Version() const override; }; //! 
Contains all data that is determined during the bind function diff --git a/src/sheetreader_extension.cpp b/src/sheetreader_extension.cpp index d2a55bf..9835262 100644 --- a/src/sheetreader_extension.cpp +++ b/src/sheetreader_extension.cpp @@ -1,7 +1,7 @@ #include "duckdb.h" #include "duckdb/common/assert.hpp" #include "duckdb/common/helper.hpp" -#include "duckdb/common/multi_file_reader.hpp" +#include "duckdb/common/multi_file/multi_file_reader.hpp" #include "duckdb/common/typedefs.hpp" #include "duckdb/common/types.hpp" #include "duckdb/common/types/data_chunk.hpp" @@ -25,15 +25,19 @@ #include "duckdb/common/exception.hpp" #include "duckdb/common/string_util.hpp" #include "duckdb/function/table_function.hpp" -#include "duckdb/main/extension_util.hpp" #include "sheetreader_extension.hpp" #include +#include "duckdb/main/database.hpp" namespace duckdb { //! Determine default number of threads inline idx_t DefaultThreads() { +#ifdef __EMSCRIPTEN__ + // WebAssembly doesn't support threading in MVP builds + return 1; +#else // Returns 0 if not able to detect idx_t sys_number_threads = std::thread::hardware_concurrency(); @@ -45,6 +49,7 @@ inline idx_t DefaultThreads() { } return appropriate_number_threads; +#endif } // ===================================== @@ -638,14 +643,17 @@ inline unique_ptr SheetreaderBindFun(ClientContext &context, Table // Get the file name from the input parameters & verify it exists auto file_reader = MultiFileReader::Create(input.table_function); auto file_list = file_reader->CreateFileList(context, input.inputs[0]); - auto file_names = file_list->GetAllFiles(); + auto file_infos = file_list->GetAllFiles(); - if (file_names.empty()) { + if (file_infos.empty()) { throw BinderException("No files found in path"); - } else if (file_names.size() > 1) { + } else if (file_infos.size() > 1) { throw BinderException("Only one file can be read at a time"); } + // Extract the file path from OpenFileInfo + string file_name = file_infos[0].path; + //! 
User specified sheet name string sheet_name; //! User specified sheet index -- starts with 1 @@ -681,12 +689,12 @@ inline unique_ptr SheetreaderBindFun(ClientContext &context, Table try { if (!sheet_name.empty()) { - bind_data = make_uniq(file_names[0], sheet_name); + bind_data = make_uniq(file_name, sheet_name); } else if (sheet_index_set) { - bind_data = make_uniq(file_names[0], sheet_index); + bind_data = make_uniq(file_name, sheet_index); } else { // Default: sheet_index=1 - bind_data = make_uniq(file_names[0]); + bind_data = make_uniq(file_name); } } catch (std::exception &e) { throw BinderException(e.what()); @@ -940,7 +948,7 @@ inline unique_ptr SheetreaderBindFun(ClientContext &context, Table return std::move(bind_data); } -static void LoadInternal(DatabaseInstance &instance) { +static void LoadInternal(ExtensionLoader &loader) { // Register a table function TableFunction sheetreader_table_function("sheetreader", {LogicalType::VARCHAR}, SheetreaderCopyTableFun, SheetreaderBindFun, SRGlobalTableFunctionState::Init, @@ -959,28 +967,41 @@ static void LoadInternal(DatabaseInstance &instance) { sheetreader_table_function.named_parameters["force_types"] = LogicalType::BOOLEAN; sheetreader_table_function.named_parameters["coerce_to_string"] = LogicalType::BOOLEAN; - ExtensionUtil::RegisterFunction(instance, sheetreader_table_function); + loader.RegisterFunction(sheetreader_table_function); } -void SheetreaderExtension::Load(DuckDB &db) { - LoadInternal(*db.instance); +void SheetreaderExtension::Load(ExtensionLoader &loader) { + LoadInternal(loader); } std::string SheetreaderExtension::Name() { return "sheetreader"; } +std::string SheetreaderExtension::Version() const { +#ifdef EXT_VERSION_SHEETREADER + return EXT_VERSION_SHEETREADER; +#else + return ""; +#endif +} + } // namespace duckdb extern "C" { DUCKDB_EXTENSION_API void sheetreader_init(duckdb::DatabaseInstance &db) { duckdb::DuckDB db_wrapper(db); - db_wrapper.LoadExtension(); + 
db_wrapper.LoadStaticExtension<duckdb::SheetreaderExtension>(); } DUCKDB_EXTENSION_API const char *sheetreader_version() { return duckdb::DuckDB::LibraryVersion(); } + +DUCKDB_EXTENSION_API void sheetreader_duckdb_cpp_init(duckdb::ExtensionLoader &loader) { + duckdb::SheetreaderExtension extension; + extension.Load(loader); +} } #ifndef DUCKDB_EXTENSION_MAIN