Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions .github/workflows/MainDistributionPipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@ jobs:


duckdb-stable-build:
name: Build extension binaries (DuckDB v1.1.0)
name: Build extension binaries (DuckDB v1.4.2)
# needs: test-secrets-accessable
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.1.0
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.2
with:
duckdb_version: v1.1.0
duckdb_version: v1.4.2
ci_tools_version: v1.4.2
extension_name: sheetreader
exclude_archs: "windows_amd64_rtools"

Expand Down
32 changes: 32 additions & 0 deletions docker-demo/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Demo image: Ubuntu 22.04 with the build dependencies for the extension
# plus a released DuckDB CLI binary for running the demo script.
FROM ubuntu:22.04

# Install dependencies.
# DEBIAN_FRONTEND=noninteractive is set for this command only, so a package
# that asks configuration questions during installation (tzdata is a common
# one) cannot stall the image build waiting for input.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    wget \
    unzip \
    git \
    cmake \
    build-essential \
    ninja-build \
    libssl-dev \
    python3-dev \
    ccache \
    && rm -rf /var/lib/apt/lists/*

# Download and install the DuckDB v1.4.2 CLI into /usr/local/bin
RUN wget https://github.com/duckdb/duckdb/releases/download/v1.4.2/duckdb_cli-linux-amd64.zip \
    && unzip duckdb_cli-linux-amd64.zip \
    && mv duckdb /usr/local/bin/ \
    && chmod +x /usr/local/bin/duckdb \
    && rm duckdb_cli-linux-amd64.zip

# Create working directory
WORKDIR /workspace

# Copy the Excel file and demo script
COPY test.xlsx /workspace/
COPY demo.sql /workspace/

# Default to an interactive bash shell
CMD ["/bin/bash"]
97 changes: 97 additions & 0 deletions docker-demo/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# SheetReader DuckDB Docker Demo

Demo of the **sheetreader-duckdb** extension with DuckDB v1.4.0+ compatibility.

## Prerequisites

- Docker and Docker Compose installed and running

## DuckDB v1.4.0+ Extension Verification

This setup allows you to verify that the sheetreader extension works correctly with DuckDB v1.4.0+.

### Build and Test the Extension

**Step 1: Navigate to the demo directory**
```bash
cd docker-demo
```

**Step 2: Build the Docker image**
```bash
docker compose build
```

**Step 3: Build the extension from source**
```bash
docker compose run --rm sheetreader-dev bash -c "GEN=ninja NINJA_BUILD_FLAGS='-j2' make"
```

This will:
- Build DuckDB v1.4.0+ from source
- Compile the sheetreader extension with the new API
- Create a DuckDB binary with the extension pre-loaded

**Step 4: Run the verification test**
```bash
docker compose run --rm sheetreader-dev bash -c "./build/release/duckdb < docker-demo/test_verification.sql"
```

**Expected output:**
```
┌──────────┐
│ Numeric0 │
│ double │
├──────────┤
│ 92.0 │
│ 48.0 │
│ 99.0 │
│ 35.0 │
│ 97.0 │
└──────────┘
```

If you see this output, the extension is working correctly with DuckDB v1.4.0+! ✅

---

## Interactive Development

For interactive development and testing:

**Start an interactive shell:**
```bash
docker compose run --rm sheetreader-dev bash
```

**Inside the container, you can:**
```bash
# Build the extension
GEN=ninja make


# Start DuckDB interactively
./build/release/duckdb
```

**Inside DuckDB, try queries:**
```sql
-- Query the Excel file
SELECT * FROM sheetreader('docker-demo/test.xlsx');

```

**Exit:**
```
.exit # Exit DuckDB
exit # Exit container
```

---

## Files

- **Dockerfile** - Ubuntu 22.04 with build dependencies (git, cmake, ninja, etc.)
- **docker-compose.yml** - Docker Compose setup with volume mounts and ccache
- **test.xlsx** - Sample Excel file with test data
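- **test_verification.sql** - Verification query run in Step 4
- **demo.sql** - Demo script that installs the extension from the community repository and queries `test.xlsx`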
46 changes: 46 additions & 0 deletions docker-demo/demo.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
-- SheetReader DuckDB Extension Demo
-- This script demonstrates how to use the sheetreader extension to query Excel files.
-- It expects test.xlsx in the current working directory.

-- Step 1: Install the sheetreader extension from community extensions
INSTALL sheetreader FROM community;

-- Step 2: Load the extension
LOAD sheetreader;

-- Step 3: Query the Excel file directly
.print '=== Reading test.xlsx with sheetreader ==='
SELECT * FROM sheetreader('test.xlsx');

-- Step 4: Get row count
.print ''
.print '=== Row count ==='
SELECT COUNT(*) as total_rows FROM sheetreader('test.xlsx');

-- Step 5: Calculate statistics on the data
.print ''
.print '=== Statistics ==='
SELECT
    MIN(Numeric0) as min_value,
    MAX(Numeric0) as max_value,
    AVG(Numeric0) as avg_value,
    SUM(Numeric0) as sum_value
FROM sheetreader('test.xlsx');

-- Step 6: Create a table from the Excel data
-- OR REPLACE keeps the script re-runnable against a database file that
-- already contains excel_data from an earlier run.
.print ''
.print '=== Creating table from Excel data ==='
CREATE OR REPLACE TABLE excel_data AS
FROM sheetreader('test.xlsx');

-- Step 7: Query the created table
.print ''
.print '=== Querying the created table ==='
SELECT * FROM excel_data;

-- Step 8: Filter data (example: values greater than 50)
.print ''
.print '=== Filtering values > 50 ==='
SELECT * FROM excel_data WHERE Numeric0 > 50;

.print ''
.print '=== Demo completed successfully! ==='
21 changes: 21 additions & 0 deletions docker-demo/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Development container for building and testing the extension against the
# repository checkout mounted from the parent directory.
# The top-level `version` key is intentionally omitted: Docker Compose v2
# treats it as obsolete and prints a warning when it is present.
services:
  sheetreader-dev:
    build:
      context: .
      dockerfile: Dockerfile
    volumes:
      # Repository root (parent of this directory) mounted into the container
      - ../:/workspace/sheetreader-duckdb
      # Named volume so the ccache contents survive between container runs
      - ccache_vol:/root/.ccache
    environment:
      # /usr/lib/ccache comes first on PATH, ahead of the regular system directories
      - PATH=/usr/lib/ccache:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
    working_dir: /workspace/sheetreader-duckdb
    command: /bin/bash
    deploy:
      resources:
        limits:
          memory: 12G # Limit to 12GB, leaving 4GB for host system (assumes a 16GB host)

volumes:
  ccache_vol:
1 change: 1 addition & 0 deletions docker-demo/test_verification.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Verification query: read the sample workbook through the sheetreader table function.
-- The path is relative, so run this script from the repository root.
SELECT * FROM sheetreader('docker-demo/test.xlsx');
2 changes: 1 addition & 1 deletion duckdb
Submodule duckdb updated 7215 files
3 changes: 2 additions & 1 deletion src/include/sheetreader_extension.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ namespace duckdb {

//! Extension class of sheetreader; overrides the load, name and version hooks of Extension.
//! NOTE(review): this diff view lists both the removed Load(DuckDB &) declaration and the
//! Load(ExtensionLoader &) declaration replacing it -- confirm against the checked-in header.
class SheetreaderExtension : public Extension {
public:
void Load(DuckDB &db) override;
void Load(ExtensionLoader &loader) override;
//! Name of the extension
std::string Name() override;
//! Version string of the extension
std::string Version() const override;
};

//! Contains all data that is determined during the bind function
Expand Down
47 changes: 34 additions & 13 deletions src/sheetreader_extension.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "duckdb.h"
#include "duckdb/common/assert.hpp"
#include "duckdb/common/helper.hpp"
#include "duckdb/common/multi_file_reader.hpp"
#include "duckdb/common/multi_file/multi_file_reader.hpp"
#include "duckdb/common/typedefs.hpp"
#include "duckdb/common/types.hpp"
#include "duckdb/common/types/data_chunk.hpp"
Expand All @@ -25,15 +25,19 @@
#include "duckdb/common/exception.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/function/table_function.hpp"
#include "duckdb/main/extension_util.hpp"
#include "sheetreader_extension.hpp"

#include <duckdb/parser/parsed_data/create_scalar_function_info.hpp>
#include "duckdb/main/database.hpp"

namespace duckdb {

//! Determine default number of threads
inline idx_t DefaultThreads() {
#ifdef __EMSCRIPTEN__
// WebAssembly doesn't support threading in MVP builds
return 1;
#else
// Returns 0 if not able to detect
idx_t sys_number_threads = std::thread::hardware_concurrency();

Expand All @@ -45,6 +49,7 @@ inline idx_t DefaultThreads() {
}

return appropriate_number_threads;
#endif
}

// =====================================
Expand Down Expand Up @@ -638,14 +643,17 @@ inline unique_ptr<FunctionData> SheetreaderBindFun(ClientContext &context, Table
// Get the file name from the input parameters & verify it exists
auto file_reader = MultiFileReader::Create(input.table_function);
auto file_list = file_reader->CreateFileList(context, input.inputs[0]);
auto file_names = file_list->GetAllFiles();
auto file_infos = file_list->GetAllFiles();

if (file_names.empty()) {
if (file_infos.empty()) {
throw BinderException("No files found in path");
} else if (file_names.size() > 1) {
} else if (file_infos.size() > 1) {
throw BinderException("Only one file can be read at a time");
}

// Extract the file path from OpenFileInfo
string file_name = file_infos[0].path;

//! User specified sheet name
string sheet_name;
//! User specified sheet index -- starts with 1
Expand Down Expand Up @@ -681,12 +689,12 @@ inline unique_ptr<FunctionData> SheetreaderBindFun(ClientContext &context, Table

try {
if (!sheet_name.empty()) {
bind_data = make_uniq<SRBindData>(file_names[0], sheet_name);
bind_data = make_uniq<SRBindData>(file_name, sheet_name);
} else if (sheet_index_set) {
bind_data = make_uniq<SRBindData>(file_names[0], sheet_index);
bind_data = make_uniq<SRBindData>(file_name, sheet_index);
} else {
// Default: sheet_index=1
bind_data = make_uniq<SRBindData>(file_names[0]);
bind_data = make_uniq<SRBindData>(file_name);
}
} catch (std::exception &e) {
throw BinderException(e.what());
Expand Down Expand Up @@ -940,7 +948,7 @@ inline unique_ptr<FunctionData> SheetreaderBindFun(ClientContext &context, Table
return std::move(bind_data);
}

static void LoadInternal(DatabaseInstance &instance) {
static void LoadInternal(ExtensionLoader &loader) {
// Register a table function
TableFunction sheetreader_table_function("sheetreader", {LogicalType::VARCHAR}, SheetreaderCopyTableFun,
SheetreaderBindFun, SRGlobalTableFunctionState::Init,
Expand All @@ -959,28 +967,41 @@ static void LoadInternal(DatabaseInstance &instance) {
sheetreader_table_function.named_parameters["force_types"] = LogicalType::BOOLEAN;
sheetreader_table_function.named_parameters["coerce_to_string"] = LogicalType::BOOLEAN;

ExtensionUtil::RegisterFunction(instance, sheetreader_table_function);
loader.RegisterFunction(sheetreader_table_function);
}

//! Load hook of the extension; forwards to LoadInternal.
//! NOTE(review): this diff view interleaves the removed DuckDB-based definition with the
//! ExtensionLoader-based definition replacing it -- confirm against the checked-in file.
void SheetreaderExtension::Load(DuckDB &db) {
LoadInternal(*db.instance);
void SheetreaderExtension::Load(ExtensionLoader &loader) {
LoadInternal(loader);
}
//! Returns the name of the extension: "sheetreader"
std::string SheetreaderExtension::Name() {
	std::string extension_name("sheetreader");
	return extension_name;
}

//! Returns EXT_VERSION_SHEETREADER when that macro is defined at compile time,
//! otherwise an empty string
std::string SheetreaderExtension::Version() const {
#ifndef EXT_VERSION_SHEETREADER
	return std::string();
#else
	return EXT_VERSION_SHEETREADER;
#endif
}

} // namespace duckdb

extern "C" {

//! C entry point: wraps the given database instance in a DuckDB object and loads the extension into it.
//! NOTE(review): this diff view shows both the removed LoadExtension call and the
//! LoadStaticExtension call replacing it -- confirm against the checked-in file.
DUCKDB_EXTENSION_API void sheetreader_init(duckdb::DatabaseInstance &db) {
duckdb::DuckDB db_wrapper(db);
db_wrapper.LoadExtension<duckdb::SheetreaderExtension>();
db_wrapper.LoadStaticExtension<duckdb::SheetreaderExtension>();
}

//! C entry point: reports the version of the DuckDB library this extension is linked against
DUCKDB_EXTENSION_API const char *sheetreader_version() {
	const char *library_version = duckdb::DuckDB::LibraryVersion();
	return library_version;
}

//! C entry point taking an ExtensionLoader: constructs the extension object and runs its Load hook
DUCKDB_EXTENSION_API void sheetreader_duckdb_cpp_init(duckdb::ExtensionLoader &loader) {
	duckdb::SheetreaderExtension {}.Load(loader);
}
}

#ifndef DUCKDB_EXTENSION_MAIN
Expand Down
Loading