diff --git a/.dockerignore b/.dockerignore
index 4b0c03a40..36bf089a4 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -41,4 +41,4 @@ coverage/
# Docker
.dockerignore
-Dockerfile
\ No newline at end of file
+Dockerfile
diff --git a/.github/workflows/codeql.yaml b/.github/workflows/codeql.yaml
index c57baf24d..68f900727 100644
--- a/.github/workflows/codeql.yaml
+++ b/.github/workflows/codeql.yaml
@@ -32,4 +32,4 @@ jobs:
uses: github/codeql-action/autobuild@v3
- name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@v3
\ No newline at end of file
+ uses: github/codeql-action/analyze@v3
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index f8a8dd1ec..51b706c20 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -26,4 +26,4 @@ jobs:
# Run the mkdocs command using Poetry's environment
- name: Deploy documentation
run: |
- poetry run mkdocs gh-deploy --force
\ No newline at end of file
+ poetry run mkdocs gh-deploy --force
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 000000000..1f8b1e094
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,51 @@
+name: Lint
+
+permissions:
+ contents: read
+
+on:
+ push:
+ branches: [main]
+ paths:
+ - '**.py'
+ - 'pyproject.toml'
+ - '.github/workflows/lint.yml'
+ pull_request:
+ branches: [main]
+ paths:
+ - '**.py'
+ - 'pyproject.toml'
+ - '.github/workflows/lint.yml'
+ workflow_dispatch:
+
+jobs:
+ ruff:
+ name: Ruff Linting
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Set up Python 3.11
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.11'
+ cache: 'pip'
+
+ - name: Install Poetry
+ run: |
+ curl -sSL https://install.python-poetry.org | python -
+ echo "$HOME/.local/bin" >> $GITHUB_PATH
+
+ - name: Install dependencies
+ run: |
+ poetry install --no-interaction --no-ansi
+
+ - name: Run ruff check
+ run: |
+ poetry run ruff check --output-format=github .
+
+ - name: Run ruff format check
+ run: |
+ poetry run ruff format --check .
diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml
index b7e2242c5..09d2fd542 100644
--- a/.github/workflows/pypi-release.yml
+++ b/.github/workflows/pypi-release.yml
@@ -33,10 +33,10 @@ jobs:
cd flowfile_frontend
npm install
npm run build:web
-
+
# Create the static directory if it doesn't exist
mkdir -p ../flowfile/flowfile/web/static
-
+
# Copy the built files to the Python package
cp -r build/renderer/* ../flowfile/flowfile/web/static/
echo "Contents of web/static directory:"
@@ -81,4 +81,4 @@ jobs:
with:
skip-existing: true
packages-dir: dist/
- verbose: true
\ No newline at end of file
+ verbose: true
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index eb2c54991..2654432e9 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -112,4 +112,4 @@ jobs:
echo "GitHub Release created."
echo "Tag: ${{ github.ref_name }}"
echo "Release ID: ${{ steps.create_release.outputs.id }}"
- echo "Assets have been uploaded to the GitHub Release."
\ No newline at end of file
+ echo "Assets have been uploaded to the GitHub Release."
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index ee10edc9d..d96b00b1a 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -52,7 +52,7 @@ jobs:
cd flowfile_frontend
npm install
npm run build:web
-
+
# Create the static directory if it doesn't exist
mkdir -p ../flowfile/flowfile/web/static
@@ -112,7 +112,7 @@ jobs:
cd flowfile_frontend
npm install
npm run build:web
-
+
# Create the static directory if it doesn't exist
New-Item -ItemType Directory -Force -Path ../flowfile/flowfile/web/static | Out-Null
@@ -283,4 +283,4 @@ jobs:
shell: pwsh
working-directory: flowfile_frontend
run: |
- npm run test
\ No newline at end of file
+ npm run test
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 000000000..6bc22c6d2
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,39 @@
+# Pre-commit hooks configuration
+# See https://pre-commit.com for more information
+
+repos:
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ # Ruff version should match the version in pyproject.toml
+ rev: v0.8.6
+ hooks:
+ # Run the linter
+ - id: ruff
+ args: [--fix]
+ types_or: [python, pyi]
+ # Run the formatter
+ - id: ruff-format
+ types_or: [python, pyi]
+
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v5.0.0
+ hooks:
+ # Identify invalid files
+ - id: check-ast
+ - id: check-yaml
+ - id: check-json
+ exclude: 'tsconfig.*\.json$' # Exclude TypeScript config files (they use JSONC with comments)
+ - id: check-toml
+ # Check for files that would conflict in case-insensitive filesystems
+ - id: check-case-conflict
+ # Check for merge conflicts
+ - id: check-merge-conflict
+ # Check for debugger imports
+ - id: debug-statements
+ # Make sure files end with newline
+ - id: end-of-file-fixer
+ # Trim trailing whitespace
+ - id: trailing-whitespace
+ args: [--markdown-linebreak-ext=md]
+ # Check for large files
+ - id: check-added-large-files
+ args: ['--maxkb=1000']
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 000000000..508ca4ba1
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,142 @@
+# Contributing to Flowfile
+
+Thank you for your interest in contributing to Flowfile! This guide will help you set up your development environment and understand our code quality standards.
+
+## Development Setup
+
+### Prerequisites
+
+- Python 3.10 or higher (but less than 3.14)
+- [Poetry](https://python-poetry.org/docs/#installation) for dependency management
+- Git
+
+### Initial Setup
+
+1. **Clone the repository**
+ ```bash
+ git clone https://github.com/Edwardvaneechoud/Flowfile.git
+ cd Flowfile
+ ```
+
+2. **Install dependencies**
+ ```bash
+ poetry install
+ ```
+
+3. **Install pre-commit hooks** (recommended)
+ ```bash
+ poetry run pre-commit install
+ ```
+
+ This will automatically run linting and formatting checks before each commit.
+
+## Code Quality
+
+### Linting with Ruff
+
+We use [Ruff](https://docs.astral.sh/ruff/) for linting and code formatting. Ruff is configured in `pyproject.toml`.
+
+**Run linting manually:**
+```bash
+# Check for linting issues
+poetry run ruff check .
+
+# Auto-fix linting issues
+poetry run ruff check --fix .
+
+# Check code formatting
+poetry run ruff format --check .
+
+# Format code
+poetry run ruff format .
+```
+
+**Configuration:**
+- Target: Python 3.10+
+- Line length: 120 characters
+- Rules: F (Pyflakes), E/W (pycodestyle), I (isort), UP (pyupgrade), B (flake8-bugbear)
+
+### Pre-commit Hooks
+
+Pre-commit hooks automatically run before each commit to ensure code quality. They will:
+
+1. **Ruff linting** - Check and auto-fix Python code issues
+2. **Ruff formatting** - Format Python code consistently
+3. **File checks** - Validate YAML, JSON, TOML, and Python syntax
+4. **Trailing whitespace** - Remove unnecessary whitespace
+5. **End of file** - Ensure files end with a newline
+6. **Merge conflicts** - Detect merge conflict markers
+7. **Large files** - Prevent committing large files (>1MB)
+
+**Skip pre-commit hooks** (not recommended):
+```bash
+git commit --no-verify -m "Your commit message"
+```
+
+**Run pre-commit manually on all files:**
+```bash
+poetry run pre-commit run --all-files
+```
+
+### Continuous Integration
+
+Our GitHub Actions workflows automatically run:
+
+- **Linting** (`lint.yml`) - Runs ruff check and format validation on PRs that touch Python files or `pyproject.toml`
+- **Tests** (`test.yaml`) - Runs test suites
+- **Documentation** (`documentation.yml`) - Builds and deploys docs
+
+All checks must pass before a PR can be merged.
+
+## Running Tests
+
+```bash
+# Run all tests
+poetry run pytest
+
+# Run tests for a specific module
+poetry run pytest flowfile_core/tests/
+poetry run pytest flowfile_worker/tests/
+
+# Run tests with coverage
+poetry run pytest --cov=flowfile_core --cov=flowfile_worker
+```
+
+## Code Style Guidelines
+
+- Follow [PEP 8](https://pep8.org/) style guidelines (enforced by Ruff)
+- Use type hints where appropriate
+- Write descriptive variable and function names
+- Keep functions focused and modular
+- Add docstrings for public functions and classes
+- Keep line length under 120 characters
+
+## Submitting Changes
+
+1. **Create a new branch** for your feature or fix:
+ ```bash
+ git checkout -b feature/your-feature-name
+ ```
+
+2. **Make your changes** and ensure all tests pass
+
+3. **Commit your changes** (pre-commit hooks will run automatically):
+ ```bash
+ git add .
+ git commit -m "Add your descriptive commit message"
+ ```
+
+4. **Push to your fork**:
+ ```bash
+ git push origin feature/your-feature-name
+ ```
+
+5. **Create a Pull Request** on GitHub
+
+## Getting Help
+
+- Check the [documentation](https://edwardvaneechoud.github.io/Flowfile/)
+- Open an issue on GitHub
+- Read the [architecture documentation](docs/for-developers/architecture.md)
+
+Thank you for contributing to Flowfile! 🚀
diff --git a/README.md b/README.md
index 05cb56a5b..392bd3340 100644
--- a/README.md
+++ b/README.md
@@ -104,7 +104,7 @@ For a deeper dive into the technical architecture, check out [this article](http
#### 1. Desktop Application
The desktop version offers the best experience with a native interface and integrated services. You can either:
-**Option A: Download Pre-built Application**
+**Option A: Download Pre-built Application**
- Download the latest release from [GitHub Releases](https://github.com/Edwardvaneechoud/Flowfile/releases)
- Run the installer for your platform (Windows, macOS, or Linux)
> **Note:** You may see security warnings since the app isn't signed with a developer certificate yet.
diff --git a/build_backends/build_backends/main.py b/build_backends/build_backends/main.py
index d8ac6c21e..54ac460d5 100644
--- a/build_backends/build_backends/main.py
+++ b/build_backends/build_backends/main.py
@@ -50,7 +50,7 @@ def get_connectorx_metadata():
for dist_info in glob.glob(dist_info_pattern):
metadata_locations.append(dist_info)
- # Look for egg-info directories
+ # Look for egg-info directories
egg_info_pattern = os.path.join(site_packages, 'connectorx*.egg-info')
for egg_info in glob.glob(egg_info_pattern):
metadata_locations.append(egg_info)
@@ -126,7 +126,7 @@ def patched_version(distribution_name):
"""
# Collect minimal snowflake dependencies
-snowflake_imports = collect_submodules('snowflake.connector',
+snowflake_imports = collect_submodules('snowflake.connector',
filter=lambda name: any(x in name for x in [
'connection',
'errors',
diff --git a/build_backends/build_backends/main_prd.py b/build_backends/build_backends/main_prd.py
index c4f41522c..b9584e0d1 100644
--- a/build_backends/build_backends/main_prd.py
+++ b/build_backends/build_backends/main_prd.py
@@ -22,7 +22,7 @@ def wait_for_endpoint(url, timeout=60):
def shutdown_service():
"""Shutdown the service gracefully using the shutdown endpoint."""
try:
- response = requests.post("http://0.0.0.0:63578/shutdown", headers={"accept": "application/json"}, data="")
+ requests.post("http://0.0.0.0:63578/shutdown", headers={"accept": "application/json"}, data="")
print("Shutdown request sent, waiting for service to stop...")
time.sleep(1) # Wait 10 seconds to ensure the service is fully stopped
return True
diff --git a/docker-compose.yml b/docker-compose.yml
index 7686581fb..8452d4dc3 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -88,4 +88,4 @@ volumes:
secrets:
flowfile_master_key:
- file: ./master_key.txt
\ No newline at end of file
+ file: ./master_key.txt
diff --git a/docs/MakeFile b/docs/MakeFile
index 3d0ce63c6..8cf77a9cf 100644
--- a/docs/MakeFile
+++ b/docs/MakeFile
@@ -20,4 +20,4 @@ clean:
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
- @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/for-developers/architecture.md b/docs/for-developers/architecture.md
index 51556a41c..bc999dc27 100644
--- a/docs/for-developers/architecture.md
+++ b/docs/for-developers/architecture.md
@@ -239,4 +239,4 @@ This design enables Flowfile to:
---
-*For a deep dive into the implementation details, see the [full technical article](https://dev.to/edwardvaneechoud/building-flowfile-architecting-a-visual-etl-tool-with-polars-576c).*
\ No newline at end of file
+*For a deep dive into the implementation details, see the [full technical article](https://dev.to/edwardvaneechoud/building-flowfile-architecting-a-visual-etl-tool-with-polars-576c).*
diff --git a/docs/for-developers/creating-custom-nodes.md b/docs/for-developers/creating-custom-nodes.md
index a9e8a218d..7b2b3179e 100644
--- a/docs/for-developers/creating-custom-nodes.md
+++ b/docs/for-developers/creating-custom-nodes.md
@@ -73,7 +73,7 @@ class GreetingNode(CustomNodeBase):
node_category: str = "Text Processing"
title: str = "Add Personal Greetings"
intro: str = "Transform names into personalized greetings"
-
+
settings_schema: GreetingSettings = GreetingSettings()
def process(self, input_df: pl.LazyFrame) -> pl.LazyFrame:
@@ -81,27 +81,27 @@ class GreetingNode(CustomNodeBase):
name_col = self.settings_schema.main_config.name_column.value
style = self.settings_schema.main_config.greeting_style.value
custom = self.settings_schema.main_config.custom_message.value
-
+
# Define greeting logic
if style == "formal":
greeting_expr = pl.concat_str([
- pl.lit("Hello, "),
- pl.col(name_col),
+ pl.lit("Hello, "),
+ pl.col(name_col),
pl.lit(f". {custom}")
])
elif style == "casual":
greeting_expr = pl.concat_str([
- pl.lit("Hey "),
- pl.col(name_col),
+ pl.lit("Hey "),
+ pl.col(name_col),
pl.lit(f"! {custom}")
])
else: # enthusiastic
greeting_expr = pl.concat_str([
- pl.lit("OMG HI "),
- pl.col(name_col).str.to_uppercase(),
+ pl.lit("OMG HI "),
+ pl.col(name_col).str.to_uppercase(),
pl.lit(f"!!! {custom} 🎉")
])
-
+
return input_df.with_columns([
greeting_expr.alias("greeting")
])
@@ -141,7 +141,7 @@ class MyCustomNode(CustomNodeBase):
# 2. Settings Schema - The UI configuration
settings_schema: MySettings = MySettings()
-
+
# 3. Processing Logic - What the node actually does
def process(self, input_df: pl.LazyFrame) -> pl.LazyFrame:
# Your transformation logic here
@@ -184,7 +184,7 @@ class MyNodeSettings(NodeSettings):
input_column=ColumnSelector(...),
operation_type=SingleSelect(...)
)
-
+
advanced_options: Section = Section(
title="Advanced Options",
description="Fine-tune behavior",
@@ -341,7 +341,7 @@ data_types=[Types.Numeric, Types.Date] # Numbers and dates only
class DataQualityNode(CustomNodeBase):
node_name: str = "Data Quality Checker"
node_category: str = "Data Validation"
-
+
settings_schema: DataQualitySettings = DataQualitySettings(
validation_rules=Section(
title="Validation Rules",
@@ -366,7 +366,7 @@ class DataQualityNode(CustomNodeBase):
def process(self, input_df: pl.LazyFrame) -> pl.LazyFrame:
columns = self.settings_schema.validation_rules.columns_to_check.value
threshold = self.settings_schema.validation_rules.null_threshold.value
-
+
# Calculate quality metrics
quality_checks = []
for col in columns:
@@ -376,7 +376,7 @@ class DataQualityNode(CustomNodeBase):
"null_percentage": null_pct,
"quality_flag": "PASS" if null_pct <= threshold else "FAIL"
})
-
+
# Add quality flags to original data
result_df = input_df
for check in quality_checks:
@@ -384,7 +384,7 @@ class DataQualityNode(CustomNodeBase):
result_df = result_df.with_columns([
pl.col(check["column"]).is_null().alias(f"{check['column']}_has_issues")
])
-
+
return result_df
```
@@ -394,7 +394,7 @@ class DataQualityNode(CustomNodeBase):
class TextCleanerNode(CustomNodeBase):
node_name: str = "Text Cleaner"
node_category: str = "Text Processing"
-
+
settings_schema: TextCleanerSettings = TextCleanerSettings(
cleaning_options=Section(
title="Cleaning Options",
@@ -425,10 +425,10 @@ class TextCleanerNode(CustomNodeBase):
text_col = self.settings_schema.cleaning_options.text_column.value
operations = self.settings_schema.cleaning_options.operations.value
output_col = self.settings_schema.cleaning_options.output_column.value
-
+
# Start with original text
expr = pl.col(text_col)
-
+
# Apply selected operations
if "lowercase" in operations:
expr = expr.str.to_lowercase()
@@ -440,14 +440,14 @@ class TextCleanerNode(CustomNodeBase):
expr = expr.str.replace_all(r"\d+", "")
if "trim" in operations:
expr = expr.str.strip_chars()
-
+
return input_df.with_columns([expr.alias(output_col)])
```
## Best Practices
### 1. Performance
-Try to use Polars expressions and lazy evaluation to keep your nodes efficient.
+Try to use Polars expressions and lazy evaluation to keep your nodes efficient.
A collect will be executed in the core process and can cause issues when using remote compute.
@@ -484,4 +484,4 @@ The following features are planned for future releases:
---
-Ready to build? Start with the [Custom Node Tutorial](custom-node-tutorial.md) for a step-by-step walkthrough!
\ No newline at end of file
+Ready to build? Start with the [Custom Node Tutorial](custom-node-tutorial.md) for a step-by-step walkthrough!
diff --git a/docs/for-developers/custom-node-tutorial.md b/docs/for-developers/custom-node-tutorial.md
index a348fad9e..4dc1c5ad3 100644
--- a/docs/for-developers/custom-node-tutorial.md
+++ b/docs/for-developers/custom-node-tutorial.md
@@ -44,7 +44,7 @@ from typing import List
from flowfile_core.flowfile.node_designer import (
CustomNodeBase,
- Section,
+ Section,
NodeSettings,
TextInput,
NumericInput,
@@ -70,7 +70,7 @@ class EmojiMoodSection(Section):
required=True,
data_types=Types.Numeric # Only show numeric columns
)
-
+
mood_type: SingleSelect = SingleSelect(
label="Emoji Mood Logic",
options=[
@@ -84,14 +84,14 @@ class EmojiMoodSection(Section):
],
default="performance"
)
-
+
threshold_value: NumericInput = NumericInput(
label="Mood Threshold",
default=50.0,
min_value=0,
max_value=100
)
-
+
emoji_column_name: TextInput = TextInput(
label="New Emoji Column Name",
default="mood_emoji",
@@ -113,13 +113,13 @@ class EmojiStyleSection(Section):
],
default="normal"
)
-
+
add_random_sparkle: ToggleSwitch = ToggleSwitch(
label="Add Random Sparkles ✨",
default=True,
description="Randomly sprinkle ✨ for extra pizzazz"
)
-
+
emoji_categories: MultiSelect = MultiSelect(
label="Allowed Emoji Categories",
options=[
@@ -143,7 +143,7 @@ class EmojiSettings(NodeSettings):
title="Mood Detection 😊",
description="Configure how to detect the vibe of your data"
)
-
+
style_options: EmojiStyleSection = EmojiStyleSection(
title="Emoji Style 🎨",
description="Fine-tune your emoji experience"
@@ -160,11 +160,11 @@ class EmojiGenerator(CustomNodeBase):
node_group: str = "custom"
title: str = "Emoji Generator"
intro: str = "Transform boring data into fun emoji-filled datasets! 🚀"
-
+
# I/O configuration
number_of_inputs: int = 1
number_of_outputs: int = 1
-
+
# Link to our settings schema
settings_schema: EmojiSettings = EmojiSettings()
@@ -573,10 +573,9 @@ Save this as `~/.flowfile/user_defined_nodes/emoji_generator.py`, restart Flowfi
## Congratulations! 🎉
-You've successfully created a fully functional custom node
+You've successfully created a fully functional custom node
- ✅ Multi-section UI with 6 different component types
- ✅ Complex processing logic with multiple mood themes
- ✅ Advanced features like intensity control and random effects
- ✅ Professional documentation and structure
-
diff --git a/docs/for-developers/design-philosophy.md b/docs/for-developers/design-philosophy.md
index e79bc84e7..59188fba2 100644
--- a/docs/for-developers/design-philosophy.md
+++ b/docs/for-developers/design-philosophy.md
@@ -84,7 +84,7 @@ graph: FlowGraph = ff.create_flow_graph()
df_1 = ff.FlowFrame(raw_data, flow_graph=graph)
df_2 = df_1.with_columns(
- flowfile_formulas=['[quantity] * [price]'],
+ flowfile_formulas=['[quantity] * [price]'],
output_column_names=["total"]
)
@@ -103,9 +103,9 @@ df_4 = df_3.group_by(['region']).agg([
```python
# Access all nodes that were created in the graph
print(graph._node_db)
-# {1: Node id: 1 (manual_input),
-# 3: Node id: 3 (formula),
-# 4: Node id: 4 (filter),
+# {1: Node id: 1 (manual_input),
+# 3: Node id: 3 (formula),
+# 4: Node id: 4 (filter),
# 5: Node id: 5 (group_by)}
# Find the starting node(s) of the graph
@@ -118,7 +118,7 @@ print(graph.get_node(1).leads_to_nodes)
# The other way around works too
print(graph.get_node(3).node_inputs)
-# NodeStepInputs(Left Input: None, Right Input: None,
+# NodeStepInputs(Left Input: None, Right Input: None,
# Main Inputs: [Node id: 1 (manual_input)])
# Access the settings and type of any node
@@ -138,7 +138,7 @@ flow = ff.create_flow_graph()
# Node 1: Manual input
node_manual_input = node_interface.NodeManualInput(
- flow_id=flow.flow_id,
+ flow_id=flow.flow_id,
node_id=1,
raw_data_format=RawData.from_pylist(raw_data)
)
@@ -150,14 +150,14 @@ formula_node = node_interface.NodeFormula(
node_id=2,
function=transformation_settings.FunctionInput(
field=transformation_settings.FieldInput(
- name="total",
+ name="total",
data_type="Double"
),
function="[quantity] * [price]"
)
)
flow.add_formula(formula_node)
-add_connection(flow,
+add_connection(flow,
node_interface.NodeConnection.create_from_simple_input(1, 2))
# Node 3: Filter high value transactions
@@ -170,7 +170,7 @@ filter_node = node_interface.NodeFilter(
)
)
flow.add_filter(filter_node)
-add_connection(flow,
+add_connection(flow,
node_interface.NodeConnection.create_from_simple_input(2, 3))
# Node 4: Group by region
@@ -186,7 +186,7 @@ group_by_node = node_interface.NodeGroupBy(
)
)
flow.add_group_by(group_by_node)
-add_connection(flow,
+add_connection(flow,
node_interface.NodeConnection.create_from_simple_input(3, 4))
```
@@ -194,8 +194,8 @@ add_connection(flow,
```python
# Check the schema at any node
print([s.get_minimal_field_info() for s in flow.get_node(4).schema])
-# [MinimalFieldInfo(name='region', data_type='String'),
-# MinimalFieldInfo(name='total_revenue', data_type='Float64'),
+# [MinimalFieldInfo(name='region', data_type='String'),
+# MinimalFieldInfo(name='total_revenue', data_type='Float64'),
# MinimalFieldInfo(name='avg_transaction', data_type='Float64')]
```
@@ -206,7 +206,7 @@ This is the polars query plan generated by both methods:
```
AGGREGATE[maintain_order: false]
- [col("total").sum().alias("total_revenue"),
+ [col("total").sum().alias("total_revenue"),
col("total").mean().alias("avg_transaction")] BY [col("region")]
FROM
FILTER [(col("total")) > (1500)]
@@ -270,7 +270,7 @@ class AggColl:
!!! tip "Settings Power The Backend"
This dual structure—Nodes for graph metadata, Settings for transformation logic—drives the backend:
-
+
- 🔧 **Code generation** (method signatures match settings)
- 💾 **Serialization** (graphs can be saved/loaded)
- 🔮 **Schema prediction** (output types are inferred from AggColl)
@@ -295,21 +295,21 @@ The `FlowNode` class is the heart of each transformation in the graph. Each node
!!! info "Core FlowNode Components"
**Essential State:**
-
+
- **`_function`**: The closure containing the transformation logic
- **`leads_to_nodes`**: List of downstream nodes that depend on this one
- **`node_information`**: Metadata (id, type, position, connections)
- **`_hash`**: Unique identifier based on settings and parent hashes
-
+
**Runtime State:**
-
+
- **`results`**: Holds the resulting data, errors, and example data paths
- **`node_stats`**: Tracks execution status (has_run, is_canceled, etc.)
- **`node_settings`**: Runtime settings (cache_results, streamable, etc.)
- **`state_needs_reset`**: Flag indicating if the node needs recalculation
-
+
**Schema Information:**
-
+
- **`node_schema`**: Input/output columns and predicted schemas
- **`schema_callback`**: Function to calculate schema without execution
@@ -370,7 +370,7 @@ def add_group_by(self, group_by_settings: input_schema.NodeGroupBy):
# The closure: captures group_by_settings
def _func(fl: FlowDataEngine) -> FlowDataEngine:
return fl.do_group_by(group_by_settings.groupby_input, False)
-
+
self.add_node_step(
node_id=group_by_settings.node_id,
function=_func, # This closure remembers group_by_settings!
@@ -384,7 +384,7 @@ def add_union(self, union_settings: input_schema.NodeUnion):
def _func(*flowfile_tables: FlowDataEngine):
dfs = [flt.data_frame for flt in flowfile_tables]
return FlowDataEngine(pl.concat(dfs, how='diagonal_relaxed'))
-
+
self.add_node_step(
node_id=union_settings.node_id,
function=_func, # This closure has everything it needs
@@ -416,7 +416,7 @@ print(result.data_frame.collect_schema())
2. **Functions only need FlowDataEngine as input** (or multiple for joins/unions)
3. **LazyFrame tracks schema changes** through the entire chain
4. **No data is processed**—Polars just builds the query plan
-
+
The result: instant schema feedback without running expensive computations!
### Fallback: Schema Callbacks
@@ -506,16 +506,16 @@ graph LR
subgraph "Frontend"
A[Designer
Vue/Electron]
end
-
+
subgraph "Backend"
B[Core Service
FastAPI]
C[Worker Service
FastAPI]
end
-
+
subgraph "Storage"
D[Arrow IPC
Cache]
end
-
+
A <-->|Settings/Schema| B
B <-->|Execution| C
C <-->|Data| D
@@ -526,12 +526,12 @@ graph LR
- Visual graph building interface
- Node configuration forms (manually implemented)
- Real-time schema feedback
-
+
**Core:**
- DAG management
- Execution orchestration
- Schema prediction
-
+
**Worker:**
- Polars transformations
- Data caching (Arrow IPC)
@@ -557,7 +557,7 @@ flowfile/
!!! warning "Current State of Node Development"
While the backend architecture elegantly uses settings-driven nodes, adding new nodes requires work across multiple layers. The frontend currently requires manual implementation for each node type—the visual editor doesn't automatically generate forms from Pydantic schemas yet.
-
+
However, there are also opportunities for more focused contributions! Integration with databases and cloud services is needed—these are smaller, more targeted tasks since the core structure is already in place. There's a lot of active development happening, so it's an exciting time to contribute!
### Adding a New Node: The Full Picture
@@ -580,7 +580,7 @@ def add_custom_transform(self, transform_settings: input_schema.NodeCustomTransf
# Create the closure that captures settings
def _func(fl: FlowDataEngine) -> FlowDataEngine:
return fl.do_custom_transform(transform_settings.transform_input)
-
+
# Register with the graph
self.add_node_step(
node_id=transform_settings.node_id,
@@ -589,7 +589,7 @@ def add_custom_transform(self, transform_settings: input_schema.NodeCustomTransf
setting_input=transform_settings,
input_node_ids=[transform_settings.depending_on_id]
)
-
+
# Don't forget schema prediction!
node = self.get_node(transform_settings.node_id)
# ... schema callback setup ...
@@ -610,7 +610,7 @@ This manual process ensures full control over the UI/UX but requires significant
The goal is to eventually auto-generate UI from Pydantic schemas, which would complete the settings-driven architecture. This would make adding new nodes closer to just defining the backend settings and transformation logic, with the UI automatically following.
-The beauty of Flowfile's architecture—discovered through the organic evolution from a UI-first approach—is that even though adding nodes requires work across multiple layers today, the settings-based design provides a clear contract between visual and code interfaces.
+The beauty of Flowfile's architecture—discovered through the organic evolution from a UI-first approach—is that even though adding nodes requires work across multiple layers today, the settings-based design provides a clear contract between visual and code interfaces.
I hope you enjoyed learning about Flowfile's architecture and found the dual-interface approach as exciting as I do! If you have questions, ideas, or want to contribute, ]
-feel free to reach out via [GitHub](https://github.com/edwardvaneechoud/Flowfile) or check our [Core Developer Guide](flowfile-core.md). Happy building!
\ No newline at end of file
+feel free to reach out via [GitHub](https://github.com/edwardvaneechoud/Flowfile) or check our [Core Developer Guide](flowfile-core.md). Happy building!
diff --git a/docs/for-developers/flowfile-core.md b/docs/for-developers/flowfile-core.md
index c14edef5f..f9dad6f55 100644
--- a/docs/for-developers/flowfile-core.md
+++ b/docs/for-developers/flowfile-core.md
@@ -72,7 +72,7 @@ Settings: #<-- The FlowSettings object you provid
```
-```python
+```python
print(graph.run_graph())
# flow_id=1 start_time=datetime.datetime(...) end_time=datetime.datetime(...) success=True nodes_completed=0 number_of_nodes=0 node_step_result=[]
diff --git a/docs/for-developers/index.md b/docs/for-developers/index.md
index b4d92535a..c2e1189f3 100644
--- a/docs/for-developers/index.md
+++ b/docs/for-developers/index.md
@@ -80,4 +80,3 @@ We welcome contributions! Adding a new node requires changes across the stack:
- **Frontend**: Currently, you must also manually create a Vue component for the node's configuration form in the visual editor.
For a more detailed breakdown, please read the **[Contributing section in our Design Philosophy guide](design-philosophy.md#contributing)**.
-
diff --git a/docs/for-developers/python-api-reference.md b/docs/for-developers/python-api-reference.md
index c171bf120..ce1161d3d 100644
--- a/docs/for-developers/python-api-reference.md
+++ b/docs/for-developers/python-api-reference.md
@@ -17,7 +17,7 @@ The `FlowGraph` is the central object that orchestrates the execution of data tr
show_signature: true
show_source: true
heading_level: 4
- show_symbol_type_heading: true
+ show_symbol_type_heading: true
show_root_members_full_path: false
summary: true
unwrap_annotated: true
@@ -32,7 +32,7 @@ The `FlowNode` represents a single operation in the `FlowGraph`. Each node corre
show_signature: true
show_source: true
heading_level: 4
- show_symbol_type_heading: true
+ show_symbol_type_heading: true
show_root_members_full_path: false
summary: true
unwrap_annotated: true
@@ -47,7 +47,7 @@ The `FlowDataEngine` is the primary engine of the library, providing a rich API
show_signature: true
show_source: true
heading_level: 4
- show_symbol_type_heading: true
+ show_symbol_type_heading: true
show_root_members_full_path: false
summary: true
unwrap_annotated: true
@@ -83,7 +83,7 @@ This section documents the Pydantic models that define the structure of settings
show_signature: true
show_source: true
heading_level: 4
- show_symbol_type_heading: true
+ show_symbol_type_heading: true
show_root_members_full_path: false
summary: true
unwrap_annotated: true
@@ -97,7 +97,7 @@ This section documents the Pydantic models that define the structure of settings
show_signature: true
show_source: true
heading_level: 4
- show_symbol_type_heading: true
+ show_symbol_type_heading: true
show_root_members_full_path: false
summary: true
unwrap_annotated: true
@@ -111,7 +111,7 @@ This section documents the Pydantic models that define the structure of settings
show_signature: true
show_source: true
heading_level: 4
- show_symbol_type_heading: true
+ show_symbol_type_heading: true
show_root_members_full_path: false
summary: true
unwrap_annotated: true
@@ -125,7 +125,7 @@ This section documents the Pydantic models that define the structure of settings
show_signature: true
show_source: true
heading_level: 4
- show_symbol_type_heading: true
+ show_symbol_type_heading: true
show_root_members_full_path: false
summary: true
unwrap_annotated: true
@@ -139,7 +139,7 @@ This section documents the Pydantic models that define the structure of settings
show_signature: true
show_source: true
heading_level: 4
- show_symbol_type_heading: true
+ show_symbol_type_heading: true
show_root_members_full_path: false
summary: true
unwrap_annotated: true
@@ -158,7 +158,7 @@ This section documents the FastAPI routes that expose `flowfile-core`'s function
show_signature: true
show_source: true
heading_level: 4
- show_symbol_type_heading: true
+ show_symbol_type_heading: true
show_root_members_full_path: false
summary: true
unwrap_annotated: true
@@ -171,7 +171,7 @@ This section documents the FastAPI routes that expose `flowfile-core`'s function
show_signature: true
show_source: true
heading_level: 4
- show_symbol_type_heading: true
+ show_symbol_type_heading: true
show_root_members_full_path: false
summary: true
unwrap_annotated: true
@@ -185,7 +185,7 @@ This section documents the FastAPI routes that expose `flowfile-core`'s function
show_signature: true
show_source: true
heading_level: 4
- show_symbol_type_heading: true
+ show_symbol_type_heading: true
show_root_members_full_path: false
summary: true
unwrap_annotated: true
@@ -198,7 +198,7 @@ This section documents the FastAPI routes that expose `flowfile-core`'s function
show_signature: true
show_source: true
heading_level: 4
- show_symbol_type_heading: true
+ show_symbol_type_heading: true
show_root_members_full_path: false
summary: true
unwrap_annotated: true
@@ -211,7 +211,7 @@ This section documents the FastAPI routes that expose `flowfile-core`'s function
show_signature: true
show_source: true
heading_level: 4
- show_symbol_type_heading: true
+ show_symbol_type_heading: true
show_root_members_full_path: false
summary: true
unwrap_annotated: true
@@ -224,9 +224,9 @@ This section documents the FastAPI routes that expose `flowfile-core`'s function
show_signature: true
show_source: true
heading_level: 4
- show_symbol_type_heading: true
+ show_symbol_type_heading: true
show_root_members_full_path: false
summary: true
unwrap_annotated: true
show_symbol_type_toc: true
----
\ No newline at end of file
+---
diff --git a/docs/index.html b/docs/index.html
index a957f70b4..6855dc381 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -957,4 +957,4 @@