Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.4.5] - 2026-02-02

### Added
- **Inferencer**: Row count estimates and row samples
- **Discard disconnected vertices**: Option to discard disconnected vertices during graph operations

### Changed
- **clean_start**: Split into `recreate_schema` (schema reset) and `clear_data` (data reset) for a clearer separation of the two operations
- **output_config**: Renamed to `target_db_config`

## [1.4.3] - 2026-01-25

### Added
Expand Down
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,27 +129,27 @@ from graflo.hq import GraphEngine
# Option 1: Use GraphEngine for schema definition and ingestion (recommended)
engine = GraphEngine()
ingestion_params = IngestionParams(
clean_start=False, # Set to True to wipe existing database
recreate_schema=False, # Set to True to drop and redefine the schema; when False, the script halts if a schema already exists
# max_items=1000, # Optional: limit number of items to process
# batch_size=10000, # Optional: customize batch size
)

engine.define_and_ingest(
schema=schema,
output_config=conn_conf, # Target database config
target_db_config=conn_conf, # Target database config
patterns=patterns, # Source data patterns
ingestion_params=ingestion_params,
clean_start=False, # Set to True to wipe existing database
recreate_schema=False, # Set to True to drop and redefine the schema; when False, the script halts if a schema already exists
)

# Option 2: Use Caster directly (schema must be defined separately)
# from graflo.hq import GraphEngine
# engine = GraphEngine()
# engine.define_schema(schema=schema, output_config=conn_conf, clean_start=False)
# engine.define_schema(schema=schema, target_db_config=conn_conf, recreate_schema=False)
#
# caster = Caster(schema)
# caster.ingest(
# output_config=conn_conf,
# target_db_config=conn_conf,
# patterns=patterns,
# ingestion_params=ingestion_params,
# )
Expand Down Expand Up @@ -178,8 +178,8 @@ schema = engine.infer_schema(
target_config = ArangoConfig.from_docker_env()
engine.define_schema(
schema=schema,
output_config=target_config,
clean_start=False,
target_db_config=target_config,
recreate_schema=False,
)

# Use the inferred schema with Caster for ingestion
Expand Down
4 changes: 2 additions & 2 deletions docs/examples/example-1.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,12 @@ from graflo.hq.caster import IngestionParams
caster = Caster(schema)

ingestion_params = IngestionParams(
clean_start=False, # Set to True to wipe existing database
recreate_schema=False, # Set to True to drop and redefine the schema; when False, the script halts if a schema already exists
# max_items=1000, # Optional: limit number of items to process
)

caster.ingest(
output_config=conn_conf, # Target database config
target_db_config=conn_conf, # Target database config
patterns=patterns, # Source data patterns
ingestion_params=ingestion_params,
)
Expand Down
4 changes: 2 additions & 2 deletions docs/examples/example-2.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,11 @@ patterns.add_file_pattern(
from graflo.hq.caster import IngestionParams

ingestion_params = IngestionParams(
clean_start=True, # Wipe existing database before ingestion
recreate_schema=True, # Wipe existing schema before defining and ingesting
)

caster.ingest(
output_config=conn_conf, # Target database config
target_db_config=conn_conf, # Target database config
patterns=patterns, # Source data patterns
ingestion_params=ingestion_params,
)
Expand Down
4 changes: 2 additions & 2 deletions docs/examples/example-3.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,11 @@ from graflo.hq.caster import IngestionParams
caster = Caster(schema)

ingestion_params = IngestionParams(
clean_start=True, # Wipe existing database before ingestion
recreate_schema=True, # Wipe existing schema before defining and ingesting
)

caster.ingest(
output_config=conn_conf, # Target database config
target_db_config=conn_conf, # Target database config
patterns=patterns, # Source data patterns
ingestion_params=ingestion_params,
)
Expand Down
4 changes: 2 additions & 2 deletions docs/examples/example-4.md
Original file line number Diff line number Diff line change
Expand Up @@ -214,11 +214,11 @@ from graflo.hq.caster import IngestionParams
caster = Caster(schema)

ingestion_params = IngestionParams(
clean_start=True, # Wipe existing database before ingestion
recreate_schema=True, # Wipe existing schema before defining and ingesting
)

caster.ingest(
output_config=conn_conf, # Target database config
target_db_config=conn_conf, # Target database config
patterns=patterns, # Source data patterns
ingestion_params=ingestion_params,
)
Expand Down
12 changes: 6 additions & 6 deletions docs/examples/example-5.md
Original file line number Diff line number Diff line change
Expand Up @@ -373,15 +373,15 @@ from graflo.hq.caster import IngestionParams
# Use GraphEngine for schema definition and ingestion
engine = GraphEngine()
ingestion_params = IngestionParams(
clean_start=True, # Clear existing data first
recreate_schema=True, # Drop existing schema and define new one before ingesting
)

engine.define_and_ingest(
schema=schema,
output_config=target_config, # Target graph database config
target_db_config=target_config, # Target graph database config
patterns=patterns, # PostgreSQL table patterns
ingestion_params=ingestion_params,
clean_start=True, # Clear existing data first
recreate_schema=True, # Drop existing schema and define new one before ingesting
)
```

Expand Down Expand Up @@ -436,16 +436,16 @@ patterns = engine.create_patterns(postgres_conf, schema_name="public")

# Step 7: Define schema and ingest data
ingestion_params = IngestionParams(
clean_start=True, # Clear existing data first
recreate_schema=True, # Drop existing schema and define new one before ingesting
)

# Use GraphEngine to define schema and ingest data
engine.define_and_ingest(
schema=schema,
output_config=target_config,
target_db_config=target_config,
patterns=patterns,
ingestion_params=ingestion_params,
clean_start=True, # Clear existing data first
recreate_schema=True, # Drop existing schema and define new one before ingesting
)

print("\n" + "=" * 80)
Expand Down
22 changes: 11 additions & 11 deletions docs/getting_started/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,24 +82,24 @@ from graflo.hq import GraphEngine
# Option 1: Use GraphEngine for schema definition and ingestion (recommended)
engine = GraphEngine()
ingestion_params = IngestionParams(
clean_start=False, # Set to True to wipe existing database
recreate_schema=False, # Set to True to drop and redefine the schema; when False, the script halts if a schema already exists
)

engine.define_and_ingest(
schema=schema,
output_config=conn_conf, # Target database config
target_db_config=conn_conf, # Target database config
patterns=patterns, # Source data patterns
ingestion_params=ingestion_params,
clean_start=False, # Set to True to wipe existing database
recreate_schema=False, # Set to True to drop and redefine the schema; when False, the script halts if a schema already exists
)

# Option 2: Use Caster directly (schema must be defined separately)
# engine = GraphEngine()
# engine.define_schema(schema=schema, output_config=conn_conf, clean_start=False)
# engine.define_schema(schema=schema, target_db_config=conn_conf, recreate_schema=False)
#
# caster = Caster(schema)
# caster.ingest(
# output_config=conn_conf,
# target_db_config=conn_conf,
# patterns=patterns,
# ingestion_params=ingestion_params,
# )
Expand All @@ -112,7 +112,7 @@ The `Patterns` class maps resource names (from `Schema`) to their physical data
- **TablePattern**: For PostgreSQL table resources with connection configuration

The `ingest()` method takes:
- `output_config`: Target graph database configuration (where to write the graph)
- `target_db_config`: Target graph database configuration (where to write the graph)
- `patterns`: Source data patterns (where to read data from - files or database tables)

## 🚀 Using PostgreSQL Tables as Data Sources
Expand Down Expand Up @@ -161,15 +161,15 @@ arango_config = ArangoConfig.from_docker_env() # Target graph database
# Use GraphEngine for schema definition and ingestion
engine = GraphEngine()
ingestion_params = IngestionParams(
clean_start=False, # Set to True to wipe existing database
recreate_schema=False, # Set to True to drop and redefine the schema; when False, the script halts if a schema already exists
)

engine.define_and_ingest(
schema=schema,
output_config=arango_config, # Target graph database
target_db_config=arango_config, # Target graph database
patterns=patterns, # Source PostgreSQL tables
ingestion_params=ingestion_params,
clean_start=False, # Set to True to wipe existing database
recreate_schema=False, # Set to True to drop and redefine the schema; when False, the script halts if a schema already exists
)
```

Expand Down Expand Up @@ -211,8 +211,8 @@ from graflo.hq import GraphEngine
engine = GraphEngine()
engine.define_schema(
schema=schema,
output_config=conn_conf,
clean_start=False,
target_db_config=conn_conf,
recreate_schema=False,
)

# Then ingest using Caster
Expand Down
2 changes: 1 addition & 1 deletion docs/reference/data_source/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ caster = Caster(schema)

ingestion_params = IngestionParams(
batch_size=1000, # Process 1000 items per batch
clean_start=False, # Set to True to wipe existing database
recreate_schema=False, # Set to True to drop and redefine schema
)

caster.ingest_data_sources(
Expand Down
4 changes: 2 additions & 2 deletions examples/1-ingest-csv/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@

# Create GraphEngine and define schema + ingest in one operation
engine = GraphEngine(target_db_flavor=db_type)
ingestion_params = IngestionParams(clean_start=True)
ingestion_params = IngestionParams(clear_data=True)
engine.define_and_ingest(
schema=schema,
output_config=conn_conf,
target_db_config=conn_conf,
patterns=patterns,
ingestion_params=ingestion_params,
)
4 changes: 2 additions & 2 deletions examples/2-ingest-self-references/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@

# Create GraphEngine and define schema + ingest in one operation
engine = GraphEngine(target_db_flavor=db_type)
ingestion_params = IngestionParams(clean_start=True)
ingestion_params = IngestionParams(clear_data=True)
engine.define_and_ingest(
schema=schema,
output_config=conn_conf,
target_db_config=conn_conf,
patterns=patterns,
ingestion_params=ingestion_params,
)
4 changes: 2 additions & 2 deletions examples/3-ingest-csv-edge-weights/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@

# Create GraphEngine and define schema + ingest in one operation
engine = GraphEngine(target_db_flavor=db_type)
ingestion_params = IngestionParams(clean_start=True)
ingestion_params = IngestionParams(clear_data=True)
engine.define_and_ingest(
schema=schema,
output_config=conn_conf,
target_db_config=conn_conf,
patterns=patterns,
ingestion_params=ingestion_params,
)
4 changes: 2 additions & 2 deletions examples/4-ingest-neo4j/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@
# Create GraphEngine and define schema + ingest in one operation
engine = GraphEngine(target_db_flavor=db_type)
ingestion_params = IngestionParams(
clean_start=True,
# max_items=5,
)
engine.define_and_ingest(
schema=schema,
output_config=conn_conf, # Target database config
target_db_config=conn_conf, # Target database config
patterns=patterns, # Source data patterns
ingestion_params=ingestion_params,
recreate_schema=True, # Wipe existing schema before defining and ingesting
)
8 changes: 3 additions & 5 deletions examples/5-ingest-postgres/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,10 @@
# Note: ingestion will create its own PostgreSQL connections per table internally
engine.define_and_ingest(
schema=schema,
output_config=conn_conf,
target_db_config=conn_conf,
patterns=patterns,
ingestion_params=IngestionParams(
clean_start=False
), # clean_start handled by define_and_ingest
clean_start=True, # Clean existing data before defining schema
ingestion_params=IngestionParams(clear_data=False),
recreate_schema=True, # Drop existing schema and define new one before ingesting
)

print("\n" + "=" * 80)
Expand Down
2 changes: 1 addition & 1 deletion graflo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
Example:
>>> from graflo.db.manager import ConnectionManager
>>> with ConnectionManager(config) as conn:
... conn.init_db(schema, clean_start=True)
... conn.init_db(schema, recreate_schema=True)
... conn.upsert_docs_batch(docs, "users")
"""

Expand Down
Loading