Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions docs/examples/example-5.md
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ Automatically generate a graflo Schema from your PostgreSQL database. This is th

from graflo.hq import GraphEngine
from graflo.onto import DBFlavor
from graflo.db.connection.onto import ArangoConfig, Neo4jConfig, TigergraphConfig, FalkordbConfig
from graflo.db.connection.onto import ArangoConfig, Neo4jConfig, TigergraphConfig, FalkordbConfig, PostgresConfig
from graflo.db import DBType

# Connect to target graph database to determine flavor
Expand All @@ -281,9 +281,11 @@ db_flavor = (
)

# Create GraphEngine and infer schema automatically
# Connection is automatically managed inside infer_schema()
postgres_conf = PostgresConfig.from_docker_env()
engine = GraphEngine(target_db_flavor=db_flavor)
schema = engine.infer_schema(
postgres_conn,
postgres_conf,
schema_name="public", # PostgreSQL schema name
)
```
Expand Down Expand Up @@ -405,17 +407,13 @@ import yaml
from graflo import Caster
from graflo.onto import DBFlavor
from graflo.db import DBType
from graflo.db.postgres import (
PostgresConnection,
)
from graflo.hq import GraphEngine
from graflo.db.connection.onto import ArangoConfig, PostgresConfig

logger = logging.getLogger(__name__)

# Step 1: Load PostgreSQL configuration (source database)
postgres_conf = PostgresConfig.from_docker_env()
postgres_conn = PostgresConnection(postgres_conf)

# Step 2: Initialize database with mock schema if needed
# (Implementation details omitted - see full example in examples/5-ingest-postgres/ingest.py)
Expand All @@ -427,6 +425,7 @@ from graflo.db.connection.onto import ArangoConfig, Neo4jConfig, TigergraphConfi
target_config = ArangoConfig.from_docker_env() # or Neo4jConfig, TigergraphConfig, FalkordbConfig

# Step 4: Infer Schema from PostgreSQL database structure
# Connection is automatically managed inside infer_schema()
db_type = target_config.connection_type
db_flavor = (
DBFlavor(db_type.value)
Expand All @@ -437,7 +436,7 @@ db_flavor = (
# Create GraphEngine and infer schema
engine = GraphEngine(target_db_flavor=db_flavor)
schema = engine.infer_schema(
postgres_conn,
postgres_conf,
schema_name="public",
)

Expand Down Expand Up @@ -715,8 +714,10 @@ After inference, you can modify the schema:

```python
# Create GraphEngine and infer schema
# Connection is automatically managed inside infer_schema()
postgres_conf = PostgresConfig.from_docker_env()
engine = GraphEngine()
schema = engine.infer_schema(postgres_conn, schema_name="public")
schema = engine.infer_schema(postgres_conf, schema_name="public")

# Modify schema as needed
# Add custom transforms, filters, or additional edges
Expand Down
5 changes: 2 additions & 3 deletions docs/getting_started/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,17 +108,16 @@ The `ingest()` method takes:
You can ingest data directly from PostgreSQL tables. First, infer the schema from your PostgreSQL database:

```python
from graflo.db.postgres import PostgresConnection
from graflo.hq import GraphEngine
from graflo.db.connection.onto import PostgresConfig

# Load PostgreSQL configuration (connection is opened inside infer_schema)
pg_config = PostgresConfig.from_docker_env() # Or from_env(), or create directly
pg_conn = PostgresConnection(pg_config)

# Create GraphEngine and infer schema from PostgreSQL (automatically detects vertices and edges)
# Connection is automatically managed inside infer_schema()
engine = GraphEngine()
schema = engine.infer_schema(pg_conn, schema_name="public")
schema = engine.infer_schema(pg_config, schema_name="public")

# Create patterns from PostgreSQL tables
engine = GraphEngine()
Expand Down
3 changes: 0 additions & 3 deletions docs/reference/db/postgres/fuzzy_matcher.md

This file was deleted.

3 changes: 3 additions & 0 deletions docs/reference/hq/fuzzy_matcher.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# `graflo.hq.fuzzy_matcher`

::: graflo.hq.fuzzy_matcher
3 changes: 2 additions & 1 deletion examples/3-ingest-csv-edge-weights/ingest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from suthing import FileHandle
from graflo import Caster, Patterns, Schema
from graflo.db.connection.onto import Neo4jConfig
from graflo.hq.caster import IngestionParams
from graflo.db.connection.onto import Neo4jConfig

import logging


Expand Down
14 changes: 7 additions & 7 deletions examples/5-ingest-postgres/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@

from graflo.onto import DBFlavor
from graflo.db import DBType
from graflo.db.postgres import (
PostgresConnection,
)
from graflo.hq import GraphEngine, IngestionParams
from graflo.db.postgres.util import load_schema_from_sql_file
from graflo.db.connection.onto import PostgresConfig, TigergraphConfig
Expand All @@ -29,7 +26,7 @@
# Configure logging: INFO level for graflo module, WARNING for others
logging.basicConfig(level=logging.WARNING, handlers=[logging.StreamHandler()])
# Set graflo module to DEBUG level
logging.getLogger("graflo").setLevel(logging.INFO)
logging.getLogger("graflo").setLevel(logging.DEBUG)

# Step 1: Connect to PostgreSQL (source database)
# Load PostgreSQL config from docker/postgres/.env (recommended)
Expand Down Expand Up @@ -100,9 +97,12 @@
# This automatically detects vertex-like and edge-like tables based on:
# - Vertex tables: Have a primary key and descriptive columns
# - Edge tables: Have 2+ foreign keys (representing relationships)
# Connection is automatically closed when exiting the context
with PostgresConnection(postgres_conf) as postgres_conn:
schema = engine.infer_schema(postgres_conn, schema_name="public")
# Connection is automatically managed inside infer_schema()
# Optionally specify fuzzy_threshold (0.0 to 1.0) to control fuzzy matching sensitivity:
# - Higher values (e.g., 0.9) = stricter matching, fewer matches
# - Lower values (e.g., 0.7) = more lenient matching, more matches
# Default is 0.8
schema = engine.infer_schema(postgres_conf, schema_name="public", fuzzy_threshold=0.8)

schema.general.name = "accounting"
# Step 3.5: Dump inferred schema to YAML file
Expand Down
13 changes: 4 additions & 9 deletions examples/5-ingest-postgres/tigergraph/triples.gsql
Original file line number Diff line number Diff line change
@@ -1,22 +1,17 @@
CREATE OR REPLACE QUERY checkAll() FOR GRAPH public {
CREATE OR REPLACE QUERY triples() FOR GRAPH accounting {

TYPEDEF TUPLE <STRING u, STRING relation, STRING v> TRIPLE;
TYPEDEF TUPLE <VERTEX u, EDGE relation, VERTEX v> TRIPLE;
ListAccum<TRIPLE> @@triple_list;

/* 2. Start with users */
Seed = {users.*};

/* 3. Capture follows (User -> User) */
S1 = SELECT t
FROM Seed:s -(follows:e)-> users:t
ACCUM @@triple_list += TRIPLE(s.name, "follows", t.name);
ACCUM @@triple_list += TRIPLE(s, e, t);

/* 4. Capture purchases (User -> Product) */
S2 = SELECT t
FROM Seed:s -(purchases:e)-> products:t
ACCUM @@triple_list += TRIPLE(s.name, "purchases", t.name);
ACCUM @@triple_list += TRIPLE(s, e, t);

/* 5. Print only the list */
PRINT
PRINT @@triple_list;
}
16 changes: 9 additions & 7 deletions graflo/db/postgres/conn.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@
)
from graflo.db.connection.onto import PostgresConfig

from graflo.hq.fuzzy_matcher import FuzzyMatcher

from .inference_utils import (
FuzzyMatchCache,
infer_edge_vertices_from_table_name,
infer_vertex_from_column_name,
)
Expand Down Expand Up @@ -747,8 +748,9 @@ def detect_edge_tables(
vertex_tables = self.detect_vertex_tables(schema_name)
vertex_table_names = [vt.name for vt in vertex_tables]

# Create fuzzy match cache once for all tables (significant performance improvement)
match_cache = FuzzyMatchCache(vertex_table_names)
# Create fuzzy matcher once for all tables (significant performance improvement)
# Caching is enabled by default for better performance
matcher = FuzzyMatcher(vertex_table_names, threshold=0.6, enable_cache=True)

tables = self.get_tables(schema_name)
edge_tables = []
Expand Down Expand Up @@ -819,7 +821,7 @@ def detect_edge_tables(
for fk in fk_infos
]
_, _, relation_name = infer_edge_vertices_from_table_name(
table_name, pk_columns, fk_dicts, vertex_table_names, match_cache
table_name, pk_columns, fk_dicts, vertex_table_names, matcher
)
# If we have 2 or more primary keys, try to infer from table name and structure
elif len(pk_columns) >= 2:
Expand All @@ -839,7 +841,7 @@ def detect_edge_tables(
pk_columns,
fk_dicts,
vertex_table_names,
match_cache,
matcher,
)
)

Expand Down Expand Up @@ -882,10 +884,10 @@ def detect_edge_tables(
)
# Use robust inference logic to extract vertex names from column names
source_table = infer_vertex_from_column_name(
source_column, vertex_table_names, match_cache
source_column, vertex_table_names, matcher
)
target_table = infer_vertex_from_column_name(
target_column, vertex_table_names, match_cache
target_column, vertex_table_names, matcher
)

# Only add if we have source and target information
Expand Down
Loading