From a03905dd6d649df9a78311a54558648705d1b94d Mon Sep 17 00:00:00 2001
From: RepoRover
Date: Fri, 2 Jan 2026 18:22:12 +0200
Subject: [PATCH] refactor(spec): align schema specifications with the current
 state of the application

---
 README.md                                      |  94 ++-
 database_schema_spec/cli/generator.py          | 112 +--
 database_schema_spec/core/config.py            |   9 +-
 database_schema_spec/io/output_manager.py      | 169 ++++-
 database_schema_spec/resolution/resolver.py    | 183 ++++-
 docs/README.md                                 | 120 ----
 docs/_examples_/configs/postgresql.yaml        |  66 --
 .../_examples_/configs/postgresql/config.yaml  | 104 +++
 docs/_examples_/manifest.yaml                  |  53 --
 .../manifests/postgresql/manifest.yml          |  89 +++
 .../postgresql/v15.0/schema_1-stored.yaml      |  63 ++
 .../schemas/postgresql/v15.0/schema_1.yaml     | 135 ++--
 .../postgresql/v15.0/schema_2-stored.yaml      | 295 ++++
 .../schemas/postgresql/v15.0/schema_2.yaml     | 576 +++++++--------
 .../postgresql/v15.0/schema_3-stored.yaml      |   5 +
 .../schemas/postgresql/v15.0/schema_3.yaml     | 360 +---------
 .../postgresql/v15.0/components/column.json    | 667 +++++++++---------
 .../v15.0/components/constraint.json           |   6 +-
 .../postgresql/v15.0/components/table.json     |   1 +
 .../postgresql/v15.0/snapshot/stored.json      |  22 +
 .../postgresql/v15.0/snapshot/working.json     |  55 ++
 .../engines/postgresql/v15.0/spec.json         |  28 -
 .../engines/postgresql/v15.0/tables.json       |   9 +
 docs/schemas/project/config/base.json          | 125 ++--
 .../project/config/engines/postgresql.json     | 227 +-----
 docs/schemas/project/manifest.json             | 133 ++--
 tests/conftest.py                              | 157 ++++-
 tests/test_config.py                           |  55 +-
 tests/test_integration_production.py           | 143 +++-
 tests/test_output_manager.py                   | 396 ++++++++---
 tests/test_resolver.py                         | 380 +++++++++-
 31 files changed, 2901 insertions(+), 1936 deletions(-)
 delete mode 100644 docs/README.md
 delete mode 100644 docs/_examples_/configs/postgresql.yaml
 create mode 100644 docs/_examples_/configs/postgresql/config.yaml
 delete mode 100644 docs/_examples_/manifest.yaml
 create mode 100644 docs/_examples_/manifests/postgresql/manifest.yml
 create mode 100644 docs/_examples_/schemas/postgresql/v15.0/schema_1-stored.yaml
 create mode 100644 docs/_examples_/schemas/postgresql/v15.0/schema_2-stored.yaml
 create mode 100644 docs/_examples_/schemas/postgresql/v15.0/schema_3-stored.yaml
 create mode 100644 docs/schemas/engines/postgresql/v15.0/snapshot/stored.json
 create mode 100644 docs/schemas/engines/postgresql/v15.0/snapshot/working.json
 delete mode 100644 docs/schemas/engines/postgresql/v15.0/spec.json
 create mode 100644 docs/schemas/engines/postgresql/v15.0/tables.json

diff --git a/README.md b/README.md
index 03d68da..44ef6de 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,36 @@

 A Python package for generating unified JSON documentation files for database schemas by resolving JSON Schema references and handling oneOf variants. This tool processes modular database schema specifications and generates consolidated documentation for different database engines and versions.

+## User Project Structure
+
+The generated schemas are designed to validate user projects with this structure:
+
+```
+my-project/
+├── .bfloo/                      # Hidden config directory (like .git)
+│   ├── config.yml               # All schemas configuration
+│   ├── orders/                  # Schema: "orders"
+│   │   ├── manifest.yml         # Snapshot registry
+│   │   └── 2024-01-15_v1.0.0.yml  # Snapshot files
+│   ├── users/                   # Schema: "users"
+│   │   └── manifest.yml
+│   └── analytics/               # Schema: "analytics"
+│       └── manifest.yml
+├── schemas/                     # Custom directory (via dir: "schemas")
+│   ├── orders.yml               # Working schema for "orders"
+│   └── users.yml                # Working schema for "users"
+└── db-schemas/
+    └── analytics.yml            # Working schema in the default directory (dir omitted)
+```
+
+**Key concepts:**
+
+- **Schema names are user-defined** - `orders`, `users`, `analytics`, etc.
+- **Flat structure** - Each schema is a top-level entry (no nested hierarchy)
+- **One manifest per schema** - Each schema keeps its own snapshot history in `.bfloo/<schema-name>/`
+- **Configurable working directory** - Use `dir` to specify where `<schema>.yml` is stored (default: `db-schemas/`)
+- **Per-schema API keys** - Each schema has its own API key for sync
+
 ## 🚀 Quick Start

 ### Prerequisites
 database-schema-spec/
 │   ├── project/
 │   │   ├── manifest.json        # Snapshot manifest schema
 │   │   └── config/
-│   │       ├── base.json        # Common config schema
+│   │       ├── base.json        # Common config schema (with $defs)
 │   │       └── engines/
-│   │           └── postgresql.json  # PostgreSQL connection config
+│   │           └── postgresql.json  # PostgreSQL-specific config (references base.json)
 │   └── engines/
 │       └── postgresql/
-│           └── v15.0/           # Version-specific spec
-│               ├── spec.json
+│           └── v15.0/           # Version-specific schemas
+│               ├── tables.json  # Tables array schema (AI-focused)
+│               ├── snapshot/
+│               │   ├── stored.json   # Stored snapshot schema
+│               │   └── working.json  # Working snapshot schema
 │               └── components/
 └── output/                      # Generated output files
     ├── smap.json                # Schema map (discovery file)
     ├── manifest.json            # Manifest schema with $id
     ├── config/
-    │   ├── base.json            # Base config with $id
-    │   └── engines/
-    │       └── postgresql.json  # PostgreSQL config with $id
+    │   └── postgresql.json      # Fully-resolved PostgreSQL config (self-contained)
     └── postgresql/
         └── v15.0/
-            └── spec.json        # Fully resolved spec with $id
+            ├── tables.json      # Tables array schema (AI-focused)
+            └── snapshot/
+                ├── stored.json  # Stored snapshot schema (CLI)
+                └── working.json # Working snapshot schema (CLI)
 ```

 ## 🧪 Development
 output/
 ├── smap.json                    # Schema map for discovery
 ├── manifest.json                # Manifest schema
 ├── config/
-│   ├── base.json                # Base config schema
-│   └── engines/
-│       └── postgresql.json      # PostgreSQL config schema
+│   └── postgresql.json          # Fully-resolved PostgreSQL config (self-contained)
 └── postgresql/
     └── v15.0/
-        └── spec.json            # PostgreSQL 15.0 spec
+        ├── tables.json          # Tables array schema (AI-focused)
+        └── snapshot/
+            ├── stored.json      # Stored snapshot schema (CLI)
+            └── working.json     # Working snapshot schema (CLI)
 ```
+
+**Note:** Each engine config file (e.g., `postgresql.json`) is fully resolved with all `$ref` references inlined, making it completely self-contained. This eliminates the need for separate `base.json` and engine-specific files in the output.
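+
+For illustration, a source reference of the form
+
+```json
+{ "envs": { "$ref": "engines/postgresql.json#/$defs/envs" } }
+```
+
+is replaced in the output by the referenced `envs` definition itself, so a
+project config can be validated against `config/postgresql.json` alone.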
+ ### Schema Map (smap.json) The schema map provides a structured index of all generated schemas: ```json { - "project": { - "manifest": "https://example.com/schemas/manifest.json", - "config": { - "base": "https://example.com/schemas/config/base.json", - "engines": { - "postgresql": "https://example.com/schemas/config/engines/postgresql.json" - } - } - }, - "engines": { - "postgresql": { - "v15.0": "https://example.com/schemas/postgresql/v15.0/spec.json" - } - } + "project": { + "manifest": "https://example.com/schemas/manifest.json", + "config": { + "postgresql": "https://example.com/schemas/config/postgresql.json" + } + }, + "engines": { + "postgresql": { + "v15.0": { + "tables": "https://example.com/schemas/postgresql/v15.0/tables.json", + "snapshot": { + "stored": "https://example.com/schemas/postgresql/v15.0/snapshot/stored.json", + "working": "https://example.com/schemas/postgresql/v15.0/snapshot/working.json" + } + } + } + } } ``` + +The `config` section maps engine names directly to their fully-resolved schema URLs, making it easy to fetch the appropriate config schema for any supported database engine. diff --git a/database_schema_spec/cli/generator.py b/database_schema_spec/cli/generator.py index 1fd6e61..0fa019f 100644 --- a/database_schema_spec/cli/generator.py +++ b/database_schema_spec/cli/generator.py @@ -94,34 +94,22 @@ def generate_all_variants(self) -> list[Path]: # Collect unique engine names for config generation engines: list[str] = list({v.engine for v in variants}) - # Generate schema for each variant + # Generate schemas for each variant generated_files: list[Path] = [] for variant in variants: - logger.info("Generating schema for %s %s", variant.engine, variant.version) - file_path = self.generate_variant(variant) - generated_files.append(file_path) + logger.info("Generating schemas for %s %s", variant.engine, variant.version) + file_paths = self.generate_variant(variant) + generated_files.extend(file_paths) # Generate project schemas logger.info("Generating project schemas...") - # Generate base config schema - base_config_path = self.output_manager.write_project_schema( - config.file_names.project_config_base_schema, - "config/base.json", - config.base_url, - ) - generated_files.append(base_config_path) - logger.info("Base config schema written to: %s", base_config_path) - - # Generate engine-specific config schemas + # Generate fully-resolved engine config schemas for engine in engines: - engine_lower = engine.lower() - source_path = config.file_names.project_config_engine_pattern.format( - engine=engine_lower - ) - output_path = f"config/engines/{engine_lower}.json" - engine_config_path = self.output_manager.write_project_schema( - source_path, output_path, config.base_url + engine_config_path = self.output_manager.write_resolved_engine_config( + engine, + config.file_names.project_config_base_schema, + config.base_url, ) generated_files.append(engine_config_path) logger.info( @@ -143,51 +131,69 @@ def generate_all_variants(self) -> list[Path]: return generated_files - def generate_variant(self, variant: DatabaseVariantSpec) -> Path: - """Generate unified schema for a specific database variant. + def generate_variant(self, variant: DatabaseVariantSpec) -> list[Path]: + """Generate unified schemas for a specific database variant. 
+ + Generates three schema files per variant: + - tables.json: Tables array schema (for AI agents) + - snapshot/stored.json: Stored snapshot schema (for CLI) + - snapshot/working.json: Working snapshot schema (for CLI) Args: variant: Database variant to generate schema for Returns: - Path where the schema was written + List of paths where schemas were written """ - # Build path to engine-specific spec file - spec_path = config.file_names.engine_spec_pattern.format( - engine=variant.engine.lower(), - version=variant.version, - ) + generated_files: list[Path] = [] + + # Schema types to generate: (source_pattern_attr, output_type) + schema_types = [ + ("engine_tables_pattern", "tables"), + ("engine_snapshot_stored_pattern", "snapshot/stored"), + ("engine_snapshot_working_pattern", "snapshot/working"), + ] + + for pattern_attr, schema_type in schema_types: + # Build path to source schema file + pattern = getattr(config.file_names, pattern_attr) + source_path = pattern.format( + engine=variant.engine.lower(), + version=variant.version, + ) - # Create a variant-aware resolver and load the spec directly - variant_resolver = JSONRefResolver(self.docs_path, variant) - unified_schema = variant_resolver.resolve_file(spec_path) + # Create a variant-aware resolver and load the schema + variant_resolver = JSONRefResolver(self.docs_path, variant) + unified_schema = variant_resolver.resolve_file(source_path) - # Inject dynamic $id derived from BASE_URL for the final output - id_field = config.json_schema_fields.id_field - schema_field = config.json_schema_fields.schema_field - spec_url = self.output_manager._get_spec_url( - variant.engine, variant.version, config.base_url - ) + # Inject dynamic $id derived from BASE_URL for the final output + id_field = config.json_schema_fields.id_field + schema_field = config.json_schema_fields.schema_field + schema_url = self.output_manager._get_engine_schema_url( + variant.engine, variant.version, schema_type, config.base_url + ) - # Set/override $id - unified_schema[id_field] = spec_url + # Set/override $id + unified_schema[id_field] = schema_url - # Reorder top-level keys to ensure `$id` appears immediately after `$schema` - unified_schema = self._reorder_schema_keys( - unified_schema, id_field, schema_field - ) + # Reorder top-level keys to ensure `$id` appears immediately after `$schema` + unified_schema = self._reorder_schema_keys( + unified_schema, id_field, schema_field + ) - # Validate the resulting schema - validation_result = self.validator.validate_schema(unified_schema) - if not validation_result.is_valid: - raise ValidationError(validation_result.errors) + # Validate the resulting schema + validation_result = self.validator.validate_schema(unified_schema) + if not validation_result.is_valid: + raise ValidationError(validation_result.errors) - # Write the schema to output file - output_path = self.output_manager.write_schema( - unified_schema, variant.engine, variant.version - ) + # Write the schema to output file + output_path = self.output_manager.write_engine_schema( + unified_schema, variant.engine, variant.version, schema_type + ) + generated_files.append(output_path) + logger.info(" %s schema written to: %s", schema_type, output_path) - return output_path + return generated_files def _reorder_schema_keys( self, schema: dict, id_field: str, schema_field: str diff --git a/database_schema_spec/core/config.py b/database_schema_spec/core/config.py index 6f8b9da..eee8407 100644 --- a/database_schema_spec/core/config.py +++ 
b/database_schema_spec/core/config.py @@ -12,7 +12,14 @@ class FileNamesConfig(BaseModel): """Configuration for file names.""" database_registry_file: str = "schemas/_registry_.json" - engine_spec_pattern: str = "schemas/engines/{engine}/{version}/spec.json" + # Engine schema patterns (tables for AI, snapshot schemas for CLI) + engine_tables_pattern: str = "schemas/engines/{engine}/{version}/tables.json" + engine_snapshot_stored_pattern: str = ( + "schemas/engines/{engine}/{version}/snapshot/stored.json" + ) + engine_snapshot_working_pattern: str = ( + "schemas/engines/{engine}/{version}/snapshot/working.json" + ) project_config_base_schema: str = "schemas/project/config/base.json" project_config_engine_pattern: str = "schemas/project/config/engines/{engine}.json" project_manifest_schema: str = "schemas/project/manifest.json" diff --git a/database_schema_spec/io/output_manager.py b/database_schema_spec/io/output_manager.py index f2d67be..c0d9dd3 100644 --- a/database_schema_spec/io/output_manager.py +++ b/database_schema_spec/io/output_manager.py @@ -7,6 +7,7 @@ from typing import Any from database_schema_spec.core.config import config +from database_schema_spec.resolution.resolver import JSONRefResolver class OutputManager: @@ -43,13 +44,20 @@ def create_output_structure(self) -> None: f"Failed to create output directory {self.output_dir}: {e}" ) from e - def write_schema(self, schema: dict[str, Any], engine: str, version: str) -> Path: - """Write a resolved schema to the appropriate output file. + def write_engine_schema( + self, + schema: dict[str, Any], + engine: str, + version: str, + schema_type: str, + ) -> Path: + """Write a resolved engine schema to the appropriate output file. Args: schema: Fully resolved schema to write engine: Database engine name version: Database version + schema_type: Type of schema ('tables', 'snapshot/stored', 'snapshot/working') Returns: Path where the file was written @@ -57,7 +65,7 @@ def write_schema(self, schema: dict[str, Any], engine: str, version: str) -> Pat Raises: PermissionError: If unable to write file """ - output_path = self._get_output_path(engine, version) + output_path = self._get_engine_schema_path(engine, version, schema_type) try: # Create directory structure if it doesn't exist @@ -74,44 +82,52 @@ def write_schema(self, schema: dict[str, Any], engine: str, version: str) -> Pat f"Failed to write schema to {output_path}: {e}" ) from e - def _get_output_path(self, engine: str, version: str) -> Path: - """Get the output path for a specific engine/version combination. + def _get_engine_schema_path( + self, engine: str, version: str, schema_type: str + ) -> Path: + """Get the output path for a specific engine/version/type combination. Args: engine: Database engine name version: Database version + schema_type: Type of schema ('tables', 'snapshot/stored', 'snapshot/working') Returns: - Path where the spec should be written + Path where the schema should be written """ - return self.output_dir / engine.lower() / version / "spec.json" + return self.output_dir / engine.lower() / version / f"{schema_type}.json" - def _get_spec_url(self, engine: str, version: str, base_url: str = "") -> str: - """Get the URL for a specific engine/version spec file. + def _get_engine_schema_url( + self, engine: str, version: str, schema_type: str, base_url: str = "" + ) -> str: + """Get the URL for a specific engine/version/type schema file. 
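+
+        Example (illustrative):
+            ("postgresql", "v15.0", "tables", "https://example.com/schemas")
+            -> "https://example.com/schemas/postgresql/v15.0/tables.json"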
Args: engine: Database engine name version: Database version + schema_type: Type of schema ('tables', 'snapshot/stored', 'snapshot/working') base_url: Base URL to prepend (optional) Returns: - URL pointing to the spec file + URL pointing to the schema file """ - relative_path = f"{engine.lower()}/{version}/spec.json" + relative_path = f"{engine.lower()}/{version}/{schema_type}.json" if base_url: return f"{base_url.rstrip('/')}/{relative_path}" return relative_path - def _generate_engine_map(self, base_url: str = "") -> dict[str, dict[str, str]]: + def _generate_engine_map( + self, base_url: str = "" + ) -> dict[str, dict[str, dict[str, Any]]]: """Generate a map of all available engines and versions. Args: - base_url: Base URL to prepend to spec URLs (optional) + base_url: Base URL to prepend to schema URLs (optional) Returns: - Dictionary mapping engines to versions to URLs + Dictionary mapping engines to versions to schema type URLs """ - engine_map: dict[str, dict[str, str]] = {} + engine_map: dict[str, dict[str, dict[str, Any]]] = {} if not self.output_dir.exists(): return engine_map @@ -127,18 +143,46 @@ def _generate_engine_map(self, base_url: str = "") -> dict[str, dict[str, str]]: and engine_dir.name not in reserved_dirs ): engine_name = engine_dir.name - versions: dict[str, str] = {} + versions: dict[str, dict[str, Any]] = {} # Iterate through all version directories for this engine for version_dir in engine_dir.iterdir(): if version_dir.is_dir(): - spec_file = version_dir / "spec.json" - if spec_file.exists(): + tables_file = version_dir / "tables.json" + if tables_file.exists(): version_name = version_dir.name - spec_url = self._get_spec_url( - engine_name, version_name, base_url - ) - versions[version_name] = spec_url + version_schemas: dict[str, Any] = { + "tables": self._get_engine_schema_url( + engine_name, version_name, "tables", base_url + ), + } + + # Check for snapshot schemas + snapshot_dir = version_dir / "snapshot" + if snapshot_dir.is_dir(): + snapshot_schemas: dict[str, str] = {} + if (snapshot_dir / "stored.json").exists(): + snapshot_schemas["stored"] = ( + self._get_engine_schema_url( + engine_name, + version_name, + "snapshot/stored", + base_url, + ) + ) + if (snapshot_dir / "working.json").exists(): + snapshot_schemas["working"] = ( + self._get_engine_schema_url( + engine_name, + version_name, + "snapshot/working", + base_url, + ) + ) + if snapshot_schemas: + version_schemas["snapshot"] = snapshot_schemas + + versions[version_name] = version_schemas # Only add engine if it has at least one version if versions: @@ -203,20 +247,82 @@ def write_project_schema( f"Failed to write project schema to {full_output_path}: {e}" ) from e + def write_resolved_engine_config( + self, engine: str, base_config_path: str, base_url: str = "" + ) -> Path: + """Write a fully-resolved engine config schema. + + Takes the base config schema and resolves all $ref references including + engine-specific references (e.g., engines/postgresql.json#/$defs/envs). 
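+
+        Example (illustrative): write_resolved_engine_config("postgresql",
+        "schemas/project/config/base.json", "https://example.com/schemas")
+        produces a self-contained config/postgresql.json with $id injected.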
+
+        Args:
+            engine: Engine name (e.g., "postgresql")
+            base_config_path: Relative path to base config schema (from docs_dir)
+            base_url: Base URL for $id injection
+
+        Returns:
+            Path where the file was written
+
+        Raises:
+            PermissionError: If the source schemas cannot be resolved or the
+                output file cannot be written
+        """
+        engine_lower = engine.lower()
+        output_path = f"config/{engine_lower}.json"
+        full_output_path = self.output_dir / output_path
+
+        try:
+            # Ensure output directory exists
+            full_output_path.parent.mkdir(parents=True, exist_ok=True)
+
+            # Use the resolver to fully resolve all $ref references
+            resolver = JSONRefResolver(self.docs_dir)
+            resolved_schema = resolver.resolve_file(base_config_path)
+
+            # Update title to be engine-specific
+            if "title" in resolved_schema:
+                resolved_schema["title"] = f"{engine} Project Configuration"
+
+            # Inject $id
+            if base_url:
+                schema_url = f"{base_url.rstrip('/')}/{output_path}"
+                # Ensure $id comes after $schema
+                reordered: dict[str, Any] = {}
+                if "$schema" in resolved_schema:
+                    reordered["$schema"] = resolved_schema["$schema"]
+                reordered["$id"] = schema_url
+                for k, v in resolved_schema.items():
+                    if k not in ("$schema", "$id"):
+                        reordered[k] = v
+                resolved_schema = reordered
+
+            # Write to output
+            with open(full_output_path, "w", encoding="utf-8") as f:
+                json.dump(resolved_schema, f, indent=2, ensure_ascii=False)
+
+            return full_output_path
+
+        except Exception as e:
+            raise PermissionError(
+                f"Failed to write resolved engine config to {full_output_path}: {e}"
+            ) from e
+
     def write_schema_map(self, engines: list[str], base_url: str = "") -> Path:
         """Write the schema map to smap.json in the output root.

         The schema map contains:
         - project:
             - manifest: URL to manifest.json schema
-            - config:
-                - base: URL to config/base.json schema
-                - engines: Map of engine name -> config URL
-        - engines: Map of engine -> version -> spec URL
+            - config: Map of engine name -> config URL (fully-resolved per-engine configs)
+        - engines: Map of engine -> version -> schema URLs
+            - tables: URL to tables.json (AI-focused)
+            - snapshot:
+                - stored: URL to snapshot/stored.json (CLI)
+                - working: URL to snapshot/working.json (CLI)

         Args:
             engines: List of engine names to include in config mapping
-            base_url: Base URL to prepend to spec URLs (optional)
+            base_url: Base URL to prepend to schema URLs (optional)

         Returns:
             Path where the smap.json file was written
@@ -226,11 +332,11 @@
         """
         base = base_url.rstrip("/") if base_url else ""

-        # Build engine config map
+        # Build engine config map (now directly under config, not config.engines)
         engine_configs: dict[str, str] = {}
         for engine in engines:
             engine_lower = engine.lower()
-            config_path = f"config/engines/{engine_lower}.json"
+            config_path = f"config/{engine_lower}.json"
             engine_configs[engine_lower] = (
                 f"{base}/{config_path}" if base else config_path
             )
@@ -238,10 +344,7 @@
         schema_map: dict[str, Any] = {
             "project": {
                 "manifest": f"{base}/manifest.json" if base else "manifest.json",
-                "config": {
-                    "base": f"{base}/config/base.json" if base else "config/base.json",
-                    "engines": engine_configs,
-                },
+                "config": engine_configs,
             },
             "engines": self._generate_engine_map(base_url),
         }
diff --git a/database_schema_spec/resolution/resolver.py b/database_schema_spec/resolution/resolver.py
index 62c1e44..c42bcd8 100644
--- a/database_schema_spec/resolution/resolver.py
+++ 
b/database_schema_spec/resolution/resolver.py @@ -20,6 +20,11 @@ class JSONRefResolver: This resolver processes JSON Schema files and resolves all $ref references, inlining the referenced content to produce a self-contained schema. + + Supports: + - Local references: #/$defs/foo + - External file references: other.json + - External file with JSON pointer: other.json#/$defs/foo """ def __init__( @@ -36,6 +41,7 @@ def __init__( self.base_path = base_path self.current_variant = current_variant self.resolution_stack: list[str] = [] + self._file_cache: dict[str, dict[str, Any]] = {} def resolve_references( self, schema: dict[str, Any], current_file: str | None = None @@ -51,13 +57,49 @@ def resolve_references( def _resolve_ref( self, schema: dict[str, Any], current_file: str | None ) -> dict[str, Any]: + """Resolve a $ref reference. + + Handles: + - Local references: #/$defs/foo (within current file) + - External file references: other.json + - External file with JSON pointer: other.json#/$defs/foo + """ ref_path = schema[config.json_schema_fields.ref_field] - if self.detect_circular_reference(ref_path): - raise CircularReferenceError(self.resolution_stack + [ref_path]) - self.resolution_stack.append(ref_path) + + # Parse the reference into file path and JSON pointer + file_path, json_pointer = self._parse_ref(ref_path) + + # Determine the resolution key for circular reference detection + resolution_key = ref_path if file_path else f"{current_file or ''}:{ref_path}" + + if self.detect_circular_reference(resolution_key): + raise CircularReferenceError(self.resolution_stack + [resolution_key]) + + self.resolution_stack.append(resolution_key) try: - referenced_content = self.load_referenced_file(ref_path, current_file) - new_current_file = self._get_new_current_file(current_file, ref_path) + if file_path: + # External reference (with or without JSON pointer) + referenced_content = self.load_referenced_file(file_path, current_file) + new_current_file = self._get_new_current_file(current_file, file_path) + + if json_pointer: + # Extract the specific part using JSON pointer + referenced_content = self._resolve_json_pointer( + referenced_content, json_pointer, ref_path + ) + else: + # Local reference (JSON pointer only, starts with #) + # We need access to the root document + if not json_pointer: + raise ReferenceResolutionError( + ref_path, + ValueError("Invalid local reference: missing JSON pointer"), + ) + referenced_content = self._resolve_local_pointer( + json_pointer, current_file, ref_path + ) + new_current_file = current_file + resolved_content = self.resolve_references( referenced_content, new_current_file ) @@ -65,6 +107,133 @@ def _resolve_ref( finally: self.resolution_stack.pop() + def _parse_ref(self, ref_path: str) -> tuple[str | None, str | None]: + """Parse a $ref value into file path and JSON pointer components. + + Args: + ref_path: The $ref value (e.g., "other.json#/$defs/foo" or "#/$defs/foo") + + Returns: + Tuple of (file_path, json_pointer). Either can be None. + """ + if "#" in ref_path: + parts = ref_path.split("#", 1) + file_path = parts[0] if parts[0] else None + json_pointer = parts[1] if len(parts) > 1 and parts[1] else None + return file_path, json_pointer + return ref_path, None + + def _resolve_json_pointer( + self, document: dict[str, Any], pointer: str, original_ref: str + ) -> dict[str, Any]: + """Resolve a JSON pointer within a document. 
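+
+        Example (illustrative): pointer "/$defs/envs" walks
+        document["$defs"]["envs"]. RFC 6901 escapes are decoded per segment
+        ("~1" -> "/", then "~0" -> "~") before each lookup.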
+ + Args: + document: The JSON document to traverse + pointer: JSON pointer string (e.g., "/$defs/envs") + original_ref: Original reference for error messages + + Returns: + The referenced portion of the document + + Raises: + ReferenceResolutionError: If the pointer cannot be resolved + """ + if not pointer or pointer == "/": + return document + + # Remove leading slash and split into parts + parts = pointer.lstrip("/").split("/") + current = document + + for part in parts: + # Handle JSON pointer escaping (~0 = ~, ~1 = /) + part = part.replace("~1", "/").replace("~0", "~") + + if isinstance(current, dict): + if part not in current: + raise ReferenceResolutionError( + original_ref, + KeyError( + f"JSON pointer '{pointer}' not found: key '{part}' " + f"does not exist" + ), + ) + current = current[part] + elif isinstance(current, list): + try: + index = int(part) + current = current[index] + except (ValueError, IndexError) as e: + raise ReferenceResolutionError( + original_ref, + KeyError( + f"JSON pointer '{pointer}' not found: " + f"invalid array index '{part}'" + ), + ) from e + else: + raise ReferenceResolutionError( + original_ref, + TypeError( + f"JSON pointer '{pointer}' cannot traverse " + f"non-container type at '{part}'" + ), + ) + + if not isinstance(current, dict): + raise ReferenceResolutionError( + original_ref, + TypeError(f"JSON pointer '{pointer}' resolved to non-object type"), + ) + + return current + + def _resolve_local_pointer( + self, pointer: str, current_file: str | None, original_ref: str + ) -> dict[str, Any]: + """Resolve a local JSON pointer (starting with #) within the current file. + + Args: + pointer: JSON pointer string (e.g., "/$defs/envs") + current_file: Path to the current file being processed + original_ref: Original reference for error messages + + Returns: + The referenced portion of the document + + Raises: + ReferenceResolutionError: If the pointer cannot be resolved + """ + if not current_file: + raise ReferenceResolutionError( + original_ref, + ValueError( + "Cannot resolve local reference without current file context" + ), + ) + + # Load the current file's root document + root_document = self._load_file_cached(current_file) + return self._resolve_json_pointer(root_document, pointer, original_ref) + + def _load_file_cached(self, file_path: str) -> dict[str, Any]: + """Load a JSON file with caching. 
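+
+        The cache lives for the lifetime of the resolver instance, so repeated
+        local-pointer lookups into the same file cost one disk read.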
+ + Args: + file_path: Relative path to the file from base_path + + Returns: + Parsed JSON content + """ + if file_path not in self._file_cache: + full_path = (self.base_path / file_path).resolve() + if not full_path.exists(): + raise FileNotFoundError(f"File not found: {full_path}") + with open(full_path, "r", encoding="utf-8") as f: + self._file_cache[file_path] = json.load(f) + return self._file_cache[file_path] + def _get_new_current_file(self, current_file: str | None, ref_path: str) -> str: if current_file: current_dir = (self.base_path / current_file).parent @@ -171,6 +340,7 @@ def resolve_file(self, file_path: str) -> dict[str, Any]: Raises: ReferenceResolutionError: If file cannot be loaded or resolved + CircularReferenceError: If circular references are detected """ try: # Load the file @@ -186,6 +356,9 @@ def resolve_file(self, file_path: str) -> dict[str, Any]: resolved_schema = self.resolve_references(schema, file_path) return resolved_schema + except CircularReferenceError: + # Re-raise circular reference errors directly + raise except Exception as e: logger.exception("Error resolving file '%s': %s", file_path, e) raise ReferenceResolutionError(file_path, e) from e diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 6032edc..0000000 --- a/docs/README.md +++ /dev/null @@ -1,120 +0,0 @@ -# Database Schema Specification - -**Standardized, modular JSON Schema specification** for database structure definition and validation. Designed for scalability, maintainability, and seamless integration with AI systems. - -## Architecture - -Our modular architecture prevents code duplication and enables effortless database version management: - -``` -schemas/ -├── _registry_.json # Engine/version registry -├── project/ -│ ├── manifest.json # Snapshot manifest schema -│ └── config/ -│ ├── base.json # Common project config schema -│ └── engines/ -│ └── postgresql.json # PostgreSQL connection config -└── engines/ - └── postgresql/ - └── v15.0/ # Version-specific isolation - ├── spec.json # Self-contained PostgreSQL 15.0 spec - └── components/ # Reusable schema components - ├── table.json # Table definitions - ├── column.json # Column types - └── constraint.json # Constraints -``` - -## Schema Types - -### Project Schemas - -| Schema | Purpose | -|--------|---------| -| `config/base.json` | Common config: schema_id, database.engine, api settings (including key) | -| `config/engines/postgresql.json` | PostgreSQL-specific connection parameters | -| `manifest.json` | Snapshot registry with version tracking | - -### Engine Specs - -Each engine/version combination has a self-contained `spec.json` that defines the complete schema for validating `schema.yaml` files. - -## Environment Variable Resolution - -The CLI supports environment variable references in configuration values using `${VAR_NAME}` syntax. This allows sensitive data like API keys and database passwords to be kept out of committed files. - -### Syntax - -```yaml -api: - key: "${BFLOO_API_KEY}" - -environments: - production: - password: "${PROD_DB_PASSWORD}" -``` - -### Resolution Order - -The CLI resolves environment variables from: - -1. **System environment** - Variables already set in the shell -2. 
**`.env` file** - Searched in these locations (first found wins): - - Same directory as the config file - - Project root (directory containing `.git`) - - Current working directory - -### CLI Override - -```bash -# Specify a custom .env file location -db sync push --env-file /path/to/.env.production -``` - -### Security Notes - -- Never commit `.env` files containing secrets (add to `.gitignore`) -- The `${VAR_NAME}` syntax is only resolved at CLI runtime -- Missing variables cause the CLI to fail with an error (no silent fallbacks) - -## Generated Output - -The generator produces resolved schemas with injected `$id` fields: - -``` -output/ -├── smap.json # Schema map (discovery file) -├── manifest.json # Manifest schema with $id -├── config/ -│ ├── base.json # Base config with $id -│ └── engines/ -│ └── postgresql.json # PostgreSQL config with $id -└── postgresql/ - └── v15.0/ - └── spec.json # Fully resolved spec with $id -``` - -### Schema Map (smap.json) - -```json -{ - "project": { - "manifest": "https://example.com/schemas/manifest.json", - "config": { - "base": "https://example.com/schemas/config/base.json", - "engines": { - "postgresql": "https://example.com/schemas/config/engines/postgresql.json" - } - } - }, - "engines": { - "postgresql": { - "v15.0": "https://example.com/schemas/postgresql/v15.0/spec.json" - } - } -} -``` - -## FSD - -- **FSD**: [Full Specification Document](https://www.notion.so/Database-Engines-Support-237bed96279c80ee85c1e69cf2abc42f) - Comprehensive guide to the database schema specification. diff --git a/docs/_examples_/configs/postgresql.yaml b/docs/_examples_/configs/postgresql.yaml deleted file mode 100644 index 9d62038..0000000 --- a/docs/_examples_/configs/postgresql.yaml +++ /dev/null @@ -1,66 +0,0 @@ -# Example config.yaml for PostgreSQL -# This file contains project configuration and database connection settings. -# Place this file at the root of your database/ directory. 
- -# Unique identifier linking this project to the web application -# Generated automatically on first `db sync push` if not present -schema_id: "550e8400-e29b-41d4-a716-446655440000" - -# Database engine configuration -# Version is tracked per-snapshot in manifest.yaml, not here -database: - engine: "PostgreSQL" - -# API configuration for web app synchronization -api: - base_url: "https://api.bfloo.app" - key: "${BFLOO_API_KEY}" # Resolved from environment or .env file - -# PostgreSQL connection settings per environment -# Uses official libpq parameter names -# See: https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS -environments: - # Production environment - use strict SSL and read-write primary - production: - host: "${PROD_DB_HOST}" - port: 5432 - dbname: "${PROD_DB_NAME}" - user: "${PROD_DB_USER}" - password: "${PROD_DB_PASSWORD}" - sslmode: "verify-full" - sslrootcert: "/etc/ssl/certs/ca-certificates.crt" - connect_timeout: 10 - application_name: "myapp-prod" - target_session_attrs: "read-write" - - # Staging environment - require SSL but less strict verification - staging: - host: "${STAGING_DB_HOST}" - port: 5432 - dbname: "${STAGING_DB_NAME}" - user: "${STAGING_DB_USER}" - password: "${STAGING_DB_PASSWORD}" - sslmode: "require" - connect_timeout: 10 - application_name: "myapp-staging" - - # Development environment - local database, relaxed settings - development: - host: "localhost" - port: 5432 - dbname: "myapp_dev" - user: "postgres" - password: "${DEV_DB_PASSWORD}" - sslmode: "prefer" - connect_timeout: 5 - application_name: "myapp-dev" - - # CI/Test environment - ephemeral database for testing - test: - host: "localhost" - port: 5432 - dbname: "myapp_test" - user: "postgres" - password: "test_password" - sslmode: "disable" - connect_timeout: 5 diff --git a/docs/_examples_/configs/postgresql/config.yaml b/docs/_examples_/configs/postgresql/config.yaml new file mode 100644 index 0000000..002b9ed --- /dev/null +++ b/docs/_examples_/configs/postgresql/config.yaml @@ -0,0 +1,104 @@ +# Example config.yaml for PostgreSQL +# This file contains project configuration and database connection settings. +# Place this file at the root of your .bfloo/ directory. + +schemas: + # Example schema: main application database + myapp: + # Directory where the schema YAML file is stored (relative to project root) + # Defaults to 'db-schemas' if not specified, can be './' or '.' 
for project root + dir: 'db-schemas' + + # API key for web app synchronization + # Resolved from the schema-level env-file (defaults to .env) + key: '${BFLOO_API_KEY}' + + # Database engine configuration + engine: 'PostgreSQL' + + # Default env file for this schema (used for API key and as fallback for envs) + # Each environment can override this with its own env-file + env-file: '.env' + + # PostgreSQL connection settings per environment + # Uses official libpq parameter names + # See: https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS + envs: + # Production environment - use strict SSL + production: + # Environment-specific env file for production secrets + env-file: '.env.production' + host: '${PROD_DB_HOST}' + port: 5432 + dbname: '${PROD_DB_NAME}' + target-schema: 'public' + user: '${PROD_DB_USER}' + password: '${PROD_DB_PASSWORD}' + sslmode: 'verify-full' + connect_timeout: 10 + + # Staging environment - require SSL but less strict verification + staging: + # Environment-specific env file for staging secrets + env-file: '.env.staging' + host: '${STAGING_DB_HOST}' + port: 5432 + dbname: '${STAGING_DB_NAME}' + target-schema: 'public' + user: '${STAGING_DB_USER}' + password: '${STAGING_DB_PASSWORD}' + sslmode: 'require' + connect_timeout: 10 + + # Development environment - local database, relaxed settings + development: + # Uses schema-level env-file (.env) since env-file is not specified + host: 'localhost' + port: 5432 + dbname: 'myapp_dev' + target-schema: 'public' + user: 'postgres' + password: '${DEV_DB_PASSWORD}' + sslmode: 'prefer' + connect_timeout: 5 + + # CI/Test environment - ephemeral database for testing + test: + # No env-file needed - uses hardcoded test values + host: 'localhost' + port: 5432 + dbname: 'myapp_test' + target-schema: 'public' + user: 'postgres' + password: 'test_password' + sslmode: 'disable' + connect_timeout: 5 + + # Example schema: analytics database (uses project root for schema file) + analytics: + # Store schema file in project root instead of default 'db-schemas' + dir: './' + + key: '${ANALYTICS_API_KEY}' + engine: 'PostgreSQL' + env-file: '.env.analytics' + + envs: + production: + env-file: '.env.analytics.production' + host: '${ANALYTICS_DB_HOST}' + port: 5432 + dbname: 'analytics' + target-schema: 'reporting' + user: '${ANALYTICS_DB_USER}' + password: '${ANALYTICS_DB_PASSWORD}' + sslmode: 'verify-full' + + development: + host: 'localhost' + port: 5432 + dbname: 'analytics_dev' + target-schema: 'reporting' + user: 'postgres' + password: '${DEV_DB_PASSWORD}' + sslmode: 'disable' diff --git a/docs/_examples_/manifest.yaml b/docs/_examples_/manifest.yaml deleted file mode 100644 index a20deb5..0000000 --- a/docs/_examples_/manifest.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# Example manifest.yaml for snapshot tracking -# This file tracks all schema snapshots with their metadata. -# Located in the _history_/ directory of your database/ folder. -# Managed by the CLI - typically not edited manually. 
-
-# Currently active snapshot label
-current: "v2.0.0"
-
-# Ordered list of snapshots (oldest first)
-# Forms a tree structure via parent_id relationships
-snapshots:
-  # Initial schema release
-  - label: "v1.0.0"
-    id: "123e4567-e89b-12d3-a456-426614174000"
-    parent_id: null
-    database_version: "15.0"
-    created_at: "2024-01-15T10:30:00Z"
-    file: "2024-01-15_v1.0.0.yaml"
-    synced: true
-
-  # Added products table
-  - label: "v1.1.0"
-    id: "234e5678-f90a-23e4-b567-537725285111"
-    parent_id: "123e4567-e89b-12d3-a456-426614174000"
-    database_version: "15.0"
-    created_at: "2024-02-20T14:15:00Z"
-    file: "2024-02-20_v1.1.0.yaml"
-    synced: true
-
-  # Added orders and order_items tables
-  - label: "v2.0.0"
-    id: "345f6789-a01b-34f5-c678-648836396222"
-    parent_id: "234e5678-f90a-23e4-b567-537725285111"
-    database_version: "15.0"
-    created_at: "2024-03-10T09:45:00Z"
-    file: "2024-03-10_v2.0.0.yaml"
-    synced: false
-
-  # Experimental branch from v1.0.0 (demonstrates branching)
-  - label: "v1.0.1-experiment"
-    id: "456a7890-b12c-45a6-d789-759947407333"
-    parent_id: "123e4567-e89b-12d3-a456-426614174000"
-    database_version: "15.0"
-    created_at: "2024-01-20T11:00:00Z"
-    file: "2024-01-20_v1.0.1-experiment.yaml"
-    synced: false
-
-# Snapshot tree visualization:
-#
-# v1.0.0 (PostgreSQL 15.0)
-# ├── v1.1.0 (PostgreSQL 15.0)
-# │   └── v2.0.0 (PostgreSQL 15.0) ← current
-# └── v1.0.1-experiment (PostgreSQL 15.0)
diff --git a/docs/_examples_/manifests/postgresql/manifest.yml b/docs/_examples_/manifests/postgresql/manifest.yml
new file mode 100644
index 0000000..a44d428
--- /dev/null
+++ b/docs/_examples_/manifests/postgresql/manifest.yml
@@ -0,0 +1,89 @@
+# Example manifest.yml for snapshot tracking
+# This file tracks all schema snapshots with their metadata.
+# Located at .bfloo/<schema-name>/manifest.yml
+# Managed by the CLI - typically not edited manually.
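+#
+# Snapshots are keyed by snapshot UUID, and parent-id links them into a
+# version tree (see the visualization at the bottom of this file).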
+# +# This example shows manifests for three different schemas: +# - users: User Management (schema_1) +# - ecommerce: E-commerce Platform (schema_2) +# - inventory: Inventory Management (schema_3) + +# ============================================================================ +# USERS SCHEMA MANIFEST (.bfloo/users/manifest.yml) +# ============================================================================ +# Simple schema with single table, showing basic snapshot progression + +snapshots: + # Initial users schema (synced, done) + "123e4567-e89b-12d3-a456-426614174000": + label: "v1.0.0" + parent-id: null + status: "done" + database-version: "v15.0" + created-at: "2024-01-15T10:30:00Z" + file: "2024-01-15_v1.0.0.yml" + content-hash: "sha256:a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2" + sync-state: "synced" + synced-at: "2024-01-15T10:35:00Z" + +# ============================================================================ +# E-COMMERCE SCHEMA MANIFEST (.bfloo/ecommerce/manifest.yml) +# ============================================================================ +# Complex schema showing version progression and branching + +# snapshots: +# # Initial e-commerce schema with users and products (synced, done) +# "234e5678-f90a-23e4-b567-537725285111": +# label: "v1.0.0" +# parent-id: null +# status: "done" +# database-version: "v15.0" +# created-at: "2024-02-01T09:00:00Z" +# file: "2024-02-01_v1.0.0.yml" +# content-hash: "sha256:b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3" +# sync-state: "synced" +# synced-at: "2024-02-01T09:05:00Z" +# +# # Full e-commerce with orders (current, draft) +# "345f6789-a01b-34f5-c678-648836396222": +# label: "v2.0.0" +# parent-id: "234e5678-f90a-23e4-b567-537725285111" +# status: "draft" +# database-version: "v15.0" +# created-at: "2024-02-20T14:15:00Z" +# file: "current" +# content-hash: "sha256:c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4" +# sync-state: "local-only" +# synced-at: null + +# ============================================================================ +# INVENTORY SCHEMA MANIFEST (.bfloo/inventory/manifest.yml) +# ============================================================================ +# Multi-table schema with foreign key relationships + +# snapshots: +# # Initial inventory system (synced, done) +# "456f7890-b12c-45f6-d789-759947407333": +# label: "v1.0.0" +# parent-id: null +# status: "done" +# database-version: "v15.0" +# created-at: "2024-03-10T09:45:00Z" +# file: "2024-03-10_v1.0.0.yml" +# content-hash: "sha256:d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5" +# sync-state: "synced" +# synced-at: "2024-03-10T09:50:00Z" + +# ============================================================================ +# SNAPSHOT TREE VISUALIZATION +# ============================================================================ +# +# users: +# v1.0.0 (done, synced) ← current +# +# ecommerce: +# v1.0.0 (done, synced) +# └── v2.0.0 (draft, local-only) ← current +# +# inventory: +# v1.0.0 (done, synced) ← current diff --git a/docs/_examples_/schemas/postgresql/v15.0/schema_1-stored.yaml b/docs/_examples_/schemas/postgresql/v15.0/schema_1-stored.yaml new file mode 100644 index 0000000..d7318bd --- /dev/null +++ b/docs/_examples_/schemas/postgresql/v15.0/schema_1-stored.yaml @@ -0,0 +1,63 @@ +# Example 1: User Management Schema (Stored Snapshot) +# File location: .bfloo/users/2024-01-15_v1.0.0.yml +# Stored snapshots contain only hashable content. 
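+# (This is presumably the content covered by the manifest's content-hash, so
+# metadata such as label or sync-state never affects a snapshot's hash.)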
+# This example omits optional description to show minimal valid structure. + +tables: + - id: 1 + name: users + columns: + - id: 1 + name: id + type: serial + constraints: + nullable: false + - id: 2 + name: username + type: text + constraints: + nullable: false + min_length: + name: "chk_username_min_len" + value: 3 + max_length: + name: "chk_username_max_len" + value: 50 + - id: 3 + name: email + type: text + constraints: + nullable: false + max_length: + name: "chk_email_max_len" + value: 255 + - id: 4 + name: password_hash + type: text + constraints: + nullable: false + - id: 5 + name: is_active + type: boolean + default: true + constraints: + nullable: false + - id: 6 + name: created_at + type: timestamp + default: "current_timestamp" + constraints: + nullable: false + constraints: + - id: 1 + name: "pk_users" + type: primary_key + columns: ["id"] + - id: 2 + name: "uq_users_username" + type: unique + columns: ["username"] + - id: 3 + name: "uq_users_email" + type: unique + columns: ["email"] diff --git a/docs/_examples_/schemas/postgresql/v15.0/schema_1.yaml b/docs/_examples_/schemas/postgresql/v15.0/schema_1.yaml index d8c48c0..72f11ae 100644 --- a/docs/_examples_/schemas/postgresql/v15.0/schema_1.yaml +++ b/docs/_examples_/schemas/postgresql/v15.0/schema_1.yaml @@ -1,71 +1,68 @@ -# Example 1: Simple User Management Schema -# A minimal schema demonstrating basic table structure with users only. +# Example 1: User Management Schema (Working Snapshot) +# File location: db-schemas/users.yml +# A simple schema with user accounts table. +# This example omits optional descriptions to show minimal valid structure. -name: "User Management" -description: "Simple user authentication and profile management" +schema: + name: "User Management" -tables: - - id: 1 - name: users - description: "User accounts and authentication data" - columns: - - id: 1 - name: id - type: serial - description: "Primary key" - constraints: - nullable: false - - id: 2 - name: username - type: text - description: "Unique username for login" - constraints: - nullable: false - min_length: - name: "chk_username_min_len" - value: 3 - max_length: - name: "chk_username_max_len" - value: 50 - - id: 3 - name: email - type: text - description: "User email address" - constraints: - nullable: false - max_length: - name: "chk_email_max_len" - value: 255 - - id: 4 - name: password_hash - type: text - description: "Hashed password" - constraints: - nullable: false - - id: 5 - name: is_active - type: boolean - description: "Whether the account is active" - default: true - constraints: - nullable: false - - id: 6 - name: created_at - type: timestamp - description: "Account creation timestamp" - default: "current_timestamp" - constraints: - nullable: false - constraints: - - id: 1 - name: "pk_users" - type: primary_key - columns: ["id"] - - id: 2 - name: "uq_users_username" - type: unique - columns: ["username"] - - id: 3 - name: "uq_users_email" - type: unique - columns: ["email"] +snapshot: + label: "v1.0.0" + tables: + - id: 1 + name: users + columns: + - id: 1 + name: id + type: serial + constraints: + nullable: false + - id: 2 + name: username + type: text + constraints: + nullable: false + min_length: + name: "chk_username_min_len" + value: 3 + max_length: + name: "chk_username_max_len" + value: 50 + - id: 3 + name: email + type: text + constraints: + nullable: false + max_length: + name: "chk_email_max_len" + value: 255 + - id: 4 + name: password_hash + type: text + constraints: + nullable: false + - id: 5 + name: is_active + 
type: boolean + default: true + constraints: + nullable: false + - id: 6 + name: created_at + type: timestamp + default: "current_timestamp" + constraints: + nullable: false + constraints: + - id: 1 + name: "pk_users" + type: primary_key + columns: ["id"] + - id: 2 + name: "uq_users_username" + type: unique + columns: ["username"] + - id: 3 + name: "uq_users_email" + type: unique + columns: ["email"] diff --git a/docs/_examples_/schemas/postgresql/v15.0/schema_2-stored.yaml b/docs/_examples_/schemas/postgresql/v15.0/schema_2-stored.yaml new file mode 100644 index 0000000..f24f5b0 --- /dev/null +++ b/docs/_examples_/schemas/postgresql/v15.0/schema_2-stored.yaml @@ -0,0 +1,295 @@ +# Example 2: E-commerce Platform Schema (Stored Snapshot) +# File location: .bfloo/ecommerce/2024-02-20_v2.0.0.yml +# Stored snapshots contain only hashable content: description + tables + +description: "Full e-commerce schema with categories, products, orders" +tables: + - id: 1 + name: users + description: "Customer accounts" + columns: + - id: 1 + name: id + type: serial + constraints: + nullable: false + - id: 2 + name: email + type: text + description: "User email address" + constraints: + nullable: false + max_length: + name: "chk_email_max_len" + value: 255 + - id: 3 + name: full_name + type: text + description: "User's full name" + constraints: + nullable: false + max_length: + name: "chk_fullname_max_len" + value: 100 + - id: 4 + name: created_at + type: timestamp + default: "current_timestamp" + constraints: + nullable: false + constraints: + - id: 1 + name: "pk_users" + type: primary_key + columns: ["id"] + - id: 2 + name: "uq_users_email" + type: unique + columns: ["email"] + + - id: 2 + name: categories + description: "Product categories with hierarchical support" + columns: + - id: 1 + name: id + type: serial + constraints: + nullable: false + - id: 2 + name: name + type: text + description: "Category name" + constraints: + nullable: false + max_length: + name: "chk_category_name_max_len" + value: 100 + - id: 3 + name: parent_id + type: integer + description: "Parent category for hierarchy" + constraints: + nullable: true + constraints: + - id: 1 + name: "pk_categories" + type: primary_key + columns: ["id"] + - id: 2 + name: "fk_categories_parent" + type: foreign_key + description: "Self-referencing for category hierarchy" + columns: ["parent_id"] + references: + table: "categories" + columns: ["id"] + on_delete: "set_null" + on_update: "no_action" + + - id: 3 + name: products + description: "Product catalog" + columns: + - id: 1 + name: id + type: serial + constraints: + nullable: false + - id: 2 + name: sku + type: text + description: "Stock Keeping Unit" + constraints: + nullable: false + max_length: + name: "chk_sku_max_len" + value: 50 + - id: 3 + name: name + type: text + description: "Product name" + constraints: + nullable: false + max_length: + name: "chk_product_name_max_len" + value: 200 + - id: 4 + name: description + type: text + description: "Product description" + constraints: + nullable: true + - id: 5 + name: price_cents + type: integer + description: "Price in cents" + constraints: + nullable: false + min_value: + name: "chk_price_positive" + value: 0 + - id: 6 + name: category_id + type: integer + description: "Product category" + constraints: + nullable: true + - id: 7 + name: stock_quantity + type: integer + default: 0 + constraints: + nullable: false + min_value: + name: "chk_stock_non_negative" + value: 0 + - id: 8 + name: is_available + type: boolean + default: true + 
constraints: + nullable: false + - id: 9 + name: created_at + type: timestamp + default: "current_timestamp" + constraints: + nullable: false + constraints: + - id: 1 + name: "pk_products" + type: primary_key + columns: ["id"] + - id: 2 + name: "uq_products_sku" + type: unique + columns: ["sku"] + - id: 3 + name: "fk_products_category" + type: foreign_key + columns: ["category_id"] + references: + table: "categories" + columns: ["id"] + on_delete: "set_null" + on_update: "no_action" + + - id: 4 + name: orders + description: "Customer orders" + columns: + - id: 1 + name: id + type: serial + constraints: + nullable: false + - id: 2 + name: user_id + type: integer + description: "Customer who placed the order" + constraints: + nullable: false + - id: 3 + name: status + type: text + default: "pending" + constraints: + nullable: false + max_length: + name: "chk_status_max_len" + value: 20 + - id: 4 + name: total_cents + type: integer + description: "Order total in cents" + constraints: + nullable: false + min_value: + name: "chk_total_non_negative" + value: 0 + - id: 5 + name: created_at + type: timestamp + default: "current_timestamp" + constraints: + nullable: false + constraints: + - id: 1 + name: "pk_orders" + type: primary_key + columns: ["id"] + - id: 2 + name: "fk_orders_user" + type: foreign_key + columns: ["user_id"] + references: + table: "users" + columns: ["id"] + on_delete: "cascade" + on_update: "no_action" + + - id: 5 + name: order_items + description: "Individual items within orders" + columns: + - id: 1 + name: id + type: serial + constraints: + nullable: false + - id: 2 + name: order_id + type: integer + constraints: + nullable: false + - id: 3 + name: product_id + type: integer + constraints: + nullable: false + - id: 4 + name: quantity + type: integer + constraints: + nullable: false + min_value: + name: "chk_quantity_positive" + value: 1 + - id: 5 + name: unit_price_cents + type: integer + description: "Price at time of purchase" + constraints: + nullable: false + min_value: + name: "chk_unit_price_non_negative" + value: 0 + constraints: + - id: 1 + name: "pk_order_items" + type: primary_key + columns: ["id"] + - id: 2 + name: "fk_order_items_order" + type: foreign_key + columns: ["order_id"] + references: + table: "orders" + columns: ["id"] + on_delete: "cascade" + on_update: "no_action" + - id: 3 + name: "fk_order_items_product" + type: foreign_key + columns: ["product_id"] + references: + table: "products" + columns: ["id"] + on_delete: "restrict" + on_update: "no_action" + - id: 4 + name: "uq_order_product" + type: unique + description: "Each product appears once per order" + columns: ["order_id", "product_id"] diff --git a/docs/_examples_/schemas/postgresql/v15.0/schema_2.yaml b/docs/_examples_/schemas/postgresql/v15.0/schema_2.yaml index db325ad..568629d 100644 --- a/docs/_examples_/schemas/postgresql/v15.0/schema_2.yaml +++ b/docs/_examples_/schemas/postgresql/v15.0/schema_2.yaml @@ -1,298 +1,300 @@ -# Example 2: E-commerce Platform Schema +# Example 2: E-commerce Platform Schema (Working Snapshot) +# File location: db-schemas/ecommerce.yml # A comprehensive schema with users, products, orders, and order items. -# Demonstrates foreign keys, cascading deletes, and various constraints. +# This example includes descriptions to show full documentation capability. 
-name: "E-commerce Platform" -description: "Complete e-commerce database with users, products, and orders" +schema: + name: "E-commerce Platform" + description: "Complete e-commerce database with users, products, and orders" -tables: - - id: 1 - name: users - description: "Customer accounts" - columns: - - id: 1 - name: id - type: serial - description: "Primary key" - constraints: - nullable: false - - id: 2 - name: email - type: text - description: "User email address" - constraints: - nullable: false - max_length: - name: "chk_email_max_len" - value: 255 - - id: 3 - name: full_name - type: text - description: "User's full name" - constraints: - nullable: false - max_length: - name: "chk_fullname_max_len" - value: 100 - - id: 4 - name: created_at - type: timestamp - default: "current_timestamp" - constraints: - nullable: false - constraints: - - id: 1 - name: "pk_users" - type: primary_key - columns: ["id"] - - id: 2 - name: "uq_users_email" - type: unique - columns: ["email"] - - - id: 2 - name: categories - description: "Product categories with hierarchical support" - columns: - - id: 1 - name: id - type: serial - constraints: - nullable: false - - id: 2 - name: name - type: text - description: "Category name" - constraints: - nullable: false - max_length: - name: "chk_category_name_max_len" - value: 100 - - id: 3 - name: parent_id - type: integer - description: "Parent category for hierarchy" - constraints: - nullable: true - constraints: - - id: 1 - name: "pk_categories" - type: primary_key - columns: ["id"] - - id: 2 - name: "fk_categories_parent" - type: foreign_key - description: "Self-referencing for category hierarchy" - columns: ["parent_id"] - references: - table: "categories" +snapshot: + label: "v2.0.0" + description: "Full e-commerce schema with categories, products, orders" + tables: + - id: 1 + name: users + description: "Customer accounts" + columns: + - id: 1 + name: id + type: serial + constraints: + nullable: false + - id: 2 + name: email + type: text + description: "User email address" + constraints: + nullable: false + max_length: + name: "chk_email_max_len" + value: 255 + - id: 3 + name: full_name + type: text + description: "User's full name" + constraints: + nullable: false + max_length: + name: "chk_fullname_max_len" + value: 100 + - id: 4 + name: created_at + type: timestamp + default: "current_timestamp" + constraints: + nullable: false + constraints: + - id: 1 + name: "pk_users" + type: primary_key columns: ["id"] - on_delete: "set_null" - on_update: "no_action" + - id: 2 + name: "uq_users_email" + type: unique + columns: ["email"] - - id: 3 - name: products - description: "Product catalog" - columns: - - id: 1 - name: id - type: serial - constraints: - nullable: false - - id: 2 - name: sku - type: text - description: "Stock Keeping Unit" - constraints: - nullable: false - max_length: - name: "chk_sku_max_len" - value: 50 - - id: 3 - name: name - type: text - description: "Product name" - constraints: - nullable: false - max_length: - name: "chk_product_name_max_len" - value: 200 - - id: 4 - name: description - type: text - description: "Product description" - constraints: - nullable: true - - id: 5 - name: price_cents - type: integer - description: "Price in cents" - constraints: - nullable: false - min_value: - name: "chk_price_positive" - value: 0 - - id: 6 - name: category_id - type: integer - description: "Product category" - constraints: - nullable: true - - id: 7 - name: stock_quantity - type: integer - default: 0 - constraints: - nullable: false - 
min_value: - name: "chk_stock_non_negative" - value: 0 - - id: 8 - name: is_available - type: boolean - default: true - constraints: - nullable: false - - id: 9 - name: created_at - type: timestamp - default: "current_timestamp" - constraints: - nullable: false - constraints: - - id: 1 - name: "pk_products" - type: primary_key - columns: ["id"] - - id: 2 - name: "uq_products_sku" - type: unique - columns: ["sku"] - - id: 3 - name: "fk_products_category" - type: foreign_key - columns: ["category_id"] - references: - table: "categories" + - id: 2 + name: categories + description: "Product categories with hierarchical support" + columns: + - id: 1 + name: id + type: serial + constraints: + nullable: false + - id: 2 + name: name + type: text + description: "Category name" + constraints: + nullable: false + max_length: + name: "chk_category_name_max_len" + value: 100 + - id: 3 + name: parent_id + type: integer + description: "Parent category for hierarchy" + constraints: + nullable: true + constraints: + - id: 1 + name: "pk_categories" + type: primary_key columns: ["id"] - on_delete: "set_null" - on_update: "no_action" + - id: 2 + name: "fk_categories_parent" + type: foreign_key + description: "Self-referencing for category hierarchy" + columns: ["parent_id"] + references: + table: "categories" + columns: ["id"] + on_delete: "set_null" + on_update: "no_action" - - id: 4 - name: orders - description: "Customer orders" - columns: - - id: 1 - name: id - type: serial - constraints: - nullable: false - - id: 2 - name: user_id - type: integer - description: "Customer who placed the order" - constraints: - nullable: false - - id: 3 - name: status - type: text - default: "pending" - constraints: - nullable: false - max_length: - name: "chk_status_max_len" - value: 20 - - id: 4 - name: total_cents - type: integer - description: "Order total in cents" - constraints: - nullable: false - min_value: - name: "chk_total_non_negative" - value: 0 - - id: 5 - name: created_at - type: timestamp - default: "current_timestamp" - constraints: - nullable: false - constraints: - - id: 1 - name: "pk_orders" - type: primary_key - columns: ["id"] - - id: 2 - name: "fk_orders_user" - type: foreign_key - columns: ["user_id"] - references: - table: "users" + - id: 3 + name: products + description: "Product catalog" + columns: + - id: 1 + name: id + type: serial + constraints: + nullable: false + - id: 2 + name: sku + type: text + description: "Stock Keeping Unit" + constraints: + nullable: false + max_length: + name: "chk_sku_max_len" + value: 50 + - id: 3 + name: name + type: text + description: "Product name" + constraints: + nullable: false + max_length: + name: "chk_product_name_max_len" + value: 200 + - id: 4 + name: description + type: text + constraints: + nullable: true + - id: 5 + name: price_cents + type: integer + description: "Price in cents" + constraints: + nullable: false + min_value: + name: "chk_price_positive" + value: 0 + - id: 6 + name: category_id + type: integer + constraints: + nullable: true + - id: 7 + name: stock_quantity + type: integer + default: 0 + constraints: + nullable: false + min_value: + name: "chk_stock_non_negative" + value: 0 + - id: 8 + name: is_available + type: boolean + default: true + constraints: + nullable: false + - id: 9 + name: created_at + type: timestamp + default: "current_timestamp" + constraints: + nullable: false + constraints: + - id: 1 + name: "pk_products" + type: primary_key columns: ["id"] - on_delete: "cascade" - on_update: "no_action" + - id: 2 + name: 
"uq_products_sku" + type: unique + columns: ["sku"] + - id: 3 + name: "fk_products_category" + type: foreign_key + columns: ["category_id"] + references: + table: "categories" + columns: ["id"] + on_delete: "set_null" + on_update: "no_action" - - id: 5 - name: order_items - description: "Individual items within orders" - columns: - - id: 1 - name: id - type: serial - constraints: - nullable: false - - id: 2 - name: order_id - type: integer - constraints: - nullable: false - - id: 3 - name: product_id - type: integer - constraints: - nullable: false - - id: 4 - name: quantity - type: integer - constraints: - nullable: false - min_value: - name: "chk_quantity_positive" - value: 1 - - id: 5 - name: unit_price_cents - type: integer - description: "Price at time of purchase" - constraints: - nullable: false - min_value: - name: "chk_unit_price_non_negative" - value: 0 - constraints: - - id: 1 - name: "pk_order_items" - type: primary_key - columns: ["id"] - - id: 2 - name: "fk_order_items_order" - type: foreign_key - columns: ["order_id"] - references: - table: "orders" + - id: 4 + name: orders + description: "Customer orders" + columns: + - id: 1 + name: id + type: serial + constraints: + nullable: false + - id: 2 + name: user_id + type: integer + description: "Customer who placed the order" + constraints: + nullable: false + - id: 3 + name: status + type: text + default: "pending" + constraints: + nullable: false + max_length: + name: "chk_status_max_len" + value: 20 + - id: 4 + name: total_cents + type: integer + description: "Order total in cents" + constraints: + nullable: false + min_value: + name: "chk_total_non_negative" + value: 0 + - id: 5 + name: created_at + type: timestamp + default: "current_timestamp" + constraints: + nullable: false + constraints: + - id: 1 + name: "pk_orders" + type: primary_key columns: ["id"] - on_delete: "cascade" - on_update: "no_action" - - id: 3 - name: "fk_order_items_product" - type: foreign_key - columns: ["product_id"] - references: - table: "products" + - id: 2 + name: "fk_orders_user" + type: foreign_key + columns: ["user_id"] + references: + table: "users" + columns: ["id"] + on_delete: "cascade" + on_update: "no_action" + + - id: 5 + name: order_items + description: "Individual items within orders" + columns: + - id: 1 + name: id + type: serial + constraints: + nullable: false + - id: 2 + name: order_id + type: integer + constraints: + nullable: false + - id: 3 + name: product_id + type: integer + constraints: + nullable: false + - id: 4 + name: quantity + type: integer + constraints: + nullable: false + min_value: + name: "chk_quantity_positive" + value: 1 + - id: 5 + name: unit_price_cents + type: integer + description: "Price at time of purchase" + constraints: + nullable: false + min_value: + name: "chk_unit_price_non_negative" + value: 0 + constraints: + - id: 1 + name: "pk_order_items" + type: primary_key columns: ["id"] - on_delete: "restrict" - on_update: "no_action" - - id: 4 - name: "uq_order_product" - type: unique - description: "Each product appears once per order" - columns: ["order_id", "product_id"] + - id: 2 + name: "fk_order_items_order" + type: foreign_key + columns: ["order_id"] + references: + table: "orders" + columns: ["id"] + on_delete: "cascade" + on_update: "no_action" + - id: 3 + name: "fk_order_items_product" + type: foreign_key + columns: ["product_id"] + references: + table: "products" + columns: ["id"] + on_delete: "restrict" + on_update: "no_action" + - id: 4 + name: "uq_order_product" + type: unique + description: 
"Each product appears once per order" + columns: ["order_id", "product_id"] diff --git a/docs/_examples_/schemas/postgresql/v15.0/schema_3-stored.yaml b/docs/_examples_/schemas/postgresql/v15.0/schema_3-stored.yaml new file mode 100644 index 0000000..f685b45 --- /dev/null +++ b/docs/_examples_/schemas/postgresql/v15.0/schema_3-stored.yaml @@ -0,0 +1,5 @@ +# Example 3: New Schema (Stored Snapshot) - Empty Tables +# File location: .bfloo/new_project/2024-03-10_v0.0.1.yml +# Shows a stored snapshot with no tables yet. + +tables: [] diff --git a/docs/_examples_/schemas/postgresql/v15.0/schema_3.yaml b/docs/_examples_/schemas/postgresql/v15.0/schema_3.yaml index b8f1baf..862b0bc 100644 --- a/docs/_examples_/schemas/postgresql/v15.0/schema_3.yaml +++ b/docs/_examples_/schemas/postgresql/v15.0/schema_3.yaml @@ -1,353 +1,11 @@ -# Example 3: Inventory & Warehouse Management Schema -# A complex schema for inventory tracking across multiple warehouses. -# Demonstrates advanced constraints and multiple foreign key relationships. +# Example 3: New Schema (Working Snapshot) - Empty Tables +# File location: db-schemas/new_project.yml +# Shows a schema that was just initialized with no tables yet. +# Tables can be added incrementally as the schema evolves. -name: "Inventory Management System" -description: "Multi-warehouse inventory tracking with suppliers and products" +schema: + name: "New Project" -tables: - - id: 1 - name: suppliers - description: "Product suppliers and vendors" - columns: - - id: 1 - name: id - type: serial - constraints: - nullable: false - - id: 2 - name: code - type: text - description: "Unique supplier code" - constraints: - nullable: false - min_length: - name: "chk_supplier_code_min_len" - value: 3 - max_length: - name: "chk_supplier_code_max_len" - value: 20 - - id: 3 - name: name - type: text - description: "Company name" - constraints: - nullable: false - max_length: - name: "chk_supplier_name_max_len" - value: 200 - - id: 4 - name: contact_email - type: text - constraints: - nullable: true - max_length: - name: "chk_contact_email_max_len" - value: 255 - - id: 5 - name: is_active - type: boolean - default: true - constraints: - nullable: false - - id: 6 - name: created_at - type: timestamp - default: "current_timestamp" - constraints: - nullable: false - constraints: - - id: 1 - name: "pk_suppliers" - type: primary_key - columns: ["id"] - - id: 2 - name: "uq_suppliers_code" - type: unique - columns: ["code"] - - - id: 2 - name: warehouses - description: "Physical warehouse locations" - columns: - - id: 1 - name: id - type: serial - constraints: - nullable: false - - id: 2 - name: code - type: text - description: "Unique warehouse code" - constraints: - nullable: false - min_length: - name: "chk_warehouse_code_min_len" - value: 2 - max_length: - name: "chk_warehouse_code_max_len" - value: 10 - - id: 3 - name: name - type: text - description: "Warehouse name" - constraints: - nullable: false - max_length: - name: "chk_warehouse_name_max_len" - value: 100 - - id: 4 - name: address - type: text - constraints: - nullable: true - - id: 5 - name: capacity_units - type: integer - description: "Maximum storage capacity in units" - constraints: - nullable: false - min_value: - name: "chk_capacity_positive" - value: 1 - - id: 6 - name: is_active - type: boolean - default: true - constraints: - nullable: false - constraints: - - id: 1 - name: "pk_warehouses" - type: primary_key - columns: ["id"] - - id: 2 - name: "uq_warehouses_code" - type: unique - columns: ["code"] - - - id: 3 - 
name: products - description: "Product master data" - columns: - - id: 1 - name: id - type: serial - constraints: - nullable: false - - id: 2 - name: sku - type: text - description: "Stock Keeping Unit" - constraints: - nullable: false - min_length: - name: "chk_sku_min_len" - value: 3 - max_length: - name: "chk_sku_max_len" - value: 50 - - id: 3 - name: name - type: text - constraints: - nullable: false - max_length: - name: "chk_product_name_max_len" - value: 200 - - id: 4 - name: supplier_id - type: integer - description: "Primary supplier" - constraints: - nullable: false - - id: 5 - name: unit_cost_cents - type: integer - description: "Cost per unit in cents" - constraints: - nullable: false - min_value: - name: "chk_unit_cost_non_negative" - value: 0 - - id: 6 - name: reorder_point - type: integer - description: "Minimum stock level before reorder" - default: 10 - constraints: - nullable: false - min_value: - name: "chk_reorder_point_non_negative" - value: 0 - - id: 7 - name: is_active - type: boolean - default: true - constraints: - nullable: false - - id: 8 - name: created_at - type: timestamp - default: "current_timestamp" - constraints: - nullable: false - constraints: - - id: 1 - name: "pk_products" - type: primary_key - columns: ["id"] - - id: 2 - name: "uq_products_sku" - type: unique - columns: ["sku"] - - id: 3 - name: "fk_products_supplier" - type: foreign_key - columns: ["supplier_id"] - references: - table: "suppliers" - columns: ["id"] - on_delete: "restrict" - on_update: "no_action" - - - id: 4 - name: inventory - description: "Current stock levels per product per warehouse" - columns: - - id: 1 - name: id - type: serial - constraints: - nullable: false - - id: 2 - name: product_id - type: integer - constraints: - nullable: false - - id: 3 - name: warehouse_id - type: integer - constraints: - nullable: false - - id: 4 - name: quantity_on_hand - type: integer - default: 0 - constraints: - nullable: false - min_value: - name: "chk_quantity_non_negative" - value: 0 - - id: 5 - name: quantity_reserved - type: integer - description: "Quantity reserved for pending orders" - default: 0 - constraints: - nullable: false - min_value: - name: "chk_reserved_non_negative" - value: 0 - - id: 6 - name: bin_location - type: text - description: "Physical location in warehouse" - constraints: - nullable: true - max_length: - name: "chk_bin_location_max_len" - value: 50 - - id: 7 - name: last_counted_at - type: timestamp - description: "Last physical inventory count" - constraints: - nullable: true - constraints: - - id: 1 - name: "pk_inventory" - type: primary_key - columns: ["id"] - - id: 2 - name: "uq_inventory_product_warehouse" - type: unique - description: "One inventory record per product per warehouse" - columns: ["product_id", "warehouse_id"] - - id: 3 - name: "fk_inventory_product" - type: foreign_key - columns: ["product_id"] - references: - table: "products" - columns: ["id"] - on_delete: "cascade" - on_update: "no_action" - - id: 4 - name: "fk_inventory_warehouse" - type: foreign_key - columns: ["warehouse_id"] - references: - table: "warehouses" - columns: ["id"] - on_delete: "cascade" - on_update: "no_action" - - - id: 5 - name: stock_movements - description: "Audit trail of inventory changes" - columns: - - id: 1 - name: id - type: serial - constraints: - nullable: false - - id: 2 - name: inventory_id - type: integer - constraints: - nullable: false - - id: 3 - name: quantity_change - type: integer - description: "Positive for additions, negative for removals" - 
constraints: - nullable: false - - id: 4 - name: reason - type: text - description: "Reason for stock movement" - constraints: - nullable: false - max_length: - name: "chk_reason_max_len" - value: 100 - - id: 5 - name: reference_number - type: text - description: "External reference (PO number, shipment ID, etc.)" - constraints: - nullable: true - max_length: - name: "chk_reference_max_len" - value: 50 - - id: 6 - name: created_at - type: timestamp - default: "current_timestamp" - constraints: - nullable: false - constraints: - - id: 1 - name: "pk_stock_movements" - type: primary_key - columns: ["id"] - - id: 2 - name: "fk_stock_movements_inventory" - type: foreign_key - columns: ["inventory_id"] - references: - table: "inventory" - columns: ["id"] - on_delete: "cascade" - on_update: "no_action" +snapshot: + label: "v0.0.1" + tables: [] diff --git a/docs/schemas/engines/postgresql/v15.0/components/column.json b/docs/schemas/engines/postgresql/v15.0/components/column.json index 172e115..237c934 100644 --- a/docs/schemas/engines/postgresql/v15.0/components/column.json +++ b/docs/schemas/engines/postgresql/v15.0/components/column.json @@ -1,335 +1,336 @@ { - "title": "PostgreSQL Column Definition", - "type": "object", - "properties": { - "id": { - "type": "integer", - "description": "Unique local identifier for the column", - "examples": [1, 2, 3, 4, 5] - }, - "name": { - "type": "string", - "description": "Name of the column", - "pattern": "^[a-z][a-z0-9_]*$", - "minLength": 1, - "maxLength": 63, - "examples": [ - "id", - "username", - "email", - "created_at", - "supplier_code", - "company_name", - "credit_limit", - "warehouse_code", - "capacity_cubic_meters", - "temperature_min", - "category_code", - "sku", - "unit_cost_cents", - "weight_grams", - "length_mm", - "shelf_life_days", - "quantity_on_hand", - "bin_location" - ] - }, - "type": { - "type": "string", - "description": "Data type of the column", - "enum": ["text", "integer", "serial", "boolean", "date", "timestamp"] - }, - "description": { - "type": "string", - "description": "Description of the column", - "maxLength": 256, - "examples": [ - "Primary key", - "Username of the user", - "Email address", - "Creation timestamp", - "Unique supplier identification code", - "Official company name of the supplier", - "Maximum credit limit in cents", - "Unique warehouse identification code", - "Total storage capacity in cubic meters", - "Minimum temperature in Celsius", - "Stock Keeping Unit - unique product identifier", - "Unit cost in cents", - "Product weight in grams", - "Product length in millimeters", - "Product shelf life in days (null for non-perishable)", - "Current quantity available", - "Specific bin/shelf location in warehouse" - ] - }, - "default": { - "type": ["string", "integer", "boolean", "null"], - "description": "Default value for the column" - }, - "constraints": { - "type": "object", - "description": "Constraints applied to the column", - "properties": { - "nullable": { - "type": "boolean", - "description": "Whether the column can be null", - "default": true - } - }, - "additionalProperties": false - } - }, - "required": ["id", "name", "type"], - "allOf": [ - { - "if": { - "properties": { - "type": { - "const": "text" - } - } - }, - "then": { - "properties": { - "constraints": { - "properties": { - "min_length": { - "type": "object", - "description": "Minimum length for text columns", - "properties": { - "name": { - "type": "string", - "description": "Name of the constraint", - "pattern": "^[a-z][a-z0-9_]*$", - 
"minLength": 1, - "maxLength": 63 - }, - "value": { - "type": "integer", - "description": "Minimum length value" - } - }, - "required": ["name", "value"], - "additionalProperties": false - }, - "max_length": { - "type": "object", - "description": "Maximum length for text columns", - "properties": { - "name": { - "type": "string", - "description": "Name of the constraint", - "pattern": "^[a-z][a-z0-9_]*$", - "minLength": 1, - "maxLength": 63 - }, - "value": { - "type": "integer", - "description": "Maximum length value" - } - }, - "required": ["name", "value"], - "additionalProperties": false - } - } - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "integer" - } - } - }, - "then": { - "properties": { - "constraints": { - "properties": { - "min_value": { - "type": "object", - "description": "Minimum value for integer columns", - "properties": { - "name": { - "type": "string", - "description": "Name of the constraint", - "pattern": "^[a-z][a-z0-9_]*$", - "minLength": 1, - "maxLength": 63 - }, - "value": { - "type": "integer", - "description": "Minimum value" - } - }, - "required": ["name", "value"], - "additionalProperties": false - }, - "max_value": { - "type": "object", - "description": "Maximum value for integer columns", - "properties": { - "name": { - "type": "string", - "description": "Name of the constraint", - "pattern": "^[a-z][a-z0-9_]*$", - "minLength": 1, - "maxLength": 63 - }, - "value": { - "type": "integer", - "description": "Maximum value" - } - }, - "required": ["name", "value"], - "additionalProperties": false - } - } - } - } - } - }, - { - "if": { - "properties": { - "default": { - "const": null - } - }, - "required": ["default"] - }, - "then": { - "properties": { - "constraints": { - "properties": { - "nullable": { - "const": true - } - } - } - } - } - }, - { - "if": { - "properties": { - "constraints": { - "properties": { - "nullable": { - "const": false - } - }, - "required": ["nullable"] - } - } - }, - "then": { - "properties": { - "default": { - "not": { - "const": null - } - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "boolean" - } - } - }, - "then": { - "properties": { - "default": { - "oneOf": [{ "type": "boolean" }, { "const": null }] - } - } - } - }, - { - "if": { - "properties": { - "type": { - "enum": ["integer", "serial"] - } - } - }, - "then": { - "properties": { - "default": { - "oneOf": [{ "type": "integer" }, { "const": null }] - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "text" - } - } - }, - "then": { - "properties": { - "default": { - "oneOf": [{ "type": "string" }, { "const": null }] - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "date" - } - } - }, - "then": { - "properties": { - "default": { - "oneOf": [{ "const": "current_date" }, { "const": null }] - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "timestamp" - } - } - }, - "then": { - "properties": { - "default": { - "oneOf": [{ "const": "current_timestamp" }, { "const": null }] - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "serial" - } - } - }, - "then": { - "properties": { - "default": false - } - } - } - ], - "additionalProperties": false + "title": "PostgreSQL Column Definition", + "type": "object", + "properties": { + "id": { + "type": "integer", + "description": "Unique local identifier for the column", + "minimum": 1, + "examples": [1, 2, 3, 4, 5] + }, + "name": { + "type": "string", + "description": "Name of the column", + "pattern": 
"^[a-z][a-z0-9_]*$", + "minLength": 1, + "maxLength": 63, + "examples": [ + "id", + "username", + "email", + "created_at", + "supplier_code", + "company_name", + "credit_limit", + "warehouse_code", + "capacity_cubic_meters", + "temperature_min", + "category_code", + "sku", + "unit_cost_cents", + "weight_grams", + "length_mm", + "shelf_life_days", + "quantity_on_hand", + "bin_location" + ] + }, + "type": { + "type": "string", + "description": "Data type of the column", + "enum": ["text", "integer", "serial", "boolean", "date", "timestamp"] + }, + "description": { + "type": "string", + "description": "Description of the column", + "maxLength": 256, + "examples": [ + "Primary key", + "Username of the user", + "Email address", + "Creation timestamp", + "Unique supplier identification code", + "Official company name of the supplier", + "Maximum credit limit in cents", + "Unique warehouse identification code", + "Total storage capacity in cubic meters", + "Minimum temperature in Celsius", + "Stock Keeping Unit - unique product identifier", + "Unit cost in cents", + "Product weight in grams", + "Product length in millimeters", + "Product shelf life in days (null for non-perishable)", + "Current quantity available", + "Specific bin/shelf location in warehouse" + ] + }, + "default": { + "type": ["string", "integer", "boolean", "null"], + "description": "Default value for the column" + }, + "constraints": { + "type": "object", + "description": "Constraints applied to the column", + "properties": { + "nullable": { + "type": "boolean", + "description": "Whether the column can be null", + "default": true + } + }, + "additionalProperties": false + } + }, + "required": ["id", "name", "type"], + "allOf": [ + { + "if": { + "properties": { + "type": { + "const": "text" + } + } + }, + "then": { + "properties": { + "constraints": { + "properties": { + "min_length": { + "type": "object", + "description": "Minimum length for text columns", + "properties": { + "name": { + "type": "string", + "description": "Name of the constraint", + "pattern": "^[a-z][a-z0-9_]*$", + "minLength": 1, + "maxLength": 63 + }, + "value": { + "type": "integer", + "description": "Minimum length value" + } + }, + "required": ["name", "value"], + "additionalProperties": false + }, + "max_length": { + "type": "object", + "description": "Maximum length for text columns", + "properties": { + "name": { + "type": "string", + "description": "Name of the constraint", + "pattern": "^[a-z][a-z0-9_]*$", + "minLength": 1, + "maxLength": 63 + }, + "value": { + "type": "integer", + "description": "Maximum length value" + } + }, + "required": ["name", "value"], + "additionalProperties": false + } + } + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "integer" + } + } + }, + "then": { + "properties": { + "constraints": { + "properties": { + "min_value": { + "type": "object", + "description": "Minimum value for integer columns", + "properties": { + "name": { + "type": "string", + "description": "Name of the constraint", + "pattern": "^[a-z][a-z0-9_]*$", + "minLength": 1, + "maxLength": 63 + }, + "value": { + "type": "integer", + "description": "Minimum value" + } + }, + "required": ["name", "value"], + "additionalProperties": false + }, + "max_value": { + "type": "object", + "description": "Maximum value for integer columns", + "properties": { + "name": { + "type": "string", + "description": "Name of the constraint", + "pattern": "^[a-z][a-z0-9_]*$", + "minLength": 1, + "maxLength": 63 + }, + "value": { + "type": "integer", + 
"description": "Maximum value" + } + }, + "required": ["name", "value"], + "additionalProperties": false + } + } + } + } + } + }, + { + "if": { + "properties": { + "default": { + "const": null + } + }, + "required": ["default"] + }, + "then": { + "properties": { + "constraints": { + "properties": { + "nullable": { + "const": true + } + } + } + } + } + }, + { + "if": { + "properties": { + "constraints": { + "properties": { + "nullable": { + "const": false + } + }, + "required": ["nullable"] + } + } + }, + "then": { + "properties": { + "default": { + "not": { + "const": null + } + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "boolean" + } + } + }, + "then": { + "properties": { + "default": { + "oneOf": [{ "type": "boolean" }, { "const": null }] + } + } + } + }, + { + "if": { + "properties": { + "type": { + "enum": ["integer", "serial"] + } + } + }, + "then": { + "properties": { + "default": { + "oneOf": [{ "type": "integer" }, { "const": null }] + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "text" + } + } + }, + "then": { + "properties": { + "default": { + "oneOf": [{ "type": "string" }, { "const": null }] + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "date" + } + } + }, + "then": { + "properties": { + "default": { + "oneOf": [{ "const": "current_date" }, { "const": null }] + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "timestamp" + } + } + }, + "then": { + "properties": { + "default": { + "oneOf": [{ "const": "current_timestamp" }, { "const": null }] + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "serial" + } + } + }, + "then": { + "properties": { + "default": false + } + } + } + ], + "additionalProperties": false } diff --git a/docs/schemas/engines/postgresql/v15.0/components/constraint.json b/docs/schemas/engines/postgresql/v15.0/components/constraint.json index d7d9f24..f088147 100644 --- a/docs/schemas/engines/postgresql/v15.0/components/constraint.json +++ b/docs/schemas/engines/postgresql/v15.0/components/constraint.json @@ -5,6 +5,7 @@ "id": { "type": "integer", "description": "Unique local identifier for the constraint", + "minimum": 1, "examples": [1, 2, 3, 4, 5] }, "name": { @@ -79,8 +80,7 @@ "type": "string", "description": "Name of the referenced table", "pattern": "^[a-z][a-z0-9_]*$", - "minLength": 1, - "maxLength": 63, + "minLength": 1, "maxLength": 63, "examples": ["users", "orders", "products"] }, "columns": { @@ -102,12 +102,14 @@ "type": "string", "description": "Action to take when referenced row is deleted", "enum": ["cascade", "set_null", "set_default", "restrict", "no_action"], + "default": "no_action", "examples": ["cascade", "set_null", "restrict"] }, "on_update": { "type": "string", "description": "Action to take when referenced row is updated", "enum": ["cascade", "set_null", "set_default", "restrict", "no_action"], + "default": "no_action", "examples": ["cascade", "set_null", "restrict"] } }, diff --git a/docs/schemas/engines/postgresql/v15.0/components/table.json b/docs/schemas/engines/postgresql/v15.0/components/table.json index 0792110..a645796 100644 --- a/docs/schemas/engines/postgresql/v15.0/components/table.json +++ b/docs/schemas/engines/postgresql/v15.0/components/table.json @@ -5,6 +5,7 @@ "id": { "type": "integer", "description": "Unique local identifier for the table", + "minimum": 1, "examples": [1, 2, 3, 4, 5] }, "name": { diff --git a/docs/schemas/engines/postgresql/v15.0/snapshot/stored.json 
b/docs/schemas/engines/postgresql/v15.0/snapshot/stored.json
new file mode 100644
index 0000000..58cb1d0
--- /dev/null
+++ b/docs/schemas/engines/postgresql/v15.0/snapshot/stored.json
@@ -0,0 +1,22 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Stored Snapshot",
+  "description": "Schema for stored snapshot files (.bfloo/<schema>/*.yml). Contains the hashable content: description and tables.",
+  "type": "object",
+  "properties": {
+    "description": {
+      "type": "string",
+      "description": "Snapshot description",
+      "maxLength": 256
+    },
+    "tables": {
+      "type": "array",
+      "description": "List of tables in the snapshot",
+      "items": {
+        "$ref": "../components/table.json"
+      }
+    }
+  },
+  "required": ["tables"],
+  "additionalProperties": false
+}
diff --git a/docs/schemas/engines/postgresql/v15.0/snapshot/working.json b/docs/schemas/engines/postgresql/v15.0/snapshot/working.json
new file mode 100644
index 0000000..2bf33d3
--- /dev/null
+++ b/docs/schemas/engines/postgresql/v15.0/snapshot/working.json
@@ -0,0 +1,55 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Working Snapshot",
+  "description": "Schema for the working snapshot file (db-schemas/<schema>.yml). Contains schema metadata and the current snapshot content.",
+  "type": "object",
+  "properties": {
+    "schema": {
+      "type": "object",
+      "description": "Schema metadata. Editable locally but not synced to remote. Source of truth is the working file after import.",
+      "properties": {
+        "name": {
+          "type": "string",
+          "description": "Human-readable schema name",
+          "minLength": 1,
+          "maxLength": 64
+        },
+        "description": {
+          "type": "string",
+          "description": "Schema description",
+          "maxLength": 256
+        }
+      },
+      "required": ["name"],
+      "additionalProperties": false
+    },
+    "snapshot": {
+      "type": "object",
+      "description": "Current snapshot content. The 'label' field is read-only (managed via CLI). 
The 'description' and 'tables' fields are editable and included in content hash.", + "properties": { + "label": { + "type": "string", + "description": "Snapshot version label (read-only, source of truth is manifest)", + "minLength": 1, + "maxLength": 64 + }, + "description": { + "type": "string", + "description": "Snapshot description (included in content hash)", + "maxLength": 256 + }, + "tables": { + "type": "array", + "description": "List of tables in the snapshot (included in content hash)", + "items": { + "$ref": "../components/table.json" + } + } + }, + "required": ["label", "tables"], + "additionalProperties": false + } + }, + "required": ["schema"], + "additionalProperties": false +} diff --git a/docs/schemas/engines/postgresql/v15.0/spec.json b/docs/schemas/engines/postgresql/v15.0/spec.json deleted file mode 100644 index 0964805..0000000 --- a/docs/schemas/engines/postgresql/v15.0/spec.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "PostgreSQL 15.0 Schema Specification", - "description": "JSON Schema for validating schema.yaml files targeting PostgreSQL 15.0", - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "Human-readable name of the schema", - "minLength": 1, - "maxLength": 64 - }, - "description": { - "type": ["string", "null"], - "description": "Description of the schema", - "maxLength": 256 - }, - "tables": { - "type": "array", - "description": "List of tables in the schema", - "items": { - "$ref": "components/table.json" - } - } - }, - "required": ["name", "tables"], - "additionalProperties": false -} diff --git a/docs/schemas/engines/postgresql/v15.0/tables.json b/docs/schemas/engines/postgresql/v15.0/tables.json new file mode 100644 index 0000000..11b3777 --- /dev/null +++ b/docs/schemas/engines/postgresql/v15.0/tables.json @@ -0,0 +1,9 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "PostgreSQL v15.0 Tables Schema", + "description": "Schema for validating tables array structure for PostgreSQL v15.0. Used by AI agents to understand table syntax.", + "type": "array", + "items": { + "$ref": "components/table.json" + } +} diff --git a/docs/schemas/project/config/base.json b/docs/schemas/project/config/base.json index 758b709..81c1f50 100644 --- a/docs/schemas/project/config/base.json +++ b/docs/schemas/project/config/base.json @@ -1,52 +1,77 @@ { - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Base Project Configuration", - "description": "Schema for the common configuration properties shared across all database engines. This schema is used in conjunction with engine-specific configuration schemas.", - "type": "object", - "definitions": { - "envVarString": { - "type": "string", - "description": "A string value that may contain environment variable references using ${VAR_NAME} syntax. The CLI resolves these at runtime from the environment or .env file.", - "examples": ["${API_KEY}", "literal-value", "${MY_VAR}_suffix"] - } - }, - "properties": { - "schema_id": { - "type": "string", - "format": "uuid", - "description": "Unique identifier linking this project to the web application. Generated on first `db sync push` if not present." 
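Both the old `api.key` description above and the new `envVarString` definition describe `${VAR_NAME}` references that the CLI resolves at runtime. A minimal sketch of that interpolation, matching the `${[A-Z_][A-Z0-9_]*}` pattern used throughout these schemas; `resolve_env_refs` is a hypothetical helper, not the CLI's actual implementation:

```python
# Sketch: resolve ${VAR_NAME} references in a config value.
# resolve_env_refs is hypothetical; the CLI's real logic may differ.
import os
import re

_ENV_REF = re.compile(r"\$\{([A-Z_][A-Z0-9_]*)\}")


def resolve_env_refs(value: str, env: dict | None = None) -> str:
    """Replace each ${VAR_NAME} with its value from the environment."""
    lookup = dict(os.environ) if env is None else env

    def substitute(match: re.Match) -> str:
        name = match.group(1)
        if name not in lookup:
            raise KeyError(f"undefined environment variable: {name}")
        return lookup[name]

    return _ENV_REF.sub(substitute, value)


# "${MY_VAR}_suffix" -> "hello_suffix", as in the envVarString examples
print(resolve_env_refs("${MY_VAR}_suffix", {"MY_VAR": "hello"}))
```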
-    },
-    "database": {
-      "type": "object",
-      "description": "Database engine configuration (version is tracked per-snapshot in manifest.yaml)",
-      "properties": {
-        "engine": {
-          "type": "string",
-          "description": "Database engine name",
-          "enum": ["PostgreSQL"]
-        }
-      },
-      "required": ["engine"],
-      "additionalProperties": false
-    },
-    "api": {
-      "type": "object",
-      "description": "API configuration for web app synchronization",
-      "properties": {
-        "base_url": {
-          "type": "string",
-          "format": "uri",
-          "description": "Base URL for the API endpoint",
-          "default": "https://api.yourapp.com"
-        },
-        "key": {
-          "$ref": "#/definitions/envVarString",
-          "description": "API key for authentication. Supports environment variable references (e.g., ${BFLOO_API_KEY}). The CLI resolves env vars from the environment or a .env file located in the config directory or project root."
-        }
-      },
-      "additionalProperties": false
-    }
-  },
-  "required": ["database"],
-  "additionalProperties": false
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Base Project Configuration",
+  "description": "Schema for the project configuration file (.bfloo/config.yml). Defines all schemas with their API keys, database engines, and environment connections.",
+  "type": "object",
+  "$defs": {
+    "envVarString": {
+      "type": "string",
+      "description": "A string value that may contain environment variable references using ${VAR_NAME} syntax. The CLI resolves these at runtime from the environment or the specified env-file.",
+      "examples": ["${API_KEY}", "literal-value", "${MY_VAR}_suffix"]
+    },
+    "envFilePath": {
+      "type": "string",
+      "description": "Path to an environment file (relative to project root) containing variable definitions. Variables in this file are used to resolve ${VAR_NAME} references. If not specified, defaults to '.env' in the project root.",
+      "default": ".env",
+      "examples": [".env", ".env.production", "config/.env.staging", "envs/dev.env"]
+    },
+    "schemaDefinition": {
+      "type": "object",
+      "description": "Configuration for a single schema",
+      "allOf": [
+        {
+          "properties": {
+            "dir": {
+              "type": "string",
+              "description": "Directory path (relative to project root) where the working schema file (<schema>.yml) is stored. Defaults to 'db-schemas' if not specified.",
+              "default": "db-schemas",
+              "examples": ["db-schemas", ".", "./", "schemas", "db/schemas", "src/database"]
+            },
+            "key": {
+              "$ref": "#/$defs/envVarString",
+              "description": "API key for this schema. Supports environment variable references (e.g., ${ORDERS_API_KEY}). Each schema requires its own API key."
+            },
+            "engine": {
+              "type": "string",
+              "description": "Database engine for this schema",
+              "enum": ["PostgreSQL"]
+            },
+            "env-file": {
+              "$ref": "#/$defs/envFilePath",
+              "description": "Default environment file for this schema. Used to resolve ${VAR_NAME} references in the API key and as a fallback for environments that don't specify their own env-file."
+            }
+          },
+          "required": ["key", "engine", "envs"]
+        },
+        {
+          "if": {
+            "properties": {
+              "engine": { "const": "PostgreSQL" }
+            },
+            "required": ["engine"]
+          },
+          "then": {
+            "properties": {
+              "envs": {
+                "$ref": "engines/postgresql.json#/$defs/envs"
+              }
+            }
+          }
+        }
+      ],
+      "unevaluatedProperties": false
+    }
+  },
+  "properties": {
+    "schemas": {
+      "type": "object",
+      "description": "Map of schema definitions. 
Each key is a user-defined schema name (e.g., 'orders', 'users', 'analytics').", + "additionalProperties": { + "$ref": "#/$defs/schemaDefinition" + }, + "minProperties": 1 + } + }, + "required": ["schemas"], + "additionalProperties": false } diff --git a/docs/schemas/project/config/engines/postgresql.json b/docs/schemas/project/config/engines/postgresql.json index de32305..4531371 100644 --- a/docs/schemas/project/config/engines/postgresql.json +++ b/docs/schemas/project/config/engines/postgresql.json @@ -4,18 +4,21 @@ "description": "Schema for PostgreSQL-specific configuration including environment connection parameters. All connection parameters follow the official PostgreSQL libpq specification.", "type": "object", "properties": { - "environments": { + "envs": { + "$ref": "#/$defs/envs" + } + }, + "required": ["envs"], + "additionalProperties": false, + "$defs": { + "envs": { "type": "object", "description": "Environment-specific connection configurations (e.g., development, staging, production)", "additionalProperties": { "$ref": "#/$defs/environment" }, "minProperties": 1 - } - }, - "required": ["environments"], - "additionalProperties": false, - "$defs": { + }, "environment": { "type": "object", "description": "Connection configuration for a single environment", @@ -49,6 +52,14 @@ "maxLength": 63, "pattern": "^[a-zA-Z_][a-zA-Z0-9_$]*$|^\\$\\{[A-Z_][A-Z0-9_]*\\}$" }, + "target-schema": { + "type": "string", + "description": "PostgreSQL schema name. Defaults to 'public' if not specified.", + "minLength": 1, + "maxLength": 63, + "pattern": "^[a-zA-Z_][a-zA-Z0-9_$]*$|^\\$\\{[A-Z_][A-Z0-9_]*\\}$", + "default": "public" + }, "user": { "type": "string", "description": "PostgreSQL user name to connect as. Supports environment variable interpolation with ${VAR_NAME} syntax.", @@ -60,11 +71,6 @@ "description": "Password for authentication. IMPORTANT: Use environment variable interpolation (${VAR_NAME}) - never store plaintext passwords.", "minLength": 1 }, - "passfile": { - "type": "string", - "description": "Path to password file (~/.pgpass format). Defaults to ~/.pgpass on Unix.", - "minLength": 1 - }, "require_auth": { "type": "string", "description": "Authentication method required from server. Comma-separated list allowed. Prefix with '!' to negate.", @@ -83,12 +89,6 @@ "!none" ] }, - "channel_binding": { - "type": "string", - "description": "Controls client's use of channel binding for SCRAM authentication.", - "enum": ["disable", "prefer", "require"], - "default": "prefer" - }, "connect_timeout": { "type": "integer", "description": "Maximum time to wait while connecting, in seconds. Zero means wait indefinitely.", @@ -96,205 +96,20 @@ "maximum": 2147483647, "default": 0 }, - "client_encoding": { - "type": "string", - "description": "Client encoding for the connection. Use 'auto' to detect from locale.", - "minLength": 1, - "examples": ["UTF8", "LATIN1", "auto"] - }, - "options": { - "type": "string", - "description": "Command-line options to send to the server at connection start (e.g., '-c search_path=myschema').", - "maxLength": 1024 - }, - "application_name": { - "type": "string", - "description": "Application name for the connection (visible in pg_stat_activity).", - "maxLength": 63 - }, - "keepalives": { - "type": "integer", - "description": "Controls whether client-side TCP keepalives are used. 1=on, 0=off.", - "enum": [0, 1], - "default": 1 - }, - "keepalives_idle": { - "type": "integer", - "description": "Seconds of inactivity before sending a TCP keepalive. 
Zero uses system default.", - "minimum": 0, - "maximum": 2147483647, - "default": 0 - }, - "keepalives_interval": { - "type": "integer", - "description": "Seconds between TCP keepalive retransmits. Zero uses system default.", - "minimum": 0, - "maximum": 2147483647, - "default": 0 - }, - "keepalives_count": { - "type": "integer", - "description": "Maximum number of TCP keepalive retransmits before connection is dead. Zero uses system default.", - "minimum": 0, - "maximum": 2147483647, - "default": 0 - }, - "tcp_user_timeout": { - "type": "integer", - "description": "Milliseconds that transmitted data may remain unacknowledged before connection is closed. Zero uses system default.", - "minimum": 0, - "maximum": 2147483647, - "default": 0 - }, "sslmode": { "type": "string", "description": "Determines SSL connection negotiation priority with the server.", "enum": ["disable", "allow", "prefer", "require", "verify-ca", "verify-full"], "default": "prefer" }, - "sslnegotiation": { + "env-file": { "type": "string", - "description": "Controls how SSL encryption is negotiated. 'postgres' uses PostgreSQL protocol negotiation, 'direct' starts SSL handshake immediately.", - "enum": ["postgres", "direct"], - "default": "postgres" - }, - "sslcompression": { - "type": "integer", - "description": "Enable SSL compression. 1=on, 0=off. Compression is considered insecure and disabled by default.", - "enum": [0, 1], - "default": 0 - }, - "sslcert": { - "type": "string", - "description": "Path to client SSL certificate file. Default is ~/.postgresql/postgresql.crt.", - "minLength": 1 - }, - "sslkey": { - "type": "string", - "description": "Path to client SSL private key file. Default is ~/.postgresql/postgresql.key.", - "minLength": 1 - }, - "sslpassword": { - "type": "string", - "description": "Password for encrypted SSL private key. Use environment variable interpolation (${VAR_NAME}).", - "minLength": 1 - }, - "sslcertmode": { - "type": "string", - "description": "Whether a client certificate may be sent to the server.", - "enum": ["disable", "allow", "require"], - "default": "allow" - }, - "sslrootcert": { - "type": "string", - "description": "Path to SSL CA certificate file. Use 'system' for system-wide trusted CAs. Default is ~/.postgresql/root.crt.", - "minLength": 1, - "examples": ["~/.postgresql/root.crt", "system", "/etc/ssl/certs/ca-certificates.crt"] - }, - "sslcrl": { - "type": "string", - "description": "Path to SSL certificate revocation list file.", - "minLength": 1 - }, - "sslcrldir": { - "type": "string", - "description": "Directory containing SSL certificate revocation list files (must be prepared with openssl rehash).", - "minLength": 1 - }, - "sslsni": { - "type": "integer", - "description": "Enable Server Name Indication (SNI) on SSL connections. 1=on, 0=off.", - "enum": [0, 1], - "default": 1 - }, - "ssl_min_protocol_version": { - "type": "string", - "description": "Minimum SSL/TLS protocol version to allow.", - "enum": ["TLSv1", "TLSv1.1", "TLSv1.2", "TLSv1.3"], - "default": "TLSv1.2" - }, - "ssl_max_protocol_version": { - "type": "string", - "description": "Maximum SSL/TLS protocol version to allow. If not set, uses backend maximum.", - "enum": ["TLSv1", "TLSv1.1", "TLSv1.2", "TLSv1.3"] - }, - "gssencmode": { - "type": "string", - "description": "Controls GSS encryption negotiation with the server.", - "enum": ["disable", "prefer", "require"], - "default": "prefer" - }, - "krbsrvname": { - "type": "string", - "description": "Kerberos service name for GSSAPI authentication. 
Default is 'postgres'.", - "minLength": 1, - "default": "postgres" - }, - "gsslib": { - "type": "string", - "description": "GSS library to use for GSSAPI authentication (Windows only).", - "enum": ["gssapi"] - }, - "gssdelegation": { - "type": "integer", - "description": "Forward GSS credentials to the server. 1=on, 0=off.", - "enum": [0, 1], - "default": 0 - }, - "service": { - "type": "string", - "description": "Service name in pg_service.conf for additional connection parameters.", - "minLength": 1, - "maxLength": 64 - }, - "target_session_attrs": { - "type": "string", - "description": "Session properties required for connection to be acceptable. Useful with multiple hosts.", - "enum": ["any", "read-write", "read-only", "primary", "standby", "prefer-standby"], - "default": "any" - }, - "load_balance_hosts": { - "type": "string", - "description": "Controls the order in which multiple hosts are tried.", - "enum": ["disable", "random"], - "default": "disable" + "description": "Path to an environment file (relative to project root) for this specific environment. Overrides the schema-level env-file. Used to resolve ${VAR_NAME} references in connection parameters.", + "examples": [".env.production", ".env.staging", "config/.env.dev"] } }, "required": ["host", "dbname", "user"], - "additionalProperties": false, - "allOf": [ - { - "if": { - "properties": { - "sslnegotiation": { "const": "direct" } - }, - "required": ["sslnegotiation"] - }, - "then": { - "properties": { - "sslmode": { - "enum": ["require", "verify-ca", "verify-full"] - } - } - } - }, - { - "if": { - "properties": { - "sslrootcert": { "const": "system" } - }, - "required": ["sslrootcert"] - }, - "then": { - "properties": { - "sslmode": { - "enum": ["verify-full"], - "default": "verify-full" - } - } - } - } - ] + "additionalProperties": false } } } diff --git a/docs/schemas/project/manifest.json b/docs/schemas/project/manifest.json index 50c95f7..9fb8573 100644 --- a/docs/schemas/project/manifest.json +++ b/docs/schemas/project/manifest.json @@ -1,66 +1,85 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "title": "Snapshot Manifest", - "description": "Schema for _history_/manifest.yaml - snapshot registry tracking all schema versions with their metadata, parent relationships, and database versions.", + "description": "Schema for .bfloo//manifest.yml - snapshot registry tracking all schema versions with their metadata, parent relationships, sync state, and content hashes. Uses a map structure keyed by snapshot ID for git merge-friendliness.", "type": "object", "properties": { - "current": { - "type": "string", - "description": "Label of the currently active snapshot", - "minLength": 1, - "maxLength": 64 - }, "snapshots": { - "type": "array", - "description": "Ordered list of snapshots (oldest first)", - "items": { - "type": "object", - "properties": { - "label": { - "type": "string", - "description": "Human-readable version label (unique within project)", - "minLength": 1, - "maxLength": 64, - "examples": ["v1.0.0", "v2.0.0", "v2.1.0-hotfix"] - }, - "id": { - "type": "string", - "format": "uuid", - "description": "Unique identifier for the snapshot (generated by CLI)" - }, - "parent_id": { - "type": ["string", "null"], - "format": "uuid", - "description": "UUID of parent snapshot. Null for initial snapshot." 
-          },
-          "database_version": {
-            "type": "string",
-            "description": "Database engine version for this snapshot (e.g., 'v15.0', 'v16.0')",
-            "pattern": "^v[0-9]+\\.[0-9]+(\\.[0-9]+)?$",
-            "examples": ["v15.0", "v16.0", "v15.4.1"]
-          },
-          "created_at": {
-            "type": "string",
-            "format": "date-time",
-            "description": "ISO 8601 timestamp when snapshot was created"
-          },
-          "file": {
-            "type": "string",
-            "description": "Filename in _history_/ directory",
-            "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}_[a-zA-Z0-9._-]+\\.yaml$",
-            "examples": ["2024-01-15_v1.0.0.yaml", "2024-03-20_v2.0.0.yaml"]
-          },
-          "synced": {
-            "type": "boolean",
-            "description": "Whether snapshot has been pushed to web app",
-            "default": false
-          }
-        },
-        "required": ["label", "id", "parent_id", "database_version", "created_at", "file", "synced"],
-        "additionalProperties": false
-      }
+      "type": "object",
+      "description": "Map of snapshots keyed by ID (UUID or local-<uuid> for unpushed snapshots)",
+      "propertyNames": {
+        "pattern": "^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}|local-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})$",
+        "description": "Remote UUID or local-<uuid> for unpushed snapshots"
+      },
+      "additionalProperties": {
+        "$ref": "#/$defs/snapshot"
+      },
+      "minProperties": 1
     }
   },
-  "required": ["current", "snapshots"],
-  "additionalProperties": false
+  "required": ["snapshots"],
+  "additionalProperties": false,
+  "$defs": {
+    "snapshot": {
+      "type": "object",
+      "description": "A snapshot representing a database schema at a point in time",
+      "properties": {
+        "label": {
+          "type": "string",
+          "description": "Human-readable version label (unique within schema)",
+          "minLength": 1,
+          "maxLength": 64,
+          "examples": ["v1.0.0", "v2.0.0", "v2.1.0-hotfix", "v1.0.1-experiment"]
+        },
+        "parent-id": {
+          "type": ["string", "null"],
+          "description": "ID of parent snapshot (UUID or local-<uuid>). Null for initial snapshot. Parent must have status 'done'.",
+          "pattern": "^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}|local-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})$"
+        },
+        "status": {
+          "type": "string",
+          "description": "Snapshot status. 'draft' snapshots are editable but cannot be parents. 'done' snapshots are immutable and can be parents. Snapshots automatically become 'done' when they have children.",
+          "enum": ["draft", "done"],
+          "default": "draft"
+        },
+        "database-version": {
+          "type": "string",
+          "description": "Database engine version for this snapshot (e.g., 'v15.0', 'v16.0')",
+          "pattern": "^v[0-9]+\\.[0-9]+(\\.[0-9]+)?$",
+          "examples": ["v15.0", "v16.0", "v15.4.1"]
+        },
+        "created-at": {
+          "type": "string",
+          "format": "date-time",
+          "description": "ISO 8601 timestamp when snapshot was created"
+        },
+        "file": {
+          "type": "string",
+          "description": "Filename in the .bfloo/<schema>/ directory, or 'current' if this is the active snapshot (lives at the working path defined in config)",
+          "pattern": "^(current|[0-9]{4}-[0-9]{2}-[0-9]{2}_[a-zA-Z0-9._-]+\\.yml)$",
+          "examples": ["current", "2024-01-15_v1.0.0.yml", "2024-03-20_v2.0.0.yml"]
+        },
+        "content-hash": {
+          "type": "string",
+          "description": "SHA-256 hash of snapshot file content for integrity verification and drift detection",
+          "pattern": "^sha256:[a-f0-9]{64}$",
+          "examples": ["sha256:a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2"]
+        },
+        "sync-state": {
+          "type": "string",
+          "description": "Synchronization state with the remote web app. 'synced' = exists on remote. 'local-only' = created locally, never pushed. 
'orphaned' = was synced but ID no longer exists on remote.", + "enum": ["synced", "local-only", "orphaned"], + "default": "local-only" + }, + "synced-at": { + "type": ["string", "null"], + "format": "date-time", + "description": "ISO 8601 timestamp when snapshot was last synced to remote. Null if never synced.", + "examples": ["2024-01-15T10:35:00Z", null] + } + }, + "required": ["label", "parent-id", "status", "database-version", "created-at", "file", "content-hash", "sync-state"], + "additionalProperties": false + } + } } diff --git a/tests/conftest.py b/tests/conftest.py index ba129a2..ed44df9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -65,33 +65,63 @@ def temp_docs_dir(): with open(temp_dir / "schemas" / "_registry_.json", "w") as f: json.dump(registry, f, indent=2) - # Create base config schema + # Create base config schema with reference to engine-specific envs base_config = { "$schema": "http://json-schema.org/draft-07/schema#", "title": "Base Project Configuration", "type": "object", - "properties": { - "schema_id": {"type": "string", "format": "uuid"}, - "database": { + "$defs": { + "schemaDefinition": { "type": "object", - "properties": {"engine": {"type": "string"}}, - "required": ["engine"], - }, + "allOf": [ + { + "properties": { + "engine": {"type": "string"}, + } + }, + { + "if": { + "properties": {"engine": {"const": "PostgreSQL"}}, + "required": ["engine"], + }, + "then": { + "properties": { + "envs": {"$ref": "engines/postgresql.json#/$defs/envs"} + } + }, + }, + { + "if": { + "properties": {"engine": {"const": "MySQL"}}, + "required": ["engine"], + }, + "then": { + "properties": { + "envs": {"$ref": "engines/mysql.json#/$defs/envs"} + } + }, + }, + ], + } + }, + "properties": { + "schemas": {"additionalProperties": {"$ref": "#/$defs/schemaDefinition"}} }, - "required": ["database"], + "required": ["schemas"], } with open(temp_dir / "schemas" / "project" / "config" / "base.json", "w") as f: json.dump(base_config, f, indent=2) - # Create PostgreSQL config schema + # Create PostgreSQL config schema with $defs/envs postgresql_config = { "$schema": "http://json-schema.org/draft-07/schema#", "title": "PostgreSQL Configuration", "type": "object", - "properties": { - "environments": { + "$defs": { + "envs": { "type": "object", + "description": "PostgreSQL environment configurations", "additionalProperties": { "type": "object", "properties": { @@ -104,7 +134,6 @@ def temp_docs_dir(): }, } }, - "required": ["environments"], } with open( @@ -112,14 +141,15 @@ def temp_docs_dir(): ) as f: json.dump(postgresql_config, f, indent=2) - # Create MySQL config schema + # Create MySQL config schema with $defs/envs mysql_config = { "$schema": "http://json-schema.org/draft-07/schema#", "title": "MySQL Configuration", "type": "object", - "properties": { - "environments": { + "$defs": { + "envs": { "type": "object", + "description": "MySQL environment configurations", "additionalProperties": { "type": "object", "properties": { @@ -132,7 +162,6 @@ def temp_docs_dir(): }, } }, - "required": ["environments"], } with open( @@ -152,33 +181,89 @@ def temp_docs_dir(): with open(temp_dir / "schemas" / "project" / "manifest.json", "w") as f: json.dump(manifest_schema, f, indent=2) - # Create PostgreSQL v15.0 spec - postgresql_spec = { + # Create PostgreSQL v15.0 schemas (tables, snapshot/stored, snapshot/working) + postgresql_v15_dir = temp_dir / "schemas" / "engines" / "postgresql" / "v15.0" + (postgresql_v15_dir / "snapshot").mkdir(parents=True) + + postgresql_tables = { "$schema": 
"http://json-schema.org/draft-07/schema#", - "title": "PostgreSQL 15.0 Schema", + "title": "PostgreSQL v15.0 Tables Schema", + "type": "array", + "items": {"type": "object", "properties": {"name": {"type": "string"}}}, + } + + with open(postgresql_v15_dir / "tables.json", "w") as f: + json.dump(postgresql_tables, f, indent=2) + + postgresql_stored = { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Stored Snapshot", "type": "object", - "properties": {"name": {"type": "string"}, "tables": {"type": "array"}}, - "required": ["name"], + "properties": { + "description": {"type": "string"}, + "tables": {"type": "array"}, + }, + "required": ["description", "tables"], } - with open( - temp_dir / "schemas" / "engines" / "postgresql" / "v15.0" / "spec.json", "w" - ) as f: - json.dump(postgresql_spec, f, indent=2) + with open(postgresql_v15_dir / "snapshot" / "stored.json", "w") as f: + json.dump(postgresql_stored, f, indent=2) - # Create MySQL v8.0 spec - mysql_spec = { + postgresql_working = { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "MySQL 8.0 Schema", + "title": "Working Snapshot", "type": "object", - "properties": {"name": {"type": "string"}, "tables": {"type": "array"}}, - "required": ["name"], + "properties": { + "schema": {"type": "object"}, + "snapshot": {"type": "object"}, + }, + "required": ["schema"], } - with open( - temp_dir / "schemas" / "engines" / "mysql" / "v8.0" / "spec.json", "w" - ) as f: - json.dump(mysql_spec, f, indent=2) + with open(postgresql_v15_dir / "snapshot" / "working.json", "w") as f: + json.dump(postgresql_working, f, indent=2) + + # Create MySQL v8.0 schemas (tables, snapshot/stored, snapshot/working) + mysql_v8_dir = temp_dir / "schemas" / "engines" / "mysql" / "v8.0" + (mysql_v8_dir / "snapshot").mkdir(parents=True) + + mysql_tables = { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "MySQL 8.0 Tables Schema", + "type": "array", + "items": {"type": "object", "properties": {"name": {"type": "string"}}}, + } + + with open(mysql_v8_dir / "tables.json", "w") as f: + json.dump(mysql_tables, f, indent=2) + + mysql_stored = { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Stored Snapshot", + "type": "object", + "properties": { + "description": {"type": "string"}, + "tables": {"type": "array"}, + }, + "required": ["description", "tables"], + } + + with open(mysql_v8_dir / "snapshot" / "stored.json", "w") as f: + json.dump(mysql_stored, f, indent=2) + + mysql_working = { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Working Snapshot", + "type": "object", + "properties": { + "schema": {"type": "object"}, + "snapshot": {"type": "object"}, + }, + "required": ["schema"], + } + + with open(mysql_v8_dir / "snapshot" / "working.json", "w") as f: + json.dump(mysql_working, f, indent=2) yield temp_dir @@ -199,7 +284,7 @@ def postgresql_variant(): """Standard PostgreSQL variant for testing.""" return DatabaseVariantSpec( engine="postgresql", - version="15.0", + version="v15.0", engine_spec_path="schemas/engines/postgresql/v15.0", ) @@ -295,7 +380,7 @@ def create_multiple_match_schema() -> Dict[str, Any]: { "if": { "properties": { - "database": {"properties": {"version": {"const": "15.0"}}} + "database": {"properties": {"version": {"const": "v15.0"}}} } }, "then": {"properties": {"feature2": {"type": "integer"}}}, diff --git a/tests/test_config.py b/tests/test_config.py index 475bf6e..9ed68c0 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,20 +1,66 @@ """Test 
custom configuration error handling.""" +from __future__ import annotations + import os +from pathlib import Path from unittest.mock import patch import pytest +from pydantic import Field +from pydantic_settings import BaseSettings -from database_schema_spec.core.config import Config +from database_schema_spec.core.config import ( + ExitCodesConfig, + FileNamesConfig, + JSONSchemaFieldsConfig, +) from database_schema_spec.core.exceptions import ConfigurationError def test_missing_base_url_raises_configuration_error(): """Test that missing BASE_URL raises ConfigurationError instead of ValidationError.""" - # Remove BASE_URL from environment for this test + + # Create a test config class that mimics the real Config but doesn't load .env + class TestConfig(BaseSettings): + """Test config that doesn't load from .env.""" + + docs_dir: Path = Field( + default=Path("docs"), description="Path to documentation/schema files" + ) + output_dir: Path = Field( + default=Path("output"), description="Path for generated output files" + ) + base_url: str = Field(..., description="Base URL for generated spec files") + file_names: FileNamesConfig = Field(default_factory=FileNamesConfig) + json_schema_fields: JSONSchemaFieldsConfig = Field( + default_factory=JSONSchemaFieldsConfig + ) + exit_codes: ExitCodesConfig = Field(default_factory=ExitCodesConfig) + + model_config = { + "env_file": None, # Disable .env file loading + "case_sensitive": False, + } + + def __init__(self, **data): + """Initialize config with custom error handling for missing required fields.""" + from pydantic import ValidationError + + try: + super().__init__(**data) + except ValidationError as e: + for error in e.errors(): + if error["type"] == "missing": + field_name = error["loc"][0] if error["loc"] else "unknown" + env_var_name = str(field_name).upper() + raise ConfigurationError(variable_name=env_var_name) from e + raise + + # Clear environment and test with patch.dict(os.environ, {}, clear=True): with pytest.raises(ConfigurationError) as exc_info: - Config() + TestConfig() error = exc_info.value assert error.variable_name == "BASE_URL" @@ -24,6 +70,9 @@ def test_missing_base_url_raises_configuration_error(): def test_config_with_valid_base_url(): """Test that Config works correctly when BASE_URL is provided.""" with patch.dict(os.environ, {"BASE_URL": "https://example.com/api"}): + # Import fresh to get new instance with patched env + from database_schema_spec.core.config import Config + config = Config() assert config.base_url == "https://example.com/api" assert config.docs_dir.name == "docs" diff --git a/tests/test_integration_production.py b/tests/test_integration_production.py index 620185a..8998948 100644 --- a/tests/test_integration_production.py +++ b/tests/test_integration_production.py @@ -24,25 +24,36 @@ def test_schema_generation_with_real_data(self, temp_docs_dir, temp_output_dir): # Verify output structure assert temp_output_dir.exists() - # Check for generated schema files - postgres_output = temp_output_dir / "postgresql" / "v15.0" / "spec.json" - mysql_output = temp_output_dir / "mysql" / "v8.0" / "spec.json" + # Check for generated schema files (new structure: tables.json, snapshot/) + postgres_tables = temp_output_dir / "postgresql" / "v15.0" / "tables.json" + postgres_stored = ( + temp_output_dir / "postgresql" / "v15.0" / "snapshot" / "stored.json" + ) + postgres_working = ( + temp_output_dir / "postgresql" / "v15.0" / "snapshot" / "working.json" + ) + mysql_tables = temp_output_dir / "mysql" / "v8.0" / 
"tables.json" - assert postgres_output.exists(), "PostgreSQL schema should be generated" - assert mysql_output.exists(), "MySQL schema should be generated" + assert postgres_tables.exists(), "PostgreSQL tables schema should be generated" + assert postgres_stored.exists(), ( + "PostgreSQL stored snapshot schema should be generated" + ) + assert postgres_working.exists(), ( + "PostgreSQL working snapshot schema should be generated" + ) + assert mysql_tables.exists(), "MySQL tables schema should be generated" - # Check for project schemas - assert (temp_output_dir / "config" / "base.json").exists() - assert (temp_output_dir / "config" / "engines" / "postgresql.json").exists() - assert (temp_output_dir / "config" / "engines" / "mysql.json").exists() + # Check for project schemas (new structure: config/{engine}.json, no base.json) + assert (temp_output_dir / "config" / "postgresql.json").exists() + assert (temp_output_dir / "config" / "mysql.json").exists() assert (temp_output_dir / "manifest.json").exists() assert (temp_output_dir / "smap.json").exists() # Verify content quality - with open(postgres_output) as f: + with open(postgres_tables) as f: postgres_schema = json.load(f) - with open(mysql_output) as f: + with open(mysql_tables) as f: mysql_schema = json.load(f) # Basic schema validation - check they have $id injected @@ -86,16 +97,24 @@ def test_schema_generation_preserves_file_structure( # Verify directory structure matches expected pattern expected_structure = [ temp_output_dir / "postgresql" / "v15.0", + temp_output_dir / "postgresql" / "v15.0" / "snapshot", temp_output_dir / "mysql" / "v8.0", - temp_output_dir / "config" / "engines", + temp_output_dir / "mysql" / "v8.0" / "snapshot", + temp_output_dir / "config", ] for path in expected_structure: assert path.exists(), f"Expected directory {path} should exist" - # Check spec files - assert (temp_output_dir / "postgresql" / "v15.0" / "spec.json").exists() - assert (temp_output_dir / "mysql" / "v8.0" / "spec.json").exists() + # Check new schema files (tables.json, snapshot/stored.json, snapshot/working.json) + assert (temp_output_dir / "postgresql" / "v15.0" / "tables.json").exists() + assert ( + temp_output_dir / "postgresql" / "v15.0" / "snapshot" / "stored.json" + ).exists() + assert ( + temp_output_dir / "postgresql" / "v15.0" / "snapshot" / "working.json" + ).exists() + assert (temp_output_dir / "mysql" / "v8.0" / "tables.json").exists() def test_schema_generation_output_content_validity( self, temp_docs_dir, temp_output_dir @@ -146,20 +165,54 @@ def test_schema_generation_handles_multiple_variants( with open(registry_file, "w") as f: json.dump(registry, f, indent=2) - # Create directory and spec for the new variant + # Create directory and schemas for the new variant (new structure) postgresql_14_dir = ( temp_docs_dir / "schemas" / "engines" / "postgresql" / "v14.0" ) + postgresql_14_snapshot_dir = postgresql_14_dir / "snapshot" postgresql_14_dir.mkdir(parents=True) + postgresql_14_snapshot_dir.mkdir(parents=True) + + # Create tables.json + with open(postgresql_14_dir / "tables.json", "w") as f: + json.dump( + { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "PostgreSQL 14.0 Tables Schema", + "type": "array", + "items": {"type": "object"}, + }, + f, + indent=2, + ) - with open(postgresql_14_dir / "spec.json", "w") as f: + # Create snapshot/stored.json + with open(postgresql_14_snapshot_dir / "stored.json", "w") as f: json.dump( { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "PostgreSQL 14.0 
Schema", + "title": "Stored Snapshot", "type": "object", - "properties": {"name": {"type": "string"}}, - "required": ["name"], + "properties": { + "description": {"type": "string"}, + "tables": {"type": "array"}, + }, + }, + f, + indent=2, + ) + + # Create snapshot/working.json + with open(postgresql_14_snapshot_dir / "working.json", "w") as f: + json.dump( + { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Working Snapshot", + "type": "object", + "properties": { + "schema": {"type": "object"}, + "snapshot": {"type": "object"}, + }, }, f, indent=2, @@ -170,10 +223,10 @@ def test_schema_generation_handles_multiple_variants( ) generator.run_for_testing() - # Should create output for all variants - assert (temp_output_dir / "postgresql" / "v15.0" / "spec.json").exists() - assert (temp_output_dir / "postgresql" / "v14.0" / "spec.json").exists() - assert (temp_output_dir / "mysql" / "v8.0" / "spec.json").exists() + # Should create output for all variants (new file structure) + assert (temp_output_dir / "postgresql" / "v15.0" / "tables.json").exists() + assert (temp_output_dir / "postgresql" / "v14.0" / "tables.json").exists() + assert (temp_output_dir / "mysql" / "v8.0" / "tables.json").exists() def test_schema_generation_schema_map_structure( self, temp_docs_dir, temp_output_dir @@ -197,19 +250,27 @@ def test_schema_generation_schema_map_structure( # Check project section assert "manifest" in smap["project"] assert "config" in smap["project"] - assert "base" in smap["project"]["config"] - assert "engines" in smap["project"]["config"] - # Check engine configs are present - assert "postgresql" in smap["project"]["config"]["engines"] - assert "mysql" in smap["project"]["config"]["engines"] + # Check engine configs are directly under config (new structure) + assert "postgresql" in smap["project"]["config"] + assert "mysql" in smap["project"]["config"] - # Check engine specs are present + # Verify config URLs point to config/{engine}.json + assert "config/postgresql.json" in smap["project"]["config"]["postgresql"] + assert "config/mysql.json" in smap["project"]["config"]["mysql"] + + # Check engine specs are present (new nested structure) assert "postgresql" in smap["engines"] assert "mysql" in smap["engines"] assert "v15.0" in smap["engines"]["postgresql"] assert "v8.0" in smap["engines"]["mysql"] + # Check nested schema types + assert "tables" in smap["engines"]["postgresql"]["v15.0"] + assert "snapshot" in smap["engines"]["postgresql"]["v15.0"] + assert "stored" in smap["engines"]["postgresql"]["v15.0"]["snapshot"] + assert "working" in smap["engines"]["postgresql"]["v15.0"]["snapshot"] + def test_schema_generation_performance_with_large_schema( self, temp_docs_dir, temp_output_dir ): @@ -217,22 +278,30 @@ def test_schema_generation_performance_with_large_schema( # Create a large schema with many properties large_schema = { "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": {}, + "type": "array", + "items": { + "type": "object", + "properties": {}, + }, } # Add 100 properties to simulate a large schema for i in range(100): - large_schema["properties"][f"property_{i}"] = { + large_schema["items"]["properties"][f"property_{i}"] = { "type": "string", "description": f"Property {i}", } - # Update the PostgreSQL spec with the large schema - spec_file = ( - temp_docs_dir / "schemas" / "engines" / "postgresql" / "v15.0" / "spec.json" + # Update the PostgreSQL tables.json with the large schema + tables_file = ( + temp_docs_dir + / "schemas" + / 
"engines" + / "postgresql" + / "v15.0" + / "tables.json" ) - with open(spec_file, "w") as f: + with open(tables_file, "w") as f: json.dump(large_schema, f, indent=2) import time diff --git a/tests/test_output_manager.py b/tests/test_output_manager.py index c69393e..20c0345 100644 --- a/tests/test_output_manager.py +++ b/tests/test_output_manager.py @@ -15,30 +15,41 @@ def temp_output_dir(): """Create a temporary output directory with test data.""" temp_dir = Path(tempfile.mkdtemp()) - # Create test directory structure - postgresql_15_dir = temp_dir / "postgresql" / "15.0" - postgresql_16_dir = temp_dir / "postgresql" / "16.0" - mysql_8_dir = temp_dir / "mysql" / "8.0" - - postgresql_15_dir.mkdir(parents=True) - postgresql_16_dir.mkdir(parents=True) - mysql_8_dir.mkdir(parents=True) - - # Create test spec.json files - test_schema = { - "title": "Test Schema", - "type": "object", - "properties": {"test": {"type": "string"}}, + # Create test directory structure with new schema layout + postgresql_15_dir = temp_dir / "postgresql" / "v15.0" + postgresql_16_dir = temp_dir / "postgresql" / "v16.0" + mysql_8_dir = temp_dir / "mysql" / "v8.0" + + (postgresql_15_dir / "snapshot").mkdir(parents=True) + (postgresql_16_dir / "snapshot").mkdir(parents=True) + (mysql_8_dir / "snapshot").mkdir(parents=True) + + # Create test schema files (tables.json and snapshot schemas) + test_tables = { + "title": "Test Tables Schema", + "type": "array", + "items": {"type": "object"}, } - with open(postgresql_15_dir / "spec.json", "w") as f: - json.dump(test_schema, f) + test_stored = { + "title": "Test Stored Snapshot", + "type": "object", + "properties": {"description": {"type": "string"}, "tables": {"type": "array"}}, + } - with open(postgresql_16_dir / "spec.json", "w") as f: - json.dump(test_schema, f) + test_working = { + "title": "Test Working Snapshot", + "type": "object", + "properties": {"schema": {"type": "object"}, "snapshot": {"type": "object"}}, + } - with open(mysql_8_dir / "spec.json", "w") as f: - json.dump(test_schema, f) + for version_dir in [postgresql_15_dir, postgresql_16_dir, mysql_8_dir]: + with open(version_dir / "tables.json", "w") as f: + json.dump(test_tables, f) + with open(version_dir / "snapshot" / "stored.json", "w") as f: + json.dump(test_stored, f) + with open(version_dir / "snapshot" / "working.json", "w") as f: + json.dump(test_working, f) yield temp_dir @@ -90,29 +101,31 @@ def test_create_output_structure_permission_error(self): ): manager.create_output_structure() - def test_get_output_path(self, output_manager): - """Test _get_output_path method.""" - path = output_manager._get_output_path("postgresql", "15.0") + def test_get_engine_schema_path(self, output_manager): + """Test _get_engine_schema_path method.""" + path = output_manager._get_engine_schema_path("postgresql", "v15.0", "tables") - expected = output_manager.output_dir / "postgresql" / "15.0" / "spec.json" + expected = output_manager.output_dir / "postgresql" / "v15.0" / "tables.json" assert path == expected - def test_get_output_path_lowercase_engine(self, output_manager): - """Test _get_output_path converts engine name to lowercase.""" - path = output_manager._get_output_path("PostgreSQL", "15.0") + def test_get_engine_schema_path_lowercase_engine(self, output_manager): + """Test _get_engine_schema_path converts engine name to lowercase.""" + path = output_manager._get_engine_schema_path("PostgreSQL", "v15.0", "tables") - expected = output_manager.output_dir / "postgresql" / "15.0" / "spec.json" + expected = 
output_manager.output_dir / "postgresql" / "v15.0" / "tables.json" assert path == expected - def test_write_schema_success(self, output_manager): - """Test successful schema writing.""" + def test_write_engine_schema_success(self, output_manager): + """Test successful engine schema writing.""" schema = { "title": "Test Schema", - "type": "object", - "properties": {"name": {"type": "string"}}, + "type": "array", + "items": {"type": "object"}, } - result_path = output_manager.write_schema(schema, "postgresql", "15.0") + result_path = output_manager.write_engine_schema( + schema, "postgresql", "v15.0", "tables" + ) # Check that file was created assert result_path.exists() @@ -124,52 +137,60 @@ def test_write_schema_success(self, output_manager): assert written_schema == schema - def test_write_schema_creates_directories(self, tmp_path): - """Test that write_schema creates necessary directories.""" + def test_write_engine_schema_creates_directories(self, tmp_path): + """Test that write_engine_schema creates necessary directories.""" output_dir = tmp_path / "new_output" manager = OutputManager(output_dir) schema = {"test": "data"} - result_path = manager.write_schema(schema, "mysql", "8.0") + result_path = manager.write_engine_schema( + schema, "mysql", "v8.0", "snapshot/stored" + ) # Check that directories were created - assert (output_dir / "mysql" / "8.0").exists() + assert (output_dir / "mysql" / "v8.0" / "snapshot").exists() assert result_path.exists() - def test_write_schema_permission_error(self, output_manager): + def test_write_engine_schema_permission_error(self, output_manager): """Test handling of permission errors during schema writing.""" schema = {"test": "data"} with patch("builtins.open", side_effect=PermissionError("Access denied")): with pytest.raises(PermissionError, match="Failed to write schema"): - output_manager.write_schema(schema, "postgresql", "15.0") + output_manager.write_engine_schema( + schema, "postgresql", "v15.0", "tables" + ) - def test_get_spec_url_relative_path(self, output_manager): - """Test _get_spec_url with no base URL (relative path).""" - url = output_manager._get_spec_url("postgresql", "15.0") + def test_get_engine_schema_url_relative_path(self, output_manager): + """Test _get_engine_schema_url with no base URL (relative path).""" + url = output_manager._get_engine_schema_url("postgresql", "v15.0", "tables") - assert url == "postgresql/15.0/spec.json" + assert url == "postgresql/v15.0/tables.json" - def test_get_spec_url_with_base_url(self, output_manager): - """Test _get_spec_url with base URL.""" + def test_get_engine_schema_url_with_base_url(self, output_manager): + """Test _get_engine_schema_url with base URL.""" base_url = "https://api.example.com/schemas" - url = output_manager._get_spec_url("postgresql", "15.0", base_url) + url = output_manager._get_engine_schema_url( + "postgresql", "v15.0", "tables", base_url + ) - assert url == "https://api.example.com/schemas/postgresql/15.0/spec.json" + assert url == "https://api.example.com/schemas/postgresql/v15.0/tables.json" - def test_get_spec_url_with_trailing_slash_base_url(self, output_manager): - """Test _get_spec_url strips trailing slash from base URL.""" + def test_get_engine_schema_url_with_trailing_slash_base_url(self, output_manager): + """Test _get_engine_schema_url strips trailing slash from base URL.""" base_url = "https://api.example.com/schemas/" - url = output_manager._get_spec_url("postgresql", "15.0", base_url) + url = output_manager._get_engine_schema_url( + "postgresql", "v15.0", 
"tables", base_url + ) - assert url == "https://api.example.com/schemas/postgresql/15.0/spec.json" + assert url == "https://api.example.com/schemas/postgresql/v15.0/tables.json" - def test_get_spec_url_engine_lowercase(self, output_manager): - """Test _get_spec_url converts engine to lowercase.""" - url = output_manager._get_spec_url("PostgreSQL", "15.0") + def test_get_engine_schema_url_engine_lowercase(self, output_manager): + """Test _get_engine_schema_url converts engine to lowercase.""" + url = output_manager._get_engine_schema_url("PostgreSQL", "v15.0", "tables") - assert url == "postgresql/15.0/spec.json" + assert url == "postgresql/v15.0/tables.json" def test_generate_engine_map_empty_directory(self, tmp_path): """Test _generate_engine_map with empty output directory.""" @@ -193,30 +214,48 @@ def test_generate_engine_map_multiple_engines(self, output_manager): """Test _generate_engine_map with multiple engines and versions.""" engine_map = output_manager._generate_engine_map() - expected = { - "postgresql": { - "15.0": "postgresql/15.0/spec.json", - "16.0": "postgresql/16.0/spec.json", - }, - "mysql": {"8.0": "mysql/8.0/spec.json"}, - } + # Check structure - now nested with schema types + assert "postgresql" in engine_map + assert "mysql" in engine_map - assert engine_map == expected + # Check PostgreSQL versions + assert "v15.0" in engine_map["postgresql"] + assert "v16.0" in engine_map["postgresql"] + + # Check schema types within version + assert "tables" in engine_map["postgresql"]["v15.0"] + assert "snapshot" in engine_map["postgresql"]["v15.0"] + assert "stored" in engine_map["postgresql"]["v15.0"]["snapshot"] + assert "working" in engine_map["postgresql"]["v15.0"]["snapshot"] + + # Check URLs + assert ( + engine_map["postgresql"]["v15.0"]["tables"] + == "postgresql/v15.0/tables.json" + ) + assert ( + engine_map["postgresql"]["v15.0"]["snapshot"]["stored"] + == "postgresql/v15.0/snapshot/stored.json" + ) + assert ( + engine_map["postgresql"]["v15.0"]["snapshot"]["working"] + == "postgresql/v15.0/snapshot/working.json" + ) def test_generate_engine_map_with_base_url(self, output_manager): """Test _generate_engine_map with base URL.""" base_url = "https://api.example.com/schemas" engine_map = output_manager._generate_engine_map(base_url) - expected = { - "postgresql": { - "15.0": "https://api.example.com/schemas/postgresql/15.0/spec.json", - "16.0": "https://api.example.com/schemas/postgresql/16.0/spec.json", - }, - "mysql": {"8.0": "https://api.example.com/schemas/mysql/8.0/spec.json"}, - } - - assert engine_map == expected + # Check URLs include base URL + assert ( + engine_map["postgresql"]["v15.0"]["tables"] + == "https://api.example.com/schemas/postgresql/v15.0/tables.json" + ) + assert ( + engine_map["postgresql"]["v15.0"]["snapshot"]["stored"] + == "https://api.example.com/schemas/postgresql/v15.0/snapshot/stored.json" + ) def test_generate_engine_map_ignores_files_in_engine_dir(self, temp_output_dir): """Test _generate_engine_map ignores files in engine directories.""" @@ -229,24 +268,24 @@ def test_generate_engine_map_ignores_files_in_engine_dir(self, temp_output_dir): # Should still have the version directories, but ignore the file assert "postgresql" in engine_map - assert "15.0" in engine_map["postgresql"] - assert "16.0" in engine_map["postgresql"] + assert "v15.0" in engine_map["postgresql"] + assert "v16.0" in engine_map["postgresql"] - def test_generate_engine_map_ignores_version_dirs_without_spec( + def 
test_generate_engine_map_ignores_version_dirs_without_tables( self, temp_output_dir ): - """Test _generate_engine_map ignores version directories without spec.json.""" - # Create a version directory without spec.json - empty_version_dir = temp_output_dir / "postgresql" / "17.0" + """Test _generate_engine_map ignores version directories without tables.json.""" + # Create a version directory without tables.json + empty_version_dir = temp_output_dir / "postgresql" / "v17.0" empty_version_dir.mkdir() manager = OutputManager(temp_output_dir) engine_map = manager._generate_engine_map() # Should not include the empty version directory - assert "17.0" not in engine_map["postgresql"] - assert "15.0" in engine_map["postgresql"] - assert "16.0" in engine_map["postgresql"] + assert "v17.0" not in engine_map["postgresql"] + assert "v15.0" in engine_map["postgresql"] + assert "v16.0" in engine_map["postgresql"] def test_generate_engine_map_ignores_config_directory(self, temp_output_dir): """Test _generate_engine_map ignores the config directory.""" @@ -274,16 +313,20 @@ def test_write_schema_map_success(self, output_manager): with open(result_path, "r") as f: written_map = json.load(f) - # Check structure + # Check structure - now config is directly a map of engine -> URL assert "project" in written_map assert "engines" in written_map assert "config" in written_map["project"] - assert "base" in written_map["project"]["config"] - assert "engines" in written_map["project"]["config"] - # Check engine configs - assert "postgresql" in written_map["project"]["config"]["engines"] - assert "mysql" in written_map["project"]["config"]["engines"] + # Check engine configs are directly under config (not config.engines) + assert "postgresql" in written_map["project"]["config"] + assert "mysql" in written_map["project"]["config"] + + # Verify the config URLs point to config/{engine}.json + assert ( + written_map["project"]["config"]["postgresql"] == "config/postgresql.json" + ) + assert written_map["project"]["config"]["mysql"] == "config/mysql.json" # Check engine specs assert "postgresql" in written_map["engines"] @@ -300,12 +343,20 @@ def test_write_schema_map_with_base_url(self, output_manager): # All URLs should include the base URL assert written_map["project"]["manifest"].startswith(base_url) - assert written_map["project"]["config"]["base"].startswith(base_url) - for url in written_map["project"]["config"]["engines"].values(): + + # Config URLs are now directly under project.config + for url in written_map["project"]["config"].values(): assert url.startswith(base_url) + + # Check engine schema URLs (now nested structure) for engine_versions in written_map["engines"].values(): - for url in engine_versions.values(): - assert url.startswith(base_url) + for version_schemas in engine_versions.values(): + # tables URL + assert version_schemas["tables"].startswith(base_url) + # snapshot URLs + if "snapshot" in version_schemas: + for snapshot_url in version_schemas["snapshot"].values(): + assert snapshot_url.startswith(base_url) def test_write_schema_map_creates_output_directory(self, tmp_path): """Test that write_schema_map creates output directory if it doesn't exist.""" @@ -344,10 +395,24 @@ def test_integration_with_schema_generation(self, tmp_path): output_dir = tmp_path / "integration_test" manager = OutputManager(output_dir) - # Write some test schemas - test_schema = {"test": "data"} - manager.write_schema(test_schema, "postgresql", "15.0") - manager.write_schema(test_schema, "mysql", "8.0") + # Write some 
test schemas using the new API + test_tables = {"title": "Test Tables", "type": "array"} + test_stored = {"title": "Test Stored", "type": "object"} + test_working = {"title": "Test Working", "type": "object"} + + # PostgreSQL v15.0 + manager.write_engine_schema(test_tables, "postgresql", "v15.0", "tables") + manager.write_engine_schema( + test_stored, "postgresql", "v15.0", "snapshot/stored" + ) + manager.write_engine_schema( + test_working, "postgresql", "v15.0", "snapshot/working" + ) + + # MySQL v8.0 + manager.write_engine_schema(test_tables, "mysql", "v8.0", "tables") + manager.write_engine_schema(test_stored, "mysql", "v8.0", "snapshot/stored") + manager.write_engine_schema(test_working, "mysql", "v8.0", "snapshot/working") # Generate schema map smap_path = manager.write_schema_map( @@ -360,11 +425,144 @@ def test_integration_with_schema_generation(self, tmp_path): assert "postgresql" in schema_map["engines"] assert "mysql" in schema_map["engines"] + + # Check nested structure + assert "v15.0" in schema_map["engines"]["postgresql"] + assert "tables" in schema_map["engines"]["postgresql"]["v15.0"] + assert "snapshot" in schema_map["engines"]["postgresql"]["v15.0"] + assert ( - schema_map["engines"]["postgresql"]["15.0"] - == "https://example.com/postgresql/15.0/spec.json" + schema_map["engines"]["postgresql"]["v15.0"]["tables"] + == "https://example.com/postgresql/v15.0/tables.json" ) assert ( - schema_map["engines"]["mysql"]["8.0"] - == "https://example.com/mysql/8.0/spec.json" + schema_map["engines"]["postgresql"]["v15.0"]["snapshot"]["stored"] + == "https://example.com/postgresql/v15.0/snapshot/stored.json" ) + assert ( + schema_map["engines"]["mysql"]["v8.0"]["tables"] + == "https://example.com/mysql/v8.0/tables.json" + ) + + +class TestWriteResolvedEngineConfig: + """Tests for the write_resolved_engine_config method.""" + + def test_write_resolved_engine_config_success(self, tmp_path): + """Test successful resolved engine config writing.""" + docs_dir = tmp_path / "docs" + output_dir = tmp_path / "output" + (docs_dir / "schemas" / "project" / "config" / "engines").mkdir(parents=True) + + # Create engine-specific schema + engine_schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "$defs": {"envs": {"type": "object", "description": "Environment configs"}}, + } + with open( + docs_dir / "schemas" / "project" / "config" / "engines" / "postgresql.json", + "w", + ) as f: + json.dump(engine_schema, f) + + # Create base config schema with reference to engine schema + base_schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Base Project Configuration", + "type": "object", + "$defs": { + "schemaDefinition": { + "properties": { + "envs": {"$ref": "engines/postgresql.json#/$defs/envs"} + } + } + }, + "properties": { + "schemas": { + "additionalProperties": {"$ref": "#/$defs/schemaDefinition"} + } + }, + } + with open(docs_dir / "schemas" / "project" / "config" / "base.json", "w") as f: + json.dump(base_schema, f) + + manager = OutputManager(output_dir, docs_dir) + result_path = manager.write_resolved_engine_config( + "PostgreSQL", + "schemas/project/config/base.json", + "https://example.com", + ) + + # Check that file was created at correct path + assert result_path.exists() + assert result_path == output_dir / "config" / "postgresql.json" + + # Check file contents + with open(result_path, "r") as f: + written_schema = json.load(f) + + # Verify $id was injected + assert written_schema["$id"] == "https://example.com/config/postgresql.json" + + # 
Verify title was updated + assert "PostgreSQL" in written_schema["title"] + + # Verify the reference was resolved (envs should be inlined) + schema_def = written_schema["$defs"]["schemaDefinition"] + assert schema_def["properties"]["envs"]["type"] == "object" + assert schema_def["properties"]["envs"]["description"] == "Environment configs" + + def test_write_resolved_engine_config_output_path(self, tmp_path): + """Test that config is written to config/{engine}.json.""" + docs_dir = tmp_path / "docs" + output_dir = tmp_path / "output" + (docs_dir / "schemas" / "project" / "config").mkdir(parents=True) + + # Create minimal base config schema + base_schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Base Config", + "type": "object", + } + with open(docs_dir / "schemas" / "project" / "config" / "base.json", "w") as f: + json.dump(base_schema, f) + + manager = OutputManager(output_dir, docs_dir) + result_path = manager.write_resolved_engine_config( + "MySQL", + "schemas/project/config/base.json", + ) + + # Verify output path follows config/{engine}.json pattern + assert result_path == output_dir / "config" / "mysql.json" + assert result_path.exists() + + def test_write_resolved_engine_config_preserves_schema_order(self, tmp_path): + """Test that $schema and $id are ordered correctly.""" + docs_dir = tmp_path / "docs" + output_dir = tmp_path / "output" + (docs_dir / "schemas" / "project" / "config").mkdir(parents=True) + + base_schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Base Config", + "type": "object", + } + with open(docs_dir / "schemas" / "project" / "config" / "base.json", "w") as f: + json.dump(base_schema, f) + + manager = OutputManager(output_dir, docs_dir) + result_path = manager.write_resolved_engine_config( + "PostgreSQL", + "schemas/project/config/base.json", + "https://example.com", + ) + + # Read raw content to check key ordering + with open(result_path, "r") as f: + content = f.read() + + # $schema should come before $id + schema_pos = content.find('"$schema"') + id_pos = content.find('"$id"') + assert schema_pos < id_pos diff --git a/tests/test_resolver.py b/tests/test_resolver.py index bf9a3e1..cef1693 100644 --- a/tests/test_resolver.py +++ b/tests/test_resolver.py @@ -1,30 +1,368 @@ +"""Tests for the JSON reference resolver.""" + +import json +import tempfile +from pathlib import Path + +import pytest + +from database_schema_spec.core.exceptions import ( + CircularReferenceError, + ReferenceResolutionError, +) from database_schema_spec.resolution.resolver import JSONRefResolver -def test_circular_reference_detection(): - resolver = JSONRefResolver() - resolver.resolution_stack = ["a.json", "b.json"] - assert resolver.detect_circular_reference("a.json") - assert not resolver.detect_circular_reference("c.json") +class TestCircularReferenceDetection: + """Tests for circular reference detection.""" + + def test_detect_circular_reference_in_stack(self): + """Test detection of circular reference when ref is in stack.""" + resolver = JSONRefResolver() + resolver.resolution_stack = ["a.json", "b.json"] + assert resolver.detect_circular_reference("a.json") + + def test_detect_no_circular_reference(self): + """Test no circular reference when ref is not in stack.""" + resolver = JSONRefResolver() + resolver.resolution_stack = ["a.json", "b.json"] + assert not resolver.detect_circular_reference("c.json") + + +class TestResolveReferencesBasic: + """Tests for basic reference resolution.""" + + def 
test_resolve_references_no_ref(self): + """Test schema without $ref returns unchanged.""" + resolver = JSONRefResolver() + schema = {"properties": {"foo": {"type": "string"}}} + result = resolver.resolve_references(schema) + assert result == schema + + def test_resolve_references_with_external_ref(self, monkeypatch): + """Test resolving external file reference.""" + resolver = JSONRefResolver() + schema = {"$ref": "other.json", "extra": 1} + referenced = {"properties": {"bar": {"type": "number"}}} + + def fake_load_referenced_file(ref_path, current_file=None): + assert ref_path == "other.json" + return referenced + + resolver.load_referenced_file = fake_load_referenced_file # type: ignore[method-assign] + result = resolver.resolve_references(schema) + assert "bar" in result["properties"] + assert result["extra"] == 1 + + +class TestParseRef: + """Tests for $ref parsing into file path and JSON pointer.""" + + def test_parse_ref_external_file_only(self): + """Test parsing external file reference without pointer.""" + resolver = JSONRefResolver() + file_path, pointer = resolver._parse_ref("other.json") + assert file_path == "other.json" + assert pointer is None + + def test_parse_ref_local_pointer_only(self): + """Test parsing local pointer reference.""" + resolver = JSONRefResolver() + file_path, pointer = resolver._parse_ref("#/$defs/envs") + assert file_path is None + assert pointer == "/$defs/envs" + + def test_parse_ref_external_with_pointer(self): + """Test parsing external file reference with JSON pointer.""" + resolver = JSONRefResolver() + file_path, pointer = resolver._parse_ref("engines/postgresql.json#/$defs/envs") + assert file_path == "engines/postgresql.json" + assert pointer == "/$defs/envs" + + def test_parse_ref_empty_pointer(self): + """Test parsing reference with empty pointer after #.""" + resolver = JSONRefResolver() + file_path, pointer = resolver._parse_ref("other.json#") + assert file_path == "other.json" + assert pointer is None + + def test_parse_ref_root_pointer(self): + """Test parsing reference with root pointer.""" + resolver = JSONRefResolver() + file_path, pointer = resolver._parse_ref("other.json#/") + assert file_path == "other.json" + assert pointer == "/" + + +class TestResolveJsonPointer: + """Tests for JSON pointer resolution within documents.""" + + def test_resolve_simple_pointer(self): + """Test resolving a simple JSON pointer.""" + resolver = JSONRefResolver() + document = { + "$defs": {"envs": {"type": "object", "description": "Environment configs"}} + } + result = resolver._resolve_json_pointer(document, "/$defs/envs", "test") + assert result == {"type": "object", "description": "Environment configs"} + + def test_resolve_nested_pointer(self): + """Test resolving a deeply nested JSON pointer.""" + resolver = JSONRefResolver() + document = {"level1": {"level2": {"level3": {"value": "deep"}}}} + result = resolver._resolve_json_pointer( + document, "/level1/level2/level3", "test" + ) + assert result == {"value": "deep"} + + def test_resolve_pointer_with_escaped_chars(self): + """Test resolving pointer with escaped characters (~0 and ~1).""" + resolver = JSONRefResolver() + # ~1 decodes to /, ~0 decodes to ~ + document = { + "a/b": {"type": "string"}, + "a~b": {"type": "number"}, + } + result1 = resolver._resolve_json_pointer(document, "/a~1b", "test") + assert result1 == {"type": "string"} + + result2 = resolver._resolve_json_pointer(document, "/a~0b", "test") + assert result2 == {"type": "number"} + + def 
test_resolve_pointer_empty_returns_document(self): + """Test that empty pointer returns the whole document.""" + resolver = JSONRefResolver() + document = {"foo": {"bar": "baz"}} + result = resolver._resolve_json_pointer(document, "", "test") + assert result == document + + def test_resolve_pointer_root_returns_document(self): + """Test that root pointer (/) returns the whole document.""" + resolver = JSONRefResolver() + document = {"foo": {"bar": "baz"}} + result = resolver._resolve_json_pointer(document, "/", "test") + assert result == document + + def test_resolve_pointer_key_not_found(self): + """Test error when JSON pointer key doesn't exist.""" + resolver = JSONRefResolver() + document = {"$defs": {"envs": {}}} + with pytest.raises(ReferenceResolutionError) as exc_info: + resolver._resolve_json_pointer(document, "/$defs/nonexistent", "test") + assert "not found" in str(exc_info.value) + + def test_resolve_pointer_non_object_result(self): + """Test error when pointer resolves to non-object.""" + resolver = JSONRefResolver() + document = {"$defs": {"value": "string_value"}} + with pytest.raises(ReferenceResolutionError) as exc_info: + resolver._resolve_json_pointer(document, "/$defs/value", "test") + assert "non-object" in str(exc_info.value) + + def test_resolve_pointer_array_index(self): + """Test resolving pointer through array index.""" + resolver = JSONRefResolver() + document = { + "items": [ + {"name": "first"}, + {"name": "second"}, + ] + } + result = resolver._resolve_json_pointer(document, "/items/1", "test") + assert result == {"name": "second"} + + def test_resolve_pointer_invalid_array_index(self): + """Test error with invalid array index.""" + resolver = JSONRefResolver() + document = {"items": [{"name": "first"}]} + with pytest.raises(ReferenceResolutionError) as exc_info: + resolver._resolve_json_pointer(document, "/items/invalid", "test") + assert "invalid array index" in str(exc_info.value) + + +class TestResolveLocalPointer: + """Tests for local pointer resolution (#/$defs/...).""" + + def test_resolve_local_pointer_no_current_file(self): + """Test error when resolving local pointer without current file.""" + resolver = JSONRefResolver() + with pytest.raises(ReferenceResolutionError) as exc_info: + resolver._resolve_local_pointer("/$defs/test", None, "#/$defs/test") + assert "without current file context" in str(exc_info.value) + + +class TestExternalFileWithPointer: + """Tests for external file references with JSON pointers.""" + + def test_resolve_external_file_with_pointer(self): + """Test resolving external file reference with JSON pointer.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Create the external file with $defs + external_schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "$defs": { + "envs": { + "type": "object", + "description": "Environment configurations", + "properties": {"host": {"type": "string"}}, + } + }, + } + (temp_path / "engines").mkdir() + with open(temp_path / "engines" / "postgresql.json", "w") as f: + json.dump(external_schema, f) + + # Create the main file that references the external file + main_schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": {"envs": {"$ref": "engines/postgresql.json#/$defs/envs"}}, + } + with open(temp_path / "base.json", "w") as f: + json.dump(main_schema, f) + + # Resolve + resolver = JSONRefResolver(temp_path) + result = resolver.resolve_file("base.json") + + # Verify the reference was resolved + assert "properties" in 
result + assert "envs" in result["properties"] + assert result["properties"]["envs"]["type"] == "object" + assert ( + result["properties"]["envs"]["description"] + == "Environment configurations" + ) + + +class TestLocalPointerResolution: + """Tests for local pointer resolution within the same file.""" + + def test_resolve_local_pointer_in_file(self): + """Test resolving local $ref pointer within the same file.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Create a file with local $ref + schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "$defs": {"stringType": {"type": "string", "minLength": 1}}, + "properties": {"name": {"$ref": "#/$defs/stringType"}}, + } + with open(temp_path / "schema.json", "w") as f: + json.dump(schema, f) + + # Resolve + resolver = JSONRefResolver(temp_path) + result = resolver.resolve_file("schema.json") + + # Verify local reference was resolved + assert result["properties"]["name"]["type"] == "string" + assert result["properties"]["name"]["minLength"] == 1 + + +class TestFileCaching: + """Tests for file caching functionality.""" + + def test_file_caching(self): + """Test that files are cached and not re-read.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + schema = {"type": "object"} + with open(temp_path / "cached.json", "w") as f: + json.dump(schema, f) + + resolver = JSONRefResolver(temp_path) + + # First load + result1 = resolver._load_file_cached("cached.json") + + # Modify the file + with open(temp_path / "cached.json", "w") as f: + json.dump({"type": "string"}, f) + + # Second load should return cached version + result2 = resolver._load_file_cached("cached.json") + + assert result1 == result2 + assert result1["type"] == "object" # Original value, not modified + + +class TestCircularReferenceError: + """Tests for circular reference error handling.""" + + def test_circular_reference_local_pointer(self): + """Test circular reference detection with local pointers.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Create a file with circular local reference + schema = { + "$defs": {"a": {"$ref": "#/$defs/b"}, "b": {"$ref": "#/$defs/a"}}, + "properties": {"test": {"$ref": "#/$defs/a"}}, + } + with open(temp_path / "circular.json", "w") as f: + json.dump(schema, f) + + resolver = JSONRefResolver(temp_path) + with pytest.raises(CircularReferenceError): + resolver.resolve_file("circular.json") + +class TestComplexSchemaResolution: + """Tests for complex schema resolution scenarios.""" -def test_resolve_references_no_ref(): - resolver = JSONRefResolver() - schema = {"properties": {"foo": {"type": "string"}}} - result = resolver.resolve_references(schema) - assert result == schema + def test_resolve_nested_external_and_local_refs(self): + """Test resolving schema with both external and local references.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + (temp_path / "components").mkdir() + # Create component file + component = { + "$defs": { + "address": { + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"type": "string"}, + }, + } + } + } + with open(temp_path / "components" / "address.json", "w") as f: + json.dump(component, f) -def test_resolve_references_with_ref(monkeypatch): - resolver = JSONRefResolver() - schema = {"$ref": "other.json", "extra": 1} - referenced = {"properties": {"bar": {"type": "number"}}} + # Create main file with mixed references + main_schema 
= { + "$defs": { + "person": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "address": { + "$ref": "components/address.json#/$defs/address" + }, + }, + } + }, + "properties": {"owner": {"$ref": "#/$defs/person"}}, + } + with open(temp_path / "main.json", "w") as f: + json.dump(main_schema, f) - def fake_load_referenced_file(ref_path, current_file=None): - assert ref_path == "other.json" - return referenced + # Resolve + resolver = JSONRefResolver(temp_path) + result = resolver.resolve_file("main.json") - resolver.load_referenced_file = fake_load_referenced_file # type: ignore[method-assign] - result = resolver.resolve_references(schema) - assert "bar" in result["properties"] - assert result["extra"] == 1 + # Verify nested resolution + owner = result["properties"]["owner"] + assert owner["type"] == "object" + assert owner["properties"]["name"]["type"] == "string" + assert owner["properties"]["address"]["type"] == "object" + assert ( + owner["properties"]["address"]["properties"]["street"]["type"] + == "string" + )
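
The refactored tests pin down several behaviours that are easier to see in isolation; the sketches below illustrate them and are not part of the patch itself.

The conftest fixtures build a base config whose `envs` shape is selected per engine through `if`/`then` blocks that `$ref` into `engines/<engine>.json#/$defs/envs`. A minimal sketch of what validation looks like once that reference is inlined, assuming the third-party `jsonschema` package; the `host`/`port` property names are illustrative stand-ins for the fixture's elided env properties:

```python
import jsonschema

# Stand-in for the resolved base config: the PostgreSQL "envs" definition is
# inlined where the fixture uses "engines/postgresql.json#/$defs/envs".
config_schema = {
    "type": "object",
    "properties": {
        "schemas": {
            "additionalProperties": {
                "allOf": [
                    {"properties": {"engine": {"type": "string"}}},
                    {
                        "if": {
                            "properties": {"engine": {"const": "PostgreSQL"}},
                            "required": ["engine"],
                        },
                        "then": {
                            "properties": {
                                "envs": {
                                    "type": "object",
                                    "additionalProperties": {
                                        "type": "object",
                                        "properties": {
                                            "host": {"type": "string"},  # assumed property
                                            "port": {"type": "integer"},  # assumed property
                                        },
                                    },
                                }
                            }
                        },
                    },
                ]
            }
        }
    },
    "required": ["schemas"],
}

config = {
    "schemas": {
        "orders": {
            "engine": "PostgreSQL",
            "envs": {"dev": {"host": "localhost", "port": 5432}},
        }
    }
}
jsonschema.validate(config, config_schema)  # passes

bad = {"schemas": {"orders": {"engine": "PostgreSQL", "envs": {"dev": {"port": "5432"}}}}}
try:
    jsonschema.validate(bad, config_schema)
except jsonschema.ValidationError as exc:
    print(exc.message)  # "'5432' is not of type 'integer'"
```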
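
tests/test_config.py clones the Config initializer so the missing-BASE_URL path can run without a `.env` file; the underlying pattern is translating pydantic's "missing" validation errors into a domain error named after the environment variable. A condensed, standalone version of that pattern, using a plain `BaseModel` instead of `BaseSettings`, with `ConfigError` standing in for the package's `ConfigurationError`:

```python
from pydantic import BaseModel, ValidationError


class ConfigError(Exception):
    """Stand-in for database_schema_spec.core.exceptions.ConfigurationError."""

    def __init__(self, variable_name: str):
        self.variable_name = variable_name
        super().__init__(f"Missing required environment variable: {variable_name}")


class Settings(BaseModel):
    base_url: str  # required field with no default

    def __init__(self, **data):
        try:
            super().__init__(**data)
        except ValidationError as e:
            # Map the first missing-field error to the env var it represents.
            for error in e.errors():
                if error["type"] == "missing":
                    field = error["loc"][0] if error["loc"] else "unknown"
                    raise ConfigError(str(field).upper()) from e
            raise


try:
    Settings()
except ConfigError as err:
    print(err.variable_name)  # BASE_URL
```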
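
The output-manager tests replace the old `write_schema`/`spec.json` pair with a per-schema-type API. A usage sketch restricted to calls the tests actually exercise; nothing beyond those signatures is assumed:

```python
from pathlib import Path

from database_schema_spec.io.output_manager import OutputManager

manager = OutputManager(Path("output"))

# One write per schema type; the "snapshot/..." types create the nested
# snapshot/ directory on demand, and engine names are lowercased on disk.
manager.write_engine_schema({"title": "Tables", "type": "array"}, "PostgreSQL", "v15.0", "tables")
manager.write_engine_schema({"title": "Stored", "type": "object"}, "PostgreSQL", "v15.0", "snapshot/stored")
manager.write_engine_schema({"title": "Working", "type": "object"}, "PostgreSQL", "v15.0", "snapshot/working")

# Resulting layout:
#   output/postgresql/v15.0/tables.json
#   output/postgresql/v15.0/snapshot/stored.json
#   output/postgresql/v15.0/snapshot/working.json

# The engine map nests URLs by engine -> version -> schema type, matching
# the assertions in test_generate_engine_map_multiple_engines:
expected_fragment = {
    "postgresql": {
        "v15.0": {
            "tables": "postgresql/v15.0/tables.json",
            "snapshot": {
                "stored": "postgresql/v15.0/snapshot/stored.json",
                "working": "postgresql/v15.0/snapshot/working.json",
            },
        }
    }
}
```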
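
TestParseRef fixes the contract for splitting a `$ref` into a file path and a JSON pointer. A self-contained sketch that reproduces exactly the behaviour those tests assert (this is not the package's `_parse_ref` implementation):

```python
def parse_ref(ref: str) -> tuple[str | None, str | None]:
    """Split a $ref at the "#" fragment separator.

    "other.json"  -> file only; "#/$defs/x" -> pointer only;
    "other.json#" -> an empty pointer is treated as no pointer.
    """
    if "#" not in ref:
        return ref, None
    file_part, _, pointer_part = ref.partition("#")
    return (file_part or None), (pointer_part or None)


assert parse_ref("other.json") == ("other.json", None)
assert parse_ref("#/$defs/envs") == (None, "/$defs/envs")
assert parse_ref("engines/postgresql.json#/$defs/envs") == (
    "engines/postgresql.json",
    "/$defs/envs",
)
assert parse_ref("other.json#") == ("other.json", None)
assert parse_ref("other.json#/") == ("other.json", "/")
```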
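
TestResolveJsonPointer likewise pins down the pointer-walking rules: RFC 6901 unescaping (`~1` decodes to `/`, `~0` to `~`), numeric tokens indexing into arrays, and, slightly unlike strict RFC 6901, both `""` and `"/"` addressing the whole document. An illustrative sketch of those rules, without the package's error wrapping:

```python
from typing import Any


def resolve_pointer(document: Any, pointer: str) -> Any:
    if pointer in ("", "/"):
        return document  # both forms address the root, per the tests
    node = document
    for token in pointer.lstrip("/").split("/"):
        token = token.replace("~1", "/").replace("~0", "~")  # RFC 6901 unescaping
        node = node[int(token)] if isinstance(node, list) else node[token]
    return node


doc = {
    "$defs": {"a/b": {"type": "string"}},
    "items": [{"name": "first"}, {"name": "second"}],
}
assert resolve_pointer(doc, "/$defs/a~1b") == {"type": "string"}
assert resolve_pointer(doc, "/items/1") == {"name": "second"}
assert resolve_pointer(doc, "") is doc
```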
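
Finally, the circular-reference tests rely on the resolver keeping a resolution stack and flagging any `$ref` that is already on it. A minimal sketch of that idea over a toy reference graph; `CircularRefError` stands in for the package's `CircularReferenceError`, and the real resolver walks schema documents rather than a dict of edges:

```python
class CircularRefError(Exception):
    pass


def resolve(ref: str, graph: dict[str, str | None], stack: list[str]) -> None:
    if ref in stack:  # already being resolved -> the $ref chain loops
        raise CircularRefError(" -> ".join(stack + [ref]))
    stack.append(ref)
    target = graph.get(ref)
    if target is not None:
        resolve(target, graph, stack)
    stack.pop()


# "#/$defs/a" and "#/$defs/b" reference each other, mirroring circular.json:
graph = {"#/$defs/a": "#/$defs/b", "#/$defs/b": "#/$defs/a"}
try:
    resolve("#/$defs/a", graph, [])
except CircularRefError as err:
    print(err)  # #/$defs/a -> #/$defs/b -> #/$defs/a
```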