Skip to content
This repository was archived by the owner on Feb 6, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions graphdoc/graphdoc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
setup_logging,
)
from graphdoc.eval import DocGeneratorEvaluator
from graphdoc.main import GraphDoc
from graphdoc.modules import DocGeneratorModule
from graphdoc.prompts import (
BadDocGeneratorSignature,
Expand All @@ -45,7 +44,6 @@
)

__all__ = [
"GraphDoc",
"DocGeneratorModule",
"DocGeneratorEvaluator",
"DocGeneratorTrainer",
Expand Down
149 changes: 148 additions & 1 deletion graphdoc/graphdoc/main.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# Copyright 2025-, Semiotic AI, Inc.
# SPDX-License-Identifier: Apache-2.0

# system packages
import argparse
import logging
import random

# system packages
import sys
from pathlib import Path
from typing import List, Literal, Optional, Union

Expand Down Expand Up @@ -633,3 +636,147 @@ def doc_generator_eval_from_yaml(
evaluator_prediction_field=evaluator_prediction_field,
readable_value=readable_value,
)


#######################
# Main Entry Point #
#######################
"""Run GraphDoc as a command-line application.

This module can be run directly to train models, generate documentation,
or evaluate documentation quality.

Usage:
python -m graphdoc.main --config CONFIG_FILE [--log-level LEVEL] COMMAND [ARGS]

Global Arguments:
--config PATH Path to YAML configuration file with GraphDoc
and language model settings
--log-level LEVEL Set logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
Default: INFO

Commands:
train Train a prompt using a dataset
--trainer-config PATH Path to trainer YAML configuration

generate Generate documentation for schema files
--module-config PATH Path to module YAML configuration
--input PATH Path to input schema file or directory
--output PATH Path to output file

evaluate Evaluate documentation quality
--eval-config PATH Path to evaluator YAML configuration

Examples:
# Train a documentation quality model
python -m graphdoc.main \
--config config.yaml \
train \
--trainer-config trainer_config.yaml

# Generate documentation for schemas
python -m graphdoc.main \
--config config.yaml \
generate \
--module-config module_config.yaml \
--input schema.graphql \
--output documented_schema.graphql

# Evaluate documentation quality
python -m graphdoc.main \
--config config.yaml \
evaluate \
--eval-config eval_config.yaml

Configuration:
See example YAML files in the documentation for format details.
""" # noqa: B950
if __name__ == "__main__":

    # Command-line entry point: parse global options, then dispatch to the
    # train / generate / evaluate sub-command (see the module docstring above).
    parser = argparse.ArgumentParser(description="GraphDoc - Documentation Generator")
    parser.add_argument("--config", type=str, help="Path to YAML configuration file")
    parser.add_argument(
        "--log-level",
        type=str,
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        help="Logging level",
    )
    subparsers = parser.add_subparsers(dest="command", help="Command to execute")

    ###################
    #      train      #
    ###################
    train_parser = subparsers.add_parser("train", help="Train a prompt")
    train_parser.add_argument(
        "--trainer-config",
        type=str,
        required=True,
        help="Path to trainer YAML configuration",
    )

    ###################
    #    generate     #
    ###################
    generate_parser = subparsers.add_parser("generate", help="Generate documentation")
    generate_parser.add_argument(
        "--module-config",
        type=str,
        required=True,
        help="Path to module YAML configuration",
    )
    generate_parser.add_argument(
        "--input", type=str, required=True, help="Path to input schema file"
    )
    generate_parser.add_argument(
        "--output", type=str, required=True, help="Path to output schema file"
    )

    ###################
    #    evaluate     #
    ###################
    eval_parser = subparsers.add_parser(
        "evaluate", help="Evaluate documentation quality"
    )
    eval_parser.add_argument(
        "--eval-config",
        type=str,
        required=True,
        help="Path to evaluator YAML configuration",
    )

    args = parser.parse_args()

    # BUG FIX: --log-level was accepted but never applied, so the flag had no
    # effect. Configure root logging before any work is done; basicConfig
    # accepts level names directly ("DEBUG", "INFO", ...).
    logging.basicConfig(level=args.log_level)

    if not args.config:
        parser.print_help()
        sys.exit(1)

    # All sub-commands share a single GraphDoc instance built from the global config.
    graphdoc = GraphDoc.from_yaml(args.config)

    if args.command == "train":
        # Train a single prompt and persist the result to MLflow.
        trainer = graphdoc.single_trainer_from_yaml(args.trainer_config)
        trained_prompt = trainer.train()
        print(
            f"Training complete. Saved to MLflow with name: {trainer.mlflow_model_name}"
        )

    elif args.command == "generate":
        # Generate documentation for an input schema and write it to --output.
        module = graphdoc.doc_generator_module_from_yaml(args.module_config)

        with open(args.input, "r") as f:
            schema = f.read()

        documented_schema = module.document_full_schema(schema)

        with open(args.output, "w") as f:
            f.write(documented_schema.documented_schema)
        print(f"Generation complete. Documentation saved to {args.output}")

    elif args.command == "evaluate":
        # Score existing documentation and log the results to MLflow.
        evaluator = graphdoc.doc_generator_eval_from_yaml(args.eval_config)
        results = evaluator.evaluate()
        print(
            "Evaluation complete. Results saved to MLflow experiment: "
            f"{evaluator.mlflow_experiment_name}"
        )
    else:
        # BUG FIX: a missing/unknown sub-command previously printed help but
        # exited 0 (success); signal failure to the shell instead.
        parser.print_help()
        sys.exit(1)
75 changes: 39 additions & 36 deletions graphdoc/graphdoc/modules/doc_generator_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,45 @@ def __init__(
)
self.prompt.prompt_metric.prompt_metric = "rating"

#######################
# MLFLOW TRACING #
#######################
# TODO: we will break this out into a separate class later
# when we have need for it elsewhere
def _start_trace(
    self,
    client: mlflow.MlflowClient,
    expirement_name: str,
    trace_name: str,
    inputs: dict,
    attributes: dict,
):
    """Open a new MLflow trace inside the named experiment and return it.

    NOTE(review): ``expirement_name`` is a misspelling of ``experiment_name``;
    it is kept as-is so keyword callers do not break.

    :param client: MLflow client used to create the trace.
    :param expirement_name: experiment under which the trace is logged.
    :param trace_name: display name for the trace.
    :param inputs: payload recorded as the trace inputs.
    :param attributes: arbitrary metadata attached to the trace.
    :return: the trace object created by ``client.start_trace``.
    """
    # Pin the active experiment first so every span lands in the same place.
    mlflow.set_experiment(expirement_name)

    return client.start_trace(
        name=trace_name,
        inputs=inputs,
        attributes=attributes,
    )

def _end_trace(
    self,
    client: mlflow.MlflowClient,
    trace: Any,  # TODO: trace: mlflow.Span,
    # E AttributeError: module 'mlflow' has no attribute 'Span'
    outputs: dict,
    status: Literal["OK", "ERROR"],
):
    """Close a previously started MLflow trace.

    :param client: MLflow client used to end the trace.
    :param trace: object returned by ``_start_trace``; only its
        ``request_id`` attribute is read here.
    :param outputs: payload recorded as the trace outputs.
    :param status: terminal status of the trace, "OK" or "ERROR".
    """
    client.end_trace(request_id=trace.request_id, outputs=outputs, status=status)

#######################
# MODULE FUNCTIONS #
#######################
def _retry_by_rating(self, database_schema: str) -> str:
"""Retry the generation if the quality check fails. Rating threshold is
determined at initialization.
Expand Down Expand Up @@ -211,42 +250,6 @@ def forward(self, database_schema: str) -> dspy.Prediction:
else:
return self._predict(database_schema=database_schema)

#######################
# MLFLOW TRACING #
#######################
# TODO: we will break this out into a separate class later
# when we have need for it elsewhere
def _start_trace(
    self,
    client: mlflow.MlflowClient,
    # NOTE(review): "expirement_name" is a misspelling of "experiment_name";
    # kept as-is so keyword callers do not break.
    expirement_name: str,
    trace_name: str,
    inputs: dict,
    attributes: dict,
):
    """Open a new MLflow trace inside the named experiment and return it."""
    # set the experiment name so that everything is logged to the same experiment
    mlflow.set_experiment(expirement_name)

    # start the trace
    trace = client.start_trace(
        name=trace_name,
        inputs=inputs,
        attributes=attributes,
        # experiment_id=expirement_name,
    )

    return trace

def _end_trace(
    self,
    client: mlflow.MlflowClient,
    trace: Any,  # TODO: trace: mlflow.Span,
    # E AttributeError: module 'mlflow' has no attribute 'Span'
    outputs: dict,
    status: Literal["OK", "ERROR"],
):
    """Close a previously started MLflow trace; only ``trace.request_id`` is read."""
    client.end_trace(request_id=trace.request_id, outputs=outputs, status=status)

def document_full_schema(
self,
database_schema: str,
Expand Down
2 changes: 1 addition & 1 deletion graphdoc/runners/eval/eval_doc_generator_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from dotenv import load_dotenv

# internal packages
from graphdoc import GraphDoc
from graphdoc.main import GraphDoc

# logging
log = logging.getLogger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion graphdoc/runners/train/single_prompt_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import mlflow
from dotenv import load_dotenv

from graphdoc import GraphDoc, load_yaml_config
from graphdoc.main import GraphDoc, load_yaml_config

# logging
log = logging.getLogger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion graphdoc/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
from graphdoc import (
DocGeneratorPrompt,
DocQualityPrompt,
GraphDoc,
LocalDataHelper,
Parser,
)
from graphdoc.main import GraphDoc

# logging
log = logging.getLogger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion graphdoc/tests/test_confest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
from graphdoc import (
DocGeneratorPrompt,
DocQualityPrompt,
GraphDoc,
LocalDataHelper,
Parser,
)
from graphdoc.main import GraphDoc

from .conftest import (
OverwriteSchemaCategory,
Expand Down
2 changes: 1 addition & 1 deletion graphdoc/tests/test_graphdoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
DocGeneratorTrainer,
DocQualityPrompt,
DocQualityTrainer,
GraphDoc,
SinglePromptTrainer,
load_yaml_config,
)
from graphdoc.main import GraphDoc

# logging
log = logging.getLogger(__name__)
Expand Down